--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+Date: Mon, 27 Nov 2017 11:11:41 -0800
+Subject: cls_bpf: don't decrement net's refcount when offload fails
+
+From: Jakub Kicinski <jakub.kicinski@netronome.com>
+
+
+[ Upstream commit 25415cec502a1232b19fffc85465882b19a90415 ]
+
+When cls_bpf offload was added it seemed like a good idea to
+call cls_bpf_delete_prog() instead of extending the error
+handling path, since the software state is fully initialized
+at that point. This handling of errors without jumping to
+the end of the function is error prone, as proven by a later
+commit missing that extra call to __cls_bpf_delete_prog().
+
+__cls_bpf_delete_prog() is now expected to be invoked with
+a reference on exts->net or with the field zeroed out. The call
+on the offload's error path does not fulfil this requirement,
+leading to each error stealing a reference on the net namespace.
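+
+To illustrate the invariant, the regular delete path pairs the two
+calls roughly like this (a sketch for illustration, not part of this
+patch):
+
+	if (tcf_exts_get_net(&prog->exts))
+		call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
+	else
+		__cls_bpf_delete_prog(prog);
+
+The offload error path invoked __cls_bpf_delete_prog() directly,
+without a matching tcf_exts_get_net(), so the tcf_exts_put_net()
+inside it dropped a reference that was never taken.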
+
+Create a function undoing what cls_bpf_set_parms() did and
+use it from __cls_bpf_delete_prog() and the error path.
+
+Fixes: aae2c35ec892 ("cls_bpf: use tcf_exts_get_net() before call_rcu()")
+Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
+Reviewed-by: Simon Horman <simon.horman@netronome.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_bpf.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -246,11 +246,8 @@ static int cls_bpf_init(struct tcf_proto
+ return 0;
+ }
+
+-static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
++static void cls_bpf_free_parms(struct cls_bpf_prog *prog)
+ {
+- tcf_exts_destroy(&prog->exts);
+- tcf_exts_put_net(&prog->exts);
+-
+ if (cls_bpf_is_ebpf(prog))
+ bpf_prog_put(prog->filter);
+ else
+@@ -258,6 +255,14 @@ static void __cls_bpf_delete_prog(struct
+
+ kfree(prog->bpf_name);
+ kfree(prog->bpf_ops);
++}
++
++static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
++{
++ tcf_exts_destroy(&prog->exts);
++ tcf_exts_put_net(&prog->exts);
++
++ cls_bpf_free_parms(prog);
+ kfree(prog);
+ }
+
+@@ -509,10 +514,8 @@ static int cls_bpf_change(struct net *ne
+ goto errout;
+
+ ret = cls_bpf_offload(tp, prog, oldprog);
+- if (ret) {
+- __cls_bpf_delete_prog(prog);
+- return ret;
+- }
++ if (ret)
++ goto errout_parms;
+
+ if (!tc_in_hw(prog->gen_flags))
+ prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+@@ -529,6 +532,8 @@ static int cls_bpf_change(struct net *ne
+ *arg = prog;
+ return 0;
+
++errout_parms:
++ cls_bpf_free_parms(prog);
+ errout:
+ tcf_exts_destroy(&prog->exts);
+ kfree(prog);
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 21 Nov 2017 10:22:25 -0500
+Subject: net: accept UFO datagrams from tuntap and packet
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 0c19f846d582af919db66a5914a0189f9f92c936 ]
+
+Tuntap and similar devices can inject GSO packets. Accept type
+VIRTIO_NET_HDR_GSO_UDP, even though the kernel no longer generates
+UFO natively.
+
+Processes are expected to use feature negotiation such as TUNSETOFFLOAD
+to detect supported offload types and refrain from injecting other
+packets. This process breaks down with live migration: guest kernels
+do not renegotiate flags, so destination hosts need to expose all
+features that the source host does.
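+
+For illustration, the negotiation from userspace looks roughly like
+this (a hedged sketch using TUNSETOFFLOAD and the TUN_F_* flags from
+<linux/if_tun.h>; tap_fd is assumed to be an already-configured
+tun/tap descriptor, and error handling is omitted):
+
+	unsigned long offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+				TUN_F_TSO_ECN | TUN_F_UFO;
+
+	/* the kernel rejects offload flags it does not support */
+	if (ioctl(tap_fd, TUNSETOFFLOAD, offload) < 0)
+		ioctl(tap_fd, TUNSETOFFLOAD, offload & ~TUN_F_UFO);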
+
+Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677.
+This patch introduces nearly(*) no new code to simplify verification.
+It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP
+insertion and software UFO segmentation.
+
+It does not reinstate protocol stack support, hardware offload
+(NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception
+of VIRTIO_NET_HDR_GSO_UDP packets in tuntap.
+
+To support SKB_GSO_UDP reappearing in the stack, also reinstate
+logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD
+by squashing in commit 939912216fa8 ("net: skb_needs_check() removes
+CHECKSUM_UNNECESSARY check for tx.") and reverting commit 8d63bee643f1
+("net: avoid skb_warn_bad_offload false positives on UFO").
+
+(*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id,
+ipv6_proxy_select_ident is changed to return a __be32 and this is
+assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted
+at the end of the enum to minimize code churn.
+
+Tested
+ Booted a v4.13 guest kernel with QEMU. On a host kernel before this
+ patch `ethtool -k eth0` shows UFO disabled. After the patch, it is
+ enabled, same as on a v4.13 host kernel.
+
+ A UFO packet sent from the guest appears on the tap device:
+ host:
+ nc -l -p -u 8000 &
+ tcpdump -n -i tap0
+
+ guest:
+ dd if=/dev/zero of=payload.txt bs=1 count=2000
+ nc -u 192.16.1.1 8000 < payload.txt
+
+ Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds,
+ packets arriving fragmented:
+
+ ./with_tap_pair.sh ./tap_send_ufo tap0 tap1
+ (from https://github.com/wdebruij/kerneltools/tree/master/tests)
+
+Changes
+ v1 -> v2
+ - simplified set_offload change (review comment)
+ - documented test procedure
+
+Link: http://lkml.kernel.org/r/<CAF=yD-LuUeDuL9YWPJD9ykOZ0QCjNeznPDr6whqZ9NGMNF12Mw@mail.gmail.com>
+Fixes: fb652fdfe837 ("macvlan/macvtap: Remove NETIF_F_UFO advertisement.")
+Reported-by: Michal Kubecek <mkubecek@suse.cz>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c | 2
+ drivers/net/tun.c | 2
+ include/linux/netdev_features.h | 4 +
+ include/linux/netdevice.h | 1
+ include/linux/skbuff.h | 2
+ include/linux/virtio_net.h | 5 +-
+ include/net/ipv6.h | 2
+ net/core/dev.c | 3 -
+ net/ipv4/af_inet.c | 12 ++++-
+ net/ipv4/udp_offload.c | 49 +++++++++++++++++++++--
+ net/ipv6/output_core.c | 6 +-
+ net/ipv6/udp_offload.c | 85 ++++++++++++++++++++++++++++++++++++++--
+ net/openvswitch/datapath.c | 14 ++++++
+ net/openvswitch/flow.c | 6 ++
+ net/sched/act_csum.c | 6 ++
+ 15 files changed, 181 insertions(+), 18 deletions(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -1080,7 +1080,7 @@ static long tap_ioctl(struct file *file,
+ case TUNSETOFFLOAD:
+ /* let the user check for future flags */
+ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+- TUN_F_TSO_ECN))
++ TUN_F_TSO_ECN | TUN_F_UFO))
+ return -EINVAL;
+
+ rtnl_lock();
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -2157,6 +2157,8 @@ static int set_offload(struct tun_struct
+ features |= NETIF_F_TSO6;
+ arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
+ }
++
++ arg &= ~TUN_F_UFO;
+ }
+
+ /* This gives the user a way to test for new features in future by
+--- a/include/linux/netdev_features.h
++++ b/include/linux/netdev_features.h
+@@ -54,8 +54,9 @@ enum {
+ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
+ NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */
+ NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */
++ NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */
+ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
+- NETIF_F_GSO_ESP_BIT,
++ NETIF_F_GSO_UDP_BIT,
+
+ NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
+ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */
+@@ -132,6 +133,7 @@ enum {
+ #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
+ #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP)
+ #define NETIF_F_GSO_ESP __NETIF_F(GSO_ESP)
++#define NETIF_F_GSO_UDP __NETIF_F(GSO_UDP)
+ #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
+ #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
+ #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4101,6 +4101,7 @@ static inline bool net_gso_ok(netdev_fea
+ BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT));
++ BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT));
+
+ return (features & feature) == feature;
+ }
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -569,6 +569,8 @@ enum {
+ SKB_GSO_SCTP = 1 << 14,
+
+ SKB_GSO_ESP = 1 << 15,
++
++ SKB_GSO_UDP = 1 << 16,
+ };
+
+ #if BITS_PER_LONG > 32
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -9,7 +9,7 @@ static inline int virtio_net_hdr_to_skb(
+ const struct virtio_net_hdr *hdr,
+ bool little_endian)
+ {
+- unsigned short gso_type = 0;
++ unsigned int gso_type = 0;
+
+ if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+@@ -19,6 +19,9 @@ static inline int virtio_net_hdr_to_skb(
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ gso_type = SKB_GSO_TCPV6;
+ break;
++ case VIRTIO_NET_HDR_GSO_UDP:
++ gso_type = SKB_GSO_UDP;
++ break;
+ default:
+ return -EINVAL;
+ }
+--- a/include/net/ipv6.h
++++ b/include/net/ipv6.h
+@@ -727,7 +727,7 @@ static inline int ipv6_addr_diff(const s
+ __be32 ipv6_select_ident(struct net *net,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
+-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
++__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb);
+
+ int ip6_dst_hoplimit(struct dst_entry *dst);
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -2735,7 +2735,8 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
+ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+ {
+ if (tx_path)
+- return skb->ip_summed != CHECKSUM_PARTIAL;
++ return skb->ip_summed != CHECKSUM_PARTIAL &&
++ skb->ip_summed != CHECKSUM_UNNECESSARY;
+
+ return skb->ip_summed == CHECKSUM_NONE;
+ }
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1221,9 +1221,10 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
+ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+ {
+- bool fixedid = false, gso_partial, encap;
++ bool udpfrag = false, fixedid = false, gso_partial, encap;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ const struct net_offload *ops;
++ unsigned int offset = 0;
+ struct iphdr *iph;
+ int proto, tot_len;
+ int nhoff;
+@@ -1258,6 +1259,7 @@ struct sk_buff *inet_gso_segment(struct
+ segs = ERR_PTR(-EPROTONOSUPPORT);
+
+ if (!skb->encapsulation || encap) {
++ udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+ fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID);
+
+ /* fixed ID is invalid if DF bit is not set */
+@@ -1277,7 +1279,13 @@ struct sk_buff *inet_gso_segment(struct
+ skb = segs;
+ do {
+ iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
+- if (skb_is_gso(skb)) {
++ if (udpfrag) {
++ iph->frag_off = htons(offset >> 3);
++ if (skb->next)
++ iph->frag_off |= htons(IP_MF);
++ offset += skb->len - nhoff - ihl;
++ tot_len = skb->len - nhoff;
++ } else if (skb_is_gso(skb)) {
+ if (!fixedid) {
+ iph->id = htons(id);
+ id += skb_shinfo(skb)->gso_segs;
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -187,16 +187,57 @@ out_unlock:
+ }
+ EXPORT_SYMBOL(skb_udp_tunnel_segment);
+
+-static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb,
+- netdev_features_t features)
++static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
++ netdev_features_t features)
+ {
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ unsigned int mss;
++ __wsum csum;
++ struct udphdr *uh;
++ struct iphdr *iph;
+
+ if (skb->encapsulation &&
+ (skb_shinfo(skb)->gso_type &
+- (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)))
++ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
+ segs = skb_udp_tunnel_segment(skb, features, false);
++ goto out;
++ }
++
++ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
++ goto out;
+
++ mss = skb_shinfo(skb)->gso_size;
++ if (unlikely(skb->len <= mss))
++ goto out;
++
++ /* Do software UFO. Complete and fill in the UDP checksum as
++ * HW cannot do checksum of UDP packets sent as multiple
++ * IP fragments.
++ */
++
++ uh = udp_hdr(skb);
++ iph = ip_hdr(skb);
++
++ uh->check = 0;
++ csum = skb_checksum(skb, 0, skb->len, 0);
++ uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum);
++ if (uh->check == 0)
++ uh->check = CSUM_MANGLED_0;
++
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++ /* If there is no outer header we can fake a checksum offload
++ * due to the fact that we have already done the checksum in
++ * software prior to segmenting the frame.
++ */
++ if (!skb->encap_hdr_csum)
++ features |= NETIF_F_HW_CSUM;
++
++ /* Fragment the skb. IP headers of the fragments are updated in
++ * inet_gso_segment()
++ */
++ segs = skb_segment(skb, features);
++out:
+ return segs;
+ }
+
+@@ -330,7 +371,7 @@ static int udp4_gro_complete(struct sk_b
+
+ static const struct net_offload udpv4_offload = {
+ .callbacks = {
+- .gso_segment = udp4_tunnel_segment,
++ .gso_segment = udp4_ufo_fragment,
+ .gro_receive = udp4_gro_receive,
+ .gro_complete = udp4_gro_complete,
+ },
+--- a/net/ipv6/output_core.c
++++ b/net/ipv6/output_core.c
+@@ -39,7 +39,7 @@ static u32 __ipv6_select_ident(struct ne
+ *
+ * The network header must be set before calling this.
+ */
+-void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
++__be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb)
+ {
+ static u32 ip6_proxy_idents_hashrnd __read_mostly;
+ struct in6_addr buf[2];
+@@ -51,14 +51,14 @@ void ipv6_proxy_select_ident(struct net
+ offsetof(struct ipv6hdr, saddr),
+ sizeof(buf), buf);
+ if (!addrs)
+- return;
++ return 0;
+
+ net_get_random_once(&ip6_proxy_idents_hashrnd,
+ sizeof(ip6_proxy_idents_hashrnd));
+
+ id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd,
+ &addrs[1], &addrs[0]);
+- skb_shinfo(skb)->ip6_frag_id = htonl(id);
++ return htonl(id);
+ }
+ EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident);
+
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -17,15 +17,94 @@
+ #include <net/ip6_checksum.h>
+ #include "ip6_offload.h"
+
+-static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb,
+- netdev_features_t features)
++static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
++ netdev_features_t features)
+ {
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ unsigned int mss;
++ unsigned int unfrag_ip6hlen, unfrag_len;
++ struct frag_hdr *fptr;
++ u8 *packet_start, *prevhdr;
++ u8 nexthdr;
++ u8 frag_hdr_sz = sizeof(struct frag_hdr);
++ __wsum csum;
++ int tnl_hlen;
++ int err;
++
++ mss = skb_shinfo(skb)->gso_size;
++ if (unlikely(skb->len <= mss))
++ goto out;
+
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+ (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
+ segs = skb_udp_tunnel_segment(skb, features, true);
++ else {
++ const struct ipv6hdr *ipv6h;
++ struct udphdr *uh;
++
++ if (!pskb_may_pull(skb, sizeof(struct udphdr)))
++ goto out;
++
++ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
++ * do checksum of UDP packets sent as multiple IP fragments.
++ */
++
++ uh = udp_hdr(skb);
++ ipv6h = ipv6_hdr(skb);
++
++ uh->check = 0;
++ csum = skb_checksum(skb, 0, skb->len, 0);
++ uh->check = udp_v6_check(skb->len, &ipv6h->saddr,
++ &ipv6h->daddr, csum);
++ if (uh->check == 0)
++ uh->check = CSUM_MANGLED_0;
++
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++ /* If there is no outer header we can fake a checksum offload
++ * due to the fact that we have already done the checksum in
++ * software prior to segmenting the frame.
++ */
++ if (!skb->encap_hdr_csum)
++ features |= NETIF_F_HW_CSUM;
++
++ /* Check if there is enough headroom to insert fragment header. */
++ tnl_hlen = skb_tnl_header_len(skb);
++ if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {
++ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
++ goto out;
++ }
++
++ /* Find the unfragmentable header and shift it left by frag_hdr_sz
++ * bytes to insert fragment header.
++ */
++ err = ip6_find_1stfragopt(skb, &prevhdr);
++ if (err < 0)
++ return ERR_PTR(err);
++ unfrag_ip6hlen = err;
++ nexthdr = *prevhdr;
++ *prevhdr = NEXTHDR_FRAGMENT;
++ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
++ unfrag_ip6hlen + tnl_hlen;
++ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
++ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
++
++ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
++ skb->mac_header -= frag_hdr_sz;
++ skb->network_header -= frag_hdr_sz;
++
++ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
++ fptr->nexthdr = nexthdr;
++ fptr->reserved = 0;
++ fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb);
++
++ /* Fragment the skb. ipv6 header and the remaining fields of the
++ * fragment header are updated in ipv6_gso_segment()
++ */
++ segs = skb_segment(skb, features);
++ }
+
++out:
+ return segs;
+ }
+
+@@ -75,7 +154,7 @@ static int udp6_gro_complete(struct sk_b
+
+ static const struct net_offload udpv6_offload = {
+ .callbacks = {
+- .gso_segment = udp6_tunnel_segment,
++ .gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
+ },
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -335,6 +335,8 @@ static int queue_gso_packets(struct data
+ const struct dp_upcall_info *upcall_info,
+ uint32_t cutlen)
+ {
++ unsigned short gso_type = skb_shinfo(skb)->gso_type;
++ struct sw_flow_key later_key;
+ struct sk_buff *segs, *nskb;
+ int err;
+
+@@ -345,9 +347,21 @@ static int queue_gso_packets(struct data
+ if (segs == NULL)
+ return -EINVAL;
+
++ if (gso_type & SKB_GSO_UDP) {
++ /* The initial flow key extracted by ovs_flow_key_extract()
++ * in this case is for a first fragment, so we need to
++ * properly mark later fragments.
++ */
++ later_key = *key;
++ later_key.ip.frag = OVS_FRAG_TYPE_LATER;
++ }
++
+ /* Queue all of the segments. */
+ skb = segs;
+ do {
++ if (gso_type & SKB_GSO_UDP && skb != segs)
++ key = &later_key;
++
+ err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
+ if (err)
+ break;
+--- a/net/openvswitch/flow.c
++++ b/net/openvswitch/flow.c
+@@ -584,7 +584,8 @@ static int key_extract(struct sk_buff *s
+ key->ip.frag = OVS_FRAG_TYPE_LATER;
+ return 0;
+ }
+- if (nh->frag_off & htons(IP_MF))
++ if (nh->frag_off & htons(IP_MF) ||
++ skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+ key->ip.frag = OVS_FRAG_TYPE_FIRST;
+ else
+ key->ip.frag = OVS_FRAG_TYPE_NONE;
+@@ -700,6 +701,9 @@ static int key_extract(struct sk_buff *s
+
+ if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+ return 0;
++ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++ key->ip.frag = OVS_FRAG_TYPE_FIRST;
++
+ /* Transport layer. */
+ if (key->ip.proto == NEXTHDR_TCP) {
+ if (tcphdr_ok(skb)) {
+--- a/net/sched/act_csum.c
++++ b/net/sched/act_csum.c
+@@ -229,6 +229,9 @@ static int tcf_csum_ipv4_udp(struct sk_b
+ const struct iphdr *iph;
+ u16 ul;
+
++ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++ return 1;
++
+ /*
+ * Support both UDP and UDPLITE checksum algorithms, Don't use
+ * udph->len to get the real length without any protocol check,
+@@ -282,6 +285,9 @@ static int tcf_csum_ipv6_udp(struct sk_b
+ const struct ipv6hdr *ip6h;
+ u16 ul;
+
++ if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
++ return 1;
++
+ /*
+ * Support both UDP and UDPLITE checksum algorithms, Don't use
+ * udph->len to get the real length without any protocol check,
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Tue, 21 Nov 2017 07:08:57 -0800
+Subject: net: ipv6: Fixup device for anycast routes during copy
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit 98d11291d189cb5adf49694d0ad1b971c0212697 ]
+
+Florian reported a breakage with anycast routes due to commit
+4832c30d5458 ("net: ipv6: put host and anycast routes on device with
+address"). Prior to this commit anycast routes were added against the
+loopback device causing repetitive route entries with no insight into
+why they existed. e.g.:
+ $ ip -6 ro ls table local type anycast
+ anycast 2001:db8:1:: dev lo proto kernel metric 0 pref medium
+ anycast 2001:db8:2:: dev lo proto kernel metric 0 pref medium
+ anycast fe80:: dev lo proto kernel metric 0 pref medium
+ anycast fe80:: dev lo proto kernel metric 0 pref medium
+
+The point of commit 4832c30d5458 is to add the routes using the device
+with the address which is causing the route to be added. e.g.,:
+ $ ip -6 ro ls table local type anycast
+ anycast 2001:db8:1:: dev eth1 proto kernel metric 0 pref medium
+ anycast 2001:db8:2:: dev eth2 proto kernel metric 0 pref medium
+ anycast fe80:: dev eth2 proto kernel metric 0 pref medium
+ anycast fe80:: dev eth1 proto kernel metric 0 pref medium
+
+For traffic to work as it did before, the dst device needs to be switched
+to the loopback when the copy is created, as is done for local routes.
+
+Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -960,7 +960,7 @@ static struct net_device *ip6_rt_get_dev
+ {
+ struct net_device *dev = rt->dst.dev;
+
+- if (rt->rt6i_flags & RTF_LOCAL) {
++ if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
+ /* for copies of local routes, dst->dev needs to be the
+ * device if it is a master device, the master device if
+ * device is enslaved, and the loopback as the default
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+Date: Sat, 25 Nov 2017 13:14:40 -0600
+Subject: net: openvswitch: datapath: fix data type in queue_gso_packets
+
+From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
+
+
+[ Upstream commit 2734166e89639c973c6e125ac8bcfc2d9db72b70 ]
+
+gso_type is being used in bitwise AND operations together with SKB_GSO_UDP.
+The issue is that variable gso_type is of type unsigned short and
+SKB_GSO_UDP expands to more than 16 bits:
+
+SKB_GSO_UDP = 1 << 16
+
+this makes any bitwise AND operation between gso_type and SKB_GSO_UDP
+always evaluate to zero, hence making some code unreachable and likely
+causing undesired behavior.
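+
+For illustration (a sketch of the truncation at work):
+
+	unsigned short gso_type = skb_shinfo(skb)->gso_type;
+
+	/* SKB_GSO_UDP is 1 << 16 and does not fit in 16 bits,
+	 * so this condition is never true:
+	 */
+	if (gso_type & SKB_GSO_UDP)
+		...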
+
+Fix this by changing the data type of variable gso_type to unsigned int.
+
+Addresses-Coverity-ID: 1462223
+Fixes: 0c19f846d582 ("net: accept UFO datagrams from tuntap and packet")
+Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/datapath.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -335,7 +335,7 @@ static int queue_gso_packets(struct data
+ const struct dp_upcall_info *upcall_info,
+ uint32_t cutlen)
+ {
+- unsigned short gso_type = skb_shinfo(skb)->gso_type;
++ unsigned int gso_type = skb_shinfo(skb)->gso_type;
+ struct sw_flow_key later_key;
+ struct sk_buff *segs, *nskb;
+ int err;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 28 Nov 2017 08:03:30 -0800
+Subject: net/packet: fix a race in packet_bind() and packet_notifier()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 15fe076edea787807a7cdc168df832544b58eba6 ]
+
+syzbot reported crashes [1] and provided a C repro easing bug hunting.
+
+When/if packet_do_bind() calls __unregister_prot_hook() and releases
+po->bind_lock, another thread can run packet_notifier() and process a
+NETDEV_UP event.
+
+This calls register_prot_hook() and hooks the socket again right
+before the first thread is able to grab po->bind_lock again.
+
+Fix this issue by temporarily setting po->num to 0, as suggested by
+David Miller.
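+
+Roughly, the racy interleaving looks like this (illustrative sketch):
+
+	CPU 0 (packet_do_bind)          CPU 1 (packet_notifier)
+	----------------------          -----------------------
+	__unregister_prot_hook(sk)
+	release po->bind_lock
+	                                NETDEV_UP: po->num != 0,
+	                                so register_prot_hook(sk)
+	grab po->bind_lock again
+	modify po->prot_hook            <- hook is still registered
+
+With po->num temporarily set to 0, the notifier finds nothing to
+rehook; the BUG_ON(po->running) added below documents that assumption.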
+
+[1]
+dev_remove_pack: ffff8801bf16fa80 not found
+------------[ cut here ]------------
+kernel BUG at net/core/dev.c:7945! ( BUG_ON(!list_empty(&dev->ptype_all)); )
+invalid opcode: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+ (ftrace buffer empty)
+Modules linked in:
+device syz0 entered promiscuous mode
+CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801cc57a500 task.stack: ffff8801cc588000
+RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945
+RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293
+RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2
+RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810
+device syz0 entered promiscuous mode
+RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8
+R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0
+FS: 0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106
+ tun_detach drivers/net/tun.c:670 [inline]
+ tun_chr_close+0x49/0x60 drivers/net/tun.c:2845
+ __fput+0x333/0x7f0 fs/file_table.c:210
+ ____fput+0x15/0x20 fs/file_table.c:244
+ task_work_run+0x199/0x270 kernel/task_work.c:113
+ exit_task_work include/linux/task_work.h:22 [inline]
+ do_exit+0x9bb/0x1ae0 kernel/exit.c:865
+ do_group_exit+0x149/0x400 kernel/exit.c:968
+ SYSC_exit_group kernel/exit.c:979 [inline]
+ SyS_exit_group+0x1d/0x20 kernel/exit.c:977
+ entry_SYSCALL_64_fastpath+0x1f/0x96
+RIP: 0033:0x44ad19
+
+Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Francesco Ruggeri <fruggeri@aristanetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3101,6 +3101,10 @@ static int packet_do_bind(struct sock *s
+ if (need_rehook) {
+ if (po->running) {
+ rcu_read_unlock();
++ /* prevents packet_notifier() from calling
++ * register_prot_hook()
++ */
++ po->num = 0;
+ __unregister_prot_hook(sk, true);
+ rcu_read_lock();
+ dev_curr = po->prot_hook.dev;
+@@ -3109,6 +3113,7 @@ static int packet_do_bind(struct sock *s
+ dev->ifindex);
+ }
+
++ BUG_ON(po->running);
+ po->num = proto;
+ po->prot_hook.type = proto;
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+Date: Mon, 20 Nov 2017 19:05:17 +0100
+Subject: net: qmi_wwan: add Quectel BG96 2c7c:0296
+
+From: Sebastian Sjoholm <ssjoholm@mac.com>
+
+
+[ Upstream commit f9409e7f086fa6c4623769b4b2f4f17a024d8143 ]
+
+Quectel BG96 is a Qualcomm MDM9206-based IoT modem, supporting both
+CAT-M and NB-IoT. The tested hardware is a BG96 mounted on the Quectel
+development board (EVB). The USB id is added to qmi_wwan.c to allow
+QMI communication with the BG96.
+
+Signed-off-by: Sebastian Sjoholm <ssjoholm@mac.com>
+Acked-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -1239,6 +1239,7 @@ static const struct usb_device_id produc
+ {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
++ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
+
+ /* 4. Gobi 1000 devices */
+ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Date: Tue, 21 Nov 2017 16:15:57 +0100
+Subject: net: realtek: r8169: implement set_link_ksettings()
+
+From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+
+
+[ Upstream commit 9e77d7a5549dc4d4999a60676373ab3fd1dae4db ]
+
+Commit 6fa1ba61520576cf1346c4ff09a056f2950cb3bf partially
+implemented the new ethtool API, by replacing get_settings()
+with get_link_ksettings(). This breaks ethtool, since the
+userspace tool (according to the new API specs) never tries
+the legacy set() call when the new get() call succeeds.
+
+All attempts to change a setting from userspace result in:
+> Cannot set new settings: Operation not supported
+
+Implement the missing set() call.
+
+Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
+Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/realtek/r8169.c | 38 ++++++++++++++++++++---------------
+ 1 file changed, 22 insertions(+), 16 deletions(-)
+
+--- a/drivers/net/ethernet/realtek/r8169.c
++++ b/drivers/net/ethernet/realtek/r8169.c
+@@ -2025,21 +2025,6 @@ out:
+ return ret;
+ }
+
+-static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+-{
+- struct rtl8169_private *tp = netdev_priv(dev);
+- int ret;
+-
+- del_timer_sync(&tp->timer);
+-
+- rtl_lock_work(tp);
+- ret = rtl8169_set_speed(dev, cmd->autoneg, ethtool_cmd_speed(cmd),
+- cmd->duplex, cmd->advertising);
+- rtl_unlock_work(tp);
+-
+- return ret;
+-}
+-
+ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
+ netdev_features_t features)
+ {
+@@ -2166,6 +2151,27 @@ static int rtl8169_get_link_ksettings(st
+ return rc;
+ }
+
++static int rtl8169_set_link_ksettings(struct net_device *dev,
++ const struct ethtool_link_ksettings *cmd)
++{
++ struct rtl8169_private *tp = netdev_priv(dev);
++ int rc;
++ u32 advertising;
++
++ if (!ethtool_convert_link_mode_to_legacy_u32(&advertising,
++ cmd->link_modes.advertising))
++ return -EINVAL;
++
++ del_timer_sync(&tp->timer);
++
++ rtl_lock_work(tp);
++ rc = rtl8169_set_speed(dev, cmd->base.autoneg, cmd->base.speed,
++ cmd->base.duplex, advertising);
++ rtl_unlock_work(tp);
++
++ return rc;
++}
++
+ static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+ void *p)
+ {
+@@ -2367,7 +2373,6 @@ static const struct ethtool_ops rtl8169_
+ .get_drvinfo = rtl8169_get_drvinfo,
+ .get_regs_len = rtl8169_get_regs_len,
+ .get_link = ethtool_op_get_link,
+- .set_settings = rtl8169_set_settings,
+ .get_msglevel = rtl8169_get_msglevel,
+ .set_msglevel = rtl8169_set_msglevel,
+ .get_regs = rtl8169_get_regs,
+@@ -2379,6 +2384,7 @@ static const struct ethtool_ops rtl8169_
+ .get_ts_info = ethtool_op_get_ts_info,
+ .nway_reset = rtl8169_nway_reset,
+ .get_link_ksettings = rtl8169_get_link_ksettings,
++ .set_link_ksettings = rtl8169_set_link_ksettings,
+ };
+
+ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 5 Dec 2017 12:45:56 -0800
+Subject: net: remove hlist_nulls_add_tail_rcu()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 ]
+
+Alexander Potapenko reported use of uninitialized memory [1]
+
+This happens when inserting a request socket into TCP ehash,
+in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized.
+
+Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for
+mixed v4/v6 sockets")
+
+Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6
+ordering fix") missed the opportunity to get rid of
+hlist_nulls_add_tail_rcu():
+
+Both UDP sockets and TCP/DCCP listeners no longer use
+__sk_nulls_add_node_rcu() for their hash insertion.
+
+Since all other sockets have a unique 4-tuple, the reuseport status
+has no special meaning, so we can always use hlist_nulls_add_head_rcu()
+for them and save a few cycles/instructions.
+
+[1]
+
+==================================================================
+BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050
+CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:16
+ dump_stack+0x185/0x1d0 lib/dump_stack.c:52
+ kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016
+ __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766
+ __sk_nulls_add_node_rcu ./include/net/sock.h:684
+ inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413
+ reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754
+ inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765
+ tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414
+ tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314
+ tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917
+ tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483
+ tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763
+ ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216
+ NF_HOOK ./include/linux/netfilter.h:248
+ ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257
+ dst_input ./include/net/dst.h:477
+ ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397
+ NF_HOOK ./include/linux/netfilter.h:248
+ ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488
+ __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298
+ __netif_receive_skb net/core/dev.c:4336
+ netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497
+ napi_skb_finish net/core/dev.c:4858
+ napi_gro_receive+0x629/0xa50 net/core/dev.c:4889
+ e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018
+ e1000_clean_rx_irq+0x1492/0x1d30
+drivers/net/ethernet/intel/e1000/e1000_main.c:4474
+ e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819
+ napi_poll net/core/dev.c:5500
+ net_rx_action+0x73c/0x1820 net/core/dev.c:5566
+ __do_softirq+0x4b4/0x8dd kernel/softirq.c:284
+ invoke_softirq kernel/softirq.c:364
+ irq_exit+0x203/0x240 kernel/softirq.c:405
+ exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638
+ do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263
+ common_interrupt+0x86/0x86
+
+Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets")
+Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Alexander Potapenko <glider@google.com>
+Acked-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/rculist_nulls.h | 38 --------------------------------------
+ include/net/sock.h | 6 +-----
+ 2 files changed, 1 insertion(+), 43 deletions(-)
+
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -101,44 +101,6 @@ static inline void hlist_nulls_add_head_
+ }
+
+ /**
+- * hlist_nulls_add_tail_rcu
+- * @n: the element to add to the hash list.
+- * @h: the list to add to.
+- *
+- * Description:
+- * Adds the specified element to the end of the specified hlist_nulls,
+- * while permitting racing traversals. NOTE: tail insertion requires
+- * list traversal.
+- *
+- * The caller must take whatever precautions are necessary
+- * (such as holding appropriate locks) to avoid racing
+- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+- * or hlist_nulls_del_rcu(), running on this same list.
+- * However, it is perfectly legal to run concurrently with
+- * the _rcu list-traversal primitives, such as
+- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+- * problems on Alpha CPUs. Regardless of the type of CPU, the
+- * list-traversal primitive must be guarded by rcu_read_lock().
+- */
+-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
+- struct hlist_nulls_head *h)
+-{
+- struct hlist_nulls_node *i, *last = NULL;
+-
+- for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
+- i = hlist_nulls_next_rcu(i))
+- last = i;
+-
+- if (last) {
+- n->next = last->next;
+- n->pprev = &last->next;
+- rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
+- } else {
+- hlist_nulls_add_head_rcu(n, h);
+- }
+-}
+-
+-/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -683,11 +683,7 @@ static inline void sk_add_node_rcu(struc
+
+ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
+ {
+- if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+- sk->sk_family == AF_INET6)
+- hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+- else
+- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
++ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ }
+
+ static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Mon, 27 Nov 2017 18:37:21 +0100
+Subject: net: sched: cbq: create block for q->link.block
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit d51aae68b142f48232257e96ce317db25445418d ]
+
+q->link.block is not initialized, which leads to EINVAL when one tries
+to add a filter there. So initialize it properly.
+
+This can be reproduced by:
+$ tc qdisc add dev eth0 root handle 1: cbq avpkt 1000 rate 1000Mbit bandwidth 1000Mbit
+$ tc filter add dev eth0 parent 1: protocol ip prio 100 u32 match ip protocol 0 0x00 flowid 1:1
+
+Reported-by: Jaroslav Aster <jaster@redhat.com>
+Reported-by: Ivan Vecera <ivecera@redhat.com>
+Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Acked-by: Eelco Chaudron <echaudro@redhat.com>
+Reviewed-by: Ivan Vecera <ivecera@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cbq.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1157,9 +1157,13 @@ static int cbq_init(struct Qdisc *sch, s
+ if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
+ return -EINVAL;
+
++ err = tcf_block_get(&q->link.block, &q->link.filter_list);
++ if (err)
++ goto put_rtab;
++
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+- goto put_rtab;
++ goto put_block;
+
+ q->link.sibling = &q->link;
+ q->link.common.classid = sch->handle;
+@@ -1193,6 +1197,9 @@ static int cbq_init(struct Qdisc *sch, s
+ cbq_addprio(q, &q->link);
+ return 0;
+
++put_block:
++ tcf_block_put(q->link.block);
++
+ put_rtab:
+ qdisc_put_rtab(q->link.R_tab);
+ return err;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 6 Dec 2017 01:04:50 +0100
+Subject: net: thunderx: Fix TCP/UDP checksum offload for IPv4 pkts
+
+From: Florian Westphal <fw@strlen.de>
+
+
+[ Upstream commit 134059fd2775be79e26c2dff87d25cc2f6ea5626 ]
+
+Offload IP header checksum to NIC.
+
+This fixes a previous patch which disabled checksum offloading
+for both IPv4 and IPv6 packets. As a result, L3 checksum offload
+was also getting disabled for IPv4 pkts, and HW drops these pkts
+for some reason.
+
+Without this patch, IPv4 TSO appears to be broken:
+
+Without this patch I get ~16kbyte/s, with patch close to 2mbyte/s
+when copying files via scp from test box to my home workstation.
+
+Looking at tcpdump on sender it looks like hardware drops IPv4 TSO skbs.
+This patch restores performance for me, ipv6 looks good too.
+
+Fixes: fa6d7cb5d76c ("net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts")
+Cc: Sunil Goutham <sgoutham@cavium.com>
+Cc: Aleksey Makarov <aleksey.makarov@auriga.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -1355,6 +1355,8 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *n
+
+ /* Offload checksum calculation to HW */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
++ if (ip.v4->version == 4)
++ hdr->csum_l3 = 1; /* Enable IP csum calculation */
+ hdr->l3_offset = skb_network_offset(skb);
+ hdr->l4_offset = skb_transport_offset(skb);
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Sunil Goutham <sgoutham@cavium.com>
+Date: Thu, 23 Nov 2017 22:34:31 +0300
+Subject: net: thunderx: Fix TCP/UDP checksum offload for IPv6 pkts
+
+From: Sunil Goutham <sgoutham@cavium.com>
+
+
+[ Upstream commit fa6d7cb5d76cf0467c61420fc9238045aedfd379 ]
+
+Don't offload IP header checksum to NIC.
+
+This fixes a previous patch which enabled checksum offloading
+for both IPv4 and IPv6 packets. As a result, L3 checksum offload
+was also getting enabled for IPv6 pkts, and HW drops these pkts
+as it assumes the pkt is IPv4 when IP csum offload is set
+in the SQ descriptor.
+
+Fixes: 3a9024f52c2e ("net: thunderx: Enable TSO and checksum offloads for ipv6")
+Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
+Signed-off-by: Aleksey Makarov <aleksey.makarov@auriga.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+@@ -1355,7 +1355,6 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *n
+
+ /* Offload checksum calculation to HW */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+- hdr->csum_l3 = 1; /* Enable IP csum calculation */
+ hdr->l3_offset = skb_network_offset(skb);
+ hdr->l4_offset = skb_transport_offset(skb);
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Mike Maloney <maloney@google.com>
+Date: Tue, 28 Nov 2017 10:44:29 -0500
+Subject: packet: fix crash in fanout_demux_rollover()
+
+From: Mike Maloney <maloney@google.com>
+
+
+syzkaller found a race condition in fanout_demux_rollover() while
+removing a packet socket from a fanout group.
+
+po->rollover is read and operated on during packet_rcv_fanout(), via
+fanout_demux_rollover(), but the pointer is currently cleared before the
+synchronization in packet_release(). It is safer to delay the cleanup
+until after synchronize_net() has been called, ensuring all calls to
+packet_rcv_fanout() for this socket have finished.
+
+To further simplify synchronization around the rollover structure, set
+po->rollover in fanout_add() only if there are no errors. This removes
+the need for rcu in the struct and in the call to
+packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...).
+
+Crashing stack trace:
+ fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392
+ packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487
+ dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953
+ xmit_one net/core/dev.c:2975 [inline]
+ dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995
+ __dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476
+ dev_queue_xmit+0x17/0x20 net/core/dev.c:3509
+ neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379
+ neigh_output include/net/neighbour.h:482 [inline]
+ ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120
+ ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146
+ NF_HOOK_COND include/linux/netfilter.h:239 [inline]
+ ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163
+ dst_output include/net/dst.h:459 [inline]
+ NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250
+ mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660
+ mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072
+ mld_send_initial_cr net/ipv6/mcast.c:2056 [inline]
+ ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079
+ addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039
+ addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971
+ process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113
+ worker_thread+0x223/0x1990 kernel/workqueue.c:2247
+ kthread+0x35e/0x430 kernel/kthread.c:231
+ ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432
+
+Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state")
+Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Mike Maloney <maloney@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 32 ++++++++++----------------------
+ net/packet/internal.h | 1 -
+ 2 files changed, 10 insertions(+), 23 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1697,7 +1697,6 @@ static int fanout_add(struct sock *sk, u
+ atomic_long_set(&rollover->num, 0);
+ atomic_long_set(&rollover->num_huge, 0);
+ atomic_long_set(&rollover->num_failed, 0);
+- po->rollover = rollover;
+ }
+
+ if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
+@@ -1755,6 +1754,8 @@ static int fanout_add(struct sock *sk, u
+ if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
+ __dev_remove_pack(&po->prot_hook);
+ po->fanout = match;
++ po->rollover = rollover;
++ rollover = NULL;
+ refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
+ __fanout_link(sk, po);
+ err = 0;
+@@ -1768,10 +1769,7 @@ static int fanout_add(struct sock *sk, u
+ }
+
+ out:
+- if (err && rollover) {
+- kfree_rcu(rollover, rcu);
+- po->rollover = NULL;
+- }
++ kfree(rollover);
+ mutex_unlock(&fanout_mutex);
+ return err;
+ }
+@@ -1795,11 +1793,6 @@ static struct packet_fanout *fanout_rele
+ list_del(&f->list);
+ else
+ f = NULL;
+-
+- if (po->rollover) {
+- kfree_rcu(po->rollover, rcu);
+- po->rollover = NULL;
+- }
+ }
+ mutex_unlock(&fanout_mutex);
+
+@@ -3039,6 +3032,7 @@ static int packet_release(struct socket
+ synchronize_net();
+
+ if (f) {
++ kfree(po->rollover);
+ fanout_release_data(f);
+ kfree(f);
+ }
+@@ -3853,7 +3847,6 @@ static int packet_getsockopt(struct sock
+ void *data = &val;
+ union tpacket_stats_u st;
+ struct tpacket_rollover_stats rstats;
+- struct packet_rollover *rollover;
+
+ if (level != SOL_PACKET)
+ return -ENOPROTOOPT;
+@@ -3932,18 +3925,13 @@ static int packet_getsockopt(struct sock
+ 0);
+ break;
+ case PACKET_ROLLOVER_STATS:
+- rcu_read_lock();
+- rollover = rcu_dereference(po->rollover);
+- if (rollover) {
+- rstats.tp_all = atomic_long_read(&rollover->num);
+- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+- data = &rstats;
+- lv = sizeof(rstats);
+- }
+- rcu_read_unlock();
+- if (!rollover)
++ if (!po->rollover)
+ return -EINVAL;
++ rstats.tp_all = atomic_long_read(&po->rollover->num);
++ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
++ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
++ data = &rstats;
++ lv = sizeof(rstats);
+ break;
+ case PACKET_TX_HAS_OFF:
+ val = po->tp_tx_has_off;
+--- a/net/packet/internal.h
++++ b/net/packet/internal.h
+@@ -95,7 +95,6 @@ struct packet_fanout {
+
+ struct packet_rollover {
+ int sock;
+- struct rcu_head rcu;
+ atomic_long_t num;
+ atomic_long_t num_huge;
+ atomic_long_t num_failed;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Håkon Bugge <Haakon.Bugge@oracle.com>
+Date: Wed, 6 Dec 2017 17:18:28 +0100
+Subject: rds: Fix NULL pointer dereference in __rds_rdma_map
+
+From: Håkon Bugge <Haakon.Bugge@oracle.com>
+
+
+[ Upstream commit f3069c6d33f6ae63a1668737bc78aaaa51bff7ca ]
+
+This is a fix for syzkaller719569, where memory registration was
+attempted without any underlying transport being loaded.
+
+Analysis of the case reveals that it is the setsockopt() RDS_GET_MR
+(2) and RDS_GET_MR_FOR_DEST (7) that are vulnerable.
+
+Here is an example stack trace when the bug is hit:
+
+BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0
+IP: __rds_rdma_map+0x36/0x440 [rds]
+PGD 2f93d03067 P4D 2f93d03067 PUD 2f93d02067 PMD 0
+Oops: 0000 [#1] SMP
+Modules linked in: bridge stp llc tun rpcsec_gss_krb5 nfsv4
+dns_resolver nfs fscache rds binfmt_misc sb_edac intel_powerclamp
+coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul
+ghash_clmulni_intel pcbc aesni_intel crypto_simd glue_helper cryptd
+iTCO_wdt mei_me sg iTCO_vendor_support ipmi_si mei ipmi_devintf nfsd
+shpchp pcspkr i2c_i801 ioatdma ipmi_msghandler wmi lpc_ich mfd_core
+auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 mbcache jbd2
+mgag200 i2c_algo_bit drm_kms_helper ixgbe syscopyarea ahci sysfillrect
+sysimgblt libahci mdio fb_sys_fops ttm ptp libata sd_mod mlx4_core drm
+crc32c_intel pps_core megaraid_sas i2c_core dca dm_mirror
+dm_region_hash dm_log dm_mod
+CPU: 48 PID: 45787 Comm: repro_set2 Not tainted 4.14.2-3.el7uek.x86_64 #2
+Hardware name: Oracle Corporation ORACLE SERVER X5-2L/ASM,MOBO TRAY,2U, BIOS 31110000 03/03/2017
+task: ffff882f9190db00 task.stack: ffffc9002b994000
+RIP: 0010:__rds_rdma_map+0x36/0x440 [rds]
+RSP: 0018:ffffc9002b997df0 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: ffff882fa2182580 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: ffffc9002b997e40 RDI: ffff882fa2182580
+RBP: ffffc9002b997e30 R08: 0000000000000000 R09: 0000000000000002
+R10: ffff885fb29e3838 R11: 0000000000000000 R12: ffff882fa2182580
+R13: ffff882fa2182580 R14: 0000000000000002 R15: 0000000020000ffc
+FS: 00007fbffa20b700(0000) GS:ffff882fbfb80000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00000000000000c0 CR3: 0000002f98a66006 CR4: 00000000001606e0
+Call Trace:
+ rds_get_mr+0x56/0x80 [rds]
+ rds_setsockopt+0x172/0x340 [rds]
+ ? __fget_light+0x25/0x60
+ ? __fdget+0x13/0x20
+ SyS_setsockopt+0x80/0xe0
+ do_syscall_64+0x67/0x1b0
+ entry_SYSCALL64_slow_path+0x25/0x25
+RIP: 0033:0x7fbff9b117f9
+RSP: 002b:00007fbffa20aed8 EFLAGS: 00000293 ORIG_RAX: 0000000000000036
+RAX: ffffffffffffffda RBX: 00000000000c84a4 RCX: 00007fbff9b117f9
+RDX: 0000000000000002 RSI: 0000400000000114 RDI: 000000000000109b
+RBP: 00007fbffa20af10 R08: 0000000000000020 R09: 00007fbff9dd7860
+R10: 0000000020000ffc R11: 0000000000000293 R12: 0000000000000000
+R13: 00007fbffa20b9c0 R14: 00007fbffa20b700 R15: 0000000000000021
+
+Code: 41 56 41 55 49 89 fd 41 54 53 48 83 ec 18 8b 87 f0 02 00 00 48
+89 55 d0 48 89 4d c8 85 c0 0f 84 2d 03 00 00 48 8b 87 00 03 00 00 <48>
+83 b8 c0 00 00 00 00 0f 84 25 03 00 00 48 8b 06 48 8b 56 08
+
+The fix is to check the existence of an underlying transport in
+__rds_rdma_map().
+
+Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rds/rdma.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/rds/rdma.c
++++ b/net/rds/rdma.c
+@@ -183,7 +183,7 @@ static int __rds_rdma_map(struct rds_soc
+ long i;
+ int ret;
+
+- if (rs->rs_bound_addr == 0) {
++ if (rs->rs_bound_addr == 0 || !rs->rs_transport) {
+ ret = -ENOTCONN; /* XXX not a great errno */
+ goto out;
+ }
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:51 +0100
+Subject: s390/qeth: build max size GSO skbs on L2 devices
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 0cbff6d4546613330a1c5f139f5c368e4ce33ca1 ]
+
+The current GSO skb size limit was copy&pasted over from the L3 path,
+where it is needed due to a TSO limitation.
+As L2 devices don't offer TSO support (and thus all GSO skbs are
+segmented before they reach the driver), there's no reason to restrict
+the stack in how large it may build the GSO skbs.
+
+Fixes: d52aec97e5bc ("qeth: enable scatter/gather in layer 2 mode")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l2_main.c | 2 --
+ drivers/s390/net/qeth_l3_main.c | 4 ++--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -1027,8 +1027,6 @@ static int qeth_l2_setup_netdev(struct q
+
+ card->info.broadcast_capable = 1;
+ qeth_l2_request_initial_mac(card);
+- card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+- PAGE_SIZE;
+ SET_NETDEV_DEV(card->dev, &card->gdev->dev);
+ netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
+ netif_carrier_off(card->dev);
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2989,8 +2989,8 @@ static int qeth_l3_setup_netdev(struct q
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_FILTER;
+ netif_keep_dst(card->dev);
+- card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
+- PAGE_SIZE;
++ netif_set_gso_max_size(card->dev, (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
++ PAGE_SIZE);
+
+ SET_NETDEV_DEV(card->dev, &card->gdev->dev);
+ netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Wed, 18 Oct 2017 17:40:17 +0200
+Subject: s390/qeth: fix early exit from error path
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 83cf79a2fec3cf499eb6cb9eb608656fc2a82776 ]
+
+When the allocation of the addr buffer fails, we need to drop
+our reference on the inetdevice before returning.
+
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -1553,7 +1553,7 @@ static void qeth_l3_free_vlan_addresses4
+
+ addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
+ if (!addr)
+- return;
++ goto out;
+
+ spin_lock_bh(&card->ip_lock);
+
+@@ -1567,6 +1567,7 @@ static void qeth_l3_free_vlan_addresses4
+ spin_unlock_bh(&card->ip_lock);
+
+ kfree(addr);
++out:
+ in_dev_put(in_dev);
+ }
+
+@@ -1591,7 +1592,7 @@ static void qeth_l3_free_vlan_addresses6
+
+ addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
+ if (!addr)
+- return;
++ goto out;
+
+ spin_lock_bh(&card->ip_lock);
+
+@@ -1606,6 +1607,7 @@ static void qeth_l3_free_vlan_addresses6
+ spin_unlock_bh(&card->ip_lock);
+
+ kfree(addr);
++out:
+ in6_dev_put(in6_dev);
+ #endif /* CONFIG_QETH_IPV6 */
+ }
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:50 +0100
+Subject: s390/qeth: fix GSO throughput regression
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit 6d69b1f1eb7a2edf8a3547f361c61f2538e054bb ]
+
+Using GSO with small MTUs currently results in a substantial throughput
+regression - which is caused by how qeth needs to map non-linear skbs
+into its IO buffer elements:
+compared to a linear skb, each GSO-segmented skb effectively consumes
+twice as many buffer elements (ie two instead of one) due to the
+additional header-only part. This causes the Output Queue to be
+congested with low-utilized IO buffers.
+
+Fix this as follows:
+If the MSS is low enough so that a non-SG GSO segmentation produces
+order-0 skbs (currently ~3500 bytes), opt out of NETIF_F_SG. This is
+where we anticipate the biggest savings, since an SG-enabled
+GSO segmentation produces skbs that always consume at least two
+buffer elements.
+
+Larger MSS values continue to get a SG-enabled GSO segmentation, since
+1) the relative overhead of the additional header-only buffer element
+becomes less noticeable, and
+2) the linearization overhead increases.
+
+With the throughput regression fixed, re-enable NETIF_F_SG by default to
+reap the significant CPU savings of GSO.
+
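+To make the cut-over point concrete, here is a minimal standalone
+sketch of the decision; the constants are assumptions for 4 KiB pages,
+while the driver itself uses SKB_DATA_ALIGN() and SKB_MAX_HEAD(0) as
+shown in the hunk below:
+
+  /* Illustrative only: models the opt-out check with assumed values. */
+  #include <stdbool.h>
+
+  #define PAGE_SZ          4096u  /* assumed 4 KiB pages */
+  #define SHINFO_OVERHEAD   320u  /* assumed skb_shared_info cost */
+
+  static bool opt_out_of_sg(unsigned int headroom, unsigned int hdr_len,
+                            unsigned int mss)
+  {
+          /* linearize only if each segment still fits an order-0 page */
+          return headroom + hdr_len + mss <= PAGE_SZ - SHINFO_OVERHEAD;
+  }
+
+With these assumed numbers the threshold lands near the ~3500 byte MSS
+quoted above.
+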
+Fixes: 5722963a8e83 ("qeth: do not turn on SG per default")
+Reported-by: Nils Hoppmann <niho@de.ibm.com>
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_core.h | 3 +++
+ drivers/s390/net/qeth_core_main.c | 31 +++++++++++++++++++++++++++++++
+ drivers/s390/net/qeth_l2_main.c | 2 ++
+ drivers/s390/net/qeth_l3_main.c | 2 ++
+ 4 files changed, 38 insertions(+)
+
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -985,6 +985,9 @@ struct qeth_cmd_buffer *qeth_get_setassp
+ int qeth_set_features(struct net_device *, netdev_features_t);
+ int qeth_recover_features(struct net_device *);
+ netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
++netdev_features_t qeth_features_check(struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features);
+ int qeth_vm_request_mac(struct qeth_card *card);
+ int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
+
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -19,6 +19,11 @@
+ #include <linux/mii.h>
+ #include <linux/kthread.h>
+ #include <linux/slab.h>
++#include <linux/if_vlan.h>
++#include <linux/netdevice.h>
++#include <linux/netdev_features.h>
++#include <linux/skbuff.h>
++
+ #include <net/iucv/af_iucv.h>
+ #include <net/dsfield.h>
+
+@@ -6505,6 +6510,32 @@ netdev_features_t qeth_fix_features(stru
+ }
+ EXPORT_SYMBOL_GPL(qeth_fix_features);
+
++netdev_features_t qeth_features_check(struct sk_buff *skb,
++ struct net_device *dev,
++ netdev_features_t features)
++{
++ /* GSO segmentation builds skbs with
++ * a (small) linear part for the headers, and
++ * page frags for the data.
++ * Compared to a linear skb, the header-only part consumes an
++ * additional buffer element. This reduces buffer utilization, and
++ * hurts throughput. So compress small segments into one element.
++ */
++ if (netif_needs_gso(skb, features)) {
++ /* match skb_segment(): */
++ unsigned int doffset = skb->data - skb_mac_header(skb);
++ unsigned int hsize = skb_shinfo(skb)->gso_size;
++ unsigned int hroom = skb_headroom(skb);
++
++ /* linearize only if resulting skb allocations are order-0: */
++ if (SKB_DATA_ALIGN(hroom + doffset + hsize) <= SKB_MAX_HEAD(0))
++ features &= ~NETIF_F_SG;
++ }
++
++ return vlan_features_check(skb, features);
++}
++EXPORT_SYMBOL_GPL(qeth_features_check);
++
+ static int __init qeth_core_init(void)
+ {
+ int rc;
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -963,6 +963,7 @@ static const struct net_device_ops qeth_
+ .ndo_stop = qeth_l2_stop,
+ .ndo_get_stats = qeth_get_stats,
+ .ndo_start_xmit = qeth_l2_hard_start_xmit,
++ .ndo_features_check = qeth_features_check,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_rx_mode = qeth_l2_set_rx_mode,
+ .ndo_do_ioctl = qeth_do_ioctl,
+@@ -1009,6 +1010,7 @@ static int qeth_l2_setup_netdev(struct q
+ if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
+ card->dev->hw_features = NETIF_F_SG;
+ card->dev->vlan_features = NETIF_F_SG;
++ card->dev->features |= NETIF_F_SG;
+ /* OSA 3S and earlier has no RX/TX support */
+ if (qeth_is_supported(card, IPA_OUTBOUND_CHECKSUM)) {
+ card->dev->hw_features |= NETIF_F_IP_CSUM;
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2923,6 +2923,7 @@ static const struct net_device_ops qeth_
+ .ndo_stop = qeth_l3_stop,
+ .ndo_get_stats = qeth_get_stats,
+ .ndo_start_xmit = qeth_l3_hard_start_xmit,
++ .ndo_features_check = qeth_features_check,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_set_rx_mode = qeth_l3_set_multicast_list,
+ .ndo_do_ioctl = qeth_do_ioctl,
+@@ -2963,6 +2964,7 @@ static int qeth_l3_setup_netdev(struct q
+ card->dev->vlan_features = NETIF_F_SG |
+ NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
+ NETIF_F_TSO;
++ card->dev->features |= NETIF_F_SG;
+ }
+ }
+ } else if (card->info.type == QETH_CARD_TYPE_IQD) {
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Date: Fri, 1 Dec 2017 10:14:49 +0100
+Subject: s390/qeth: fix thinko in IPv4 multicast address tracking
+
+From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+
+
+[ Upstream commit bc3ab70584696cb798b9e1e0ac8e6ced5fd4c3b8 ]
+
+Commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+reworked how secondary addresses are managed for qeth devices.
+Instead of dropping & subsequently re-adding all addresses on every
+ndo_set_rx_mode() call, qeth now keeps track of the addresses that are
+currently registered with the HW.
+On an ndo_set_rx_mode(), we thus only need to do (de-)registration
+requests for the addresses that have actually changed.
+
+On L3 devices, the lookup for IPv4 Multicast addresses checks the wrong
+hashtable - and thus never finds a match. As a result, we first delete
+*all* such addresses, and then re-add them. So each set_rx_mode() call
+causes a short period where the IPv4 Multicast addresses are not
+registered, and the card stops forwarding inbound traffic for them.
+
+Fix this by setting the ->is_multicast flag on the lookup object, thus
+enabling qeth_l3_ip_from_hash() to search the correct hashtable and
+find a match there.
+
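+To illustrate why the flag matters for the lookup, a hedged sketch
+with hypothetical helpers (not the driver's actual code):
+
+  struct ip_entry {
+          unsigned int addr;
+          int is_multicast;
+  };
+
+  struct ip_entry *search_mcast_table(unsigned int addr); /* assumed */
+  struct ip_entry *search_ucast_table(unsigned int addr); /* assumed */
+
+  static struct ip_entry *lookup(const struct ip_entry *tmp)
+  {
+          /* The flag selects the hashtable: with is_multicast left at
+           * 0, a multicast address is searched in the unicast table
+           * and is never found. */
+          return tmp->is_multicast ? search_mcast_table(tmp->addr)
+                                   : search_ucast_table(tmp->addr);
+  }
+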
+Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
+Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/net/qeth_l3_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -1376,6 +1376,7 @@ qeth_l3_add_mc_to_hash(struct qeth_card
+
+ tmp->u.a4.addr = be32_to_cpu(im4->multiaddr);
+ memcpy(tmp->mac, buf, sizeof(tmp->mac));
++ tmp->is_multicast = 1;
+
+ ipm = qeth_l3_ip_from_hash(card, tmp);
+ if (ipm) {
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 26 Nov 2017 20:56:07 +0800
+Subject: sctp: use right member as the param of list_for_each_entry
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit a8dd397903a6e57157f6265911f7d35681364427 ]
+
+Commit d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues
+when migrating a sock") made the mistake of using 'list' as the member
+param of list_for_each_entry when traversing the retransmit, sacked and
+abandoned queues, while chunks are linked into these queues via
+'transmitted_list'.
+
+It could cause NULL dereference panic if there are chunks in any of these
+queues when peeling off one asoc.
+
+So use the chunk member 'transmitted_list' instead in this patch.
+
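+For reference, the last argument of list_for_each_entry() must name
+the list_head member through which each entry is linked into the list
+being walked; with the wrong member, container_of() computes a
+mis-offset entry pointer, hence the crash. A minimal kernel-style
+sketch (illustrative struct, not the real sctp_chunk layout):
+
+  #include <linux/list.h>
+
+  struct chunk_like {
+          struct list_head list;             /* links out_chunk_list  */
+          struct list_head transmitted_list; /* links the transmitted,
+                                              * retransmit, sacked and
+                                              * abandoned queues      */
+  };
+
+  /* correct: queue and member match
+   *   list_for_each_entry(chunk, &q->retransmit, transmitted_list)
+   * buggy: nodes are interpreted via the wrong offset
+   *   list_for_each_entry(chunk, &q->retransmit, list)
+   */
+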
+Fixes: d04adf1b3551 ("sctp: reset owner sk for data chunks on out queues when migrating a sock")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -187,13 +187,13 @@ static void sctp_for_each_tx_datachunk(s
+ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+ cb(chunk);
+
+- list_for_each_entry(chunk, &q->retransmit, list)
++ list_for_each_entry(chunk, &q->retransmit, transmitted_list)
+ cb(chunk);
+
+- list_for_each_entry(chunk, &q->sacked, list)
++ list_for_each_entry(chunk, &q->sacked, transmitted_list)
+ cb(chunk);
+
+- list_for_each_entry(chunk, &q->abandoned, list)
++ list_for_each_entry(chunk, &q->abandoned, transmitted_list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->out_chunk_list, list)
--- /dev/null
+net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
+net-thunderx-fix-tcp-udp-checksum-offload-for-ipv6-pkts.patch
+net-thunderx-fix-tcp-udp-checksum-offload-for-ipv4-pkts.patch
+net-realtek-r8169-implement-set_link_ksettings.patch
+s390-qeth-fix-early-exit-from-error-path.patch
+tipc-fix-memory-leak-in-tipc_accept_from_sock.patch
+vhost-fix-skb-leak-in-handle_rx.patch
+rds-fix-null-pointer-dereference-in-__rds_rdma_map.patch
+sit-update-frag_off-info.patch
+tcp-add-tcp_v4_fill_cb-tcp_v4_restore_cb.patch
+packet-fix-crash-in-fanout_demux_rollover.patch
+net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
+tcp-remove-buggy-call-to-tcp_v6_restore_cb.patch
+usbnet-fix-alignment-for-frames-with-no-ethernet-header.patch
+net-remove-hlist_nulls_add_tail_rcu.patch
+stmmac-reset-last-tso-segment-size-after-device-open.patch
+tcp-dccp-block-bh-before-arming-time_wait-timer.patch
+s390-qeth-build-max-size-gso-skbs-on-l2-devices.patch
+s390-qeth-fix-thinko-in-ipv4-multicast-address-tracking.patch
+s390-qeth-fix-gso-throughput-regression.patch
+tcp-use-ipcb-instead-of-tcp_skb_cb-in-inet_exact_dif_match.patch
+tipc-call-tipc_rcv-only-if-bearer-is-up-in-tipc_udp_recv.patch
+tcp-use-current-time-in-tcp_rcv_space_adjust.patch
+net-sched-cbq-create-block-for-q-link.block.patch
+tap-free-skb-if-flags-error.patch
+tcp-when-scheduling-tlp-time-of-rto-should-account-for-current-ack.patch
+tun-free-skb-in-early-errors.patch
+net-ipv6-fixup-device-for-anycast-routes-during-copy.patch
+tun-fix-rcu_read_lock-imbalance-in-tun_build_skb.patch
+net-accept-ufo-datagrams-from-tuntap-and-packet.patch
+net-openvswitch-datapath-fix-data-type-in-queue_gso_packets.patch
+cls_bpf-don-t-decrement-net-s-refcount-when-offload-fails.patch
+sctp-use-right-member-as-the-param-of-list_for_each_entry.patch
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Thu, 30 Nov 2017 10:41:14 +0800
+Subject: sit: update frag_off info
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+
+[ Upstream commit f859b4af1c52493ec21173ccc73d0b60029b5b88 ]
+
+After parsing the sit netlink change info, we forget to update frag_off in
+ipip6_tunnel_update(). Fix it by assigning the new value to frag_off.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/sit.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/sit.c
++++ b/net/ipv6/sit.c
+@@ -1087,6 +1087,7 @@ static void ipip6_tunnel_update(struct i
+ ipip6_tunnel_link(sitn, t);
+ t->parms.iph.ttl = p->iph.ttl;
+ t->parms.iph.tos = p->iph.tos;
++ t->parms.iph.frag_off = p->iph.frag_off;
+ if (t->parms.link != p->link || t->fwmark != fwmark) {
+ t->parms.link = p->link;
+ t->fwmark = fwmark;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Lars Persson <lars.persson@axis.com>
+Date: Fri, 1 Dec 2017 11:12:44 +0100
+Subject: stmmac: reset last TSO segment size after device open
+
+From: Lars Persson <lars.persson@axis.com>
+
+
+[ Upstream commit 45ab4b13e46325d00f4acdb365d406e941a15f81 ]
+
+The mss variable tracks the last max segment size sent to the TSO
+engine. We do not update the hardware as long as we receive skbs with
+the same value in gso_size.
+
+During a network device down/up cycle (mapped to stmmac_release() and
+stmmac_open() callbacks) we issue a reset to the hardware and it
+forgets the setting for mss. However, we did not zero out our mss
+variable, so the next transmission of a GSO packet happens with an
+undefined hardware setting.
+
+This triggers a hang in the TSO engine and eventually the netdev
+watchdog will bark.
+
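+The underlying rule, as a minimal sketch with hypothetical helpers:
+any software cache of hardware state must be invalidated whenever the
+hardware is reset.
+
+  struct priv_like {
+          unsigned int mss;   /* last MSS programmed into the TSO HW */
+  };
+
+  void hw_reset(void);        /* assumed: device forgets its MSS */
+
+  static void open_like(struct priv_like *priv)
+  {
+          hw_reset();
+          priv->mss = 0;      /* force re-programming on next GSO skb */
+  }
+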
+Fixes: f748be531d70 ("stmmac: support new GMAC4")
+Signed-off-by: Lars Persson <larper@axis.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -2564,6 +2564,7 @@ static int stmmac_open(struct net_device
+
+ priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
+ priv->rx_copybreak = STMMAC_RX_COPYBREAK;
++ priv->mss = 0;
+
+ ret = alloc_dma_desc_resources(priv);
+ if (ret < 0) {
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:38 -0500
+Subject: tap: free skb if flags error
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit 61d78537843e676e7f56ac6db333db0c0529b892 ]
+
+tap_recvmsg() supports accepting an skb via msg_control since
+commit 3b4ba04acca8 ("tap: support receiving skb from msg_control");
+the skb, if present, should be freed within the function, otherwise
+it is leaked.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_qu
+ DEFINE_WAIT(wait);
+ ssize_t ret = 0;
+
+- if (!iov_iter_count(to))
++ if (!iov_iter_count(to)) {
++ if (skb)
++ kfree_skb(skb);
+ return 0;
++ }
+
+ if (skb)
+ goto put;
+@@ -1154,11 +1157,14 @@ static int tap_recvmsg(struct socket *so
+ size_t total_len, int flags)
+ {
+ struct tap_queue *q = container_of(sock, struct tap_queue, sock);
++ struct sk_buff *skb = m->msg_control;
+ int ret;
+- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
++ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
++ if (skb)
++ kfree_skb(skb);
+ return -EINVAL;
+- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
+- m->msg_control);
++ }
++ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+ if (ret > total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 3 Dec 2017 09:32:59 -0800
+Subject: tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit eeea10b83a139451130df1594f26710c8fa390c8 ]
+
+James Morris reported kernel stack corruption bug [1] while
+running the SELinux testsuite, and bisected to a recent
+commit bffa72cf7f9d ("net: sk_buff rbnode reorg")
+
+We believe this commit is fine, but exposes an older bug.
+
+SELinux code runs from tcp_filter() and might send an ICMP,
+expecting IP options to be found in skb->cb[] using regular IPCB placement.
+
+We need to defer TCP mangling of skb->cb[] after tcp_filter() calls.
+
+This patch adds tcp_v4_fill_cb()/tcp_v4_restore_cb() in a very
+similar way we added them for IPv6.
+
+[1]
+[ 339.806024] SELinux: failure in selinux_parse_skb(), unable to parse packet
+[ 339.822505] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81745af5
+[ 339.822505]
+[ 339.852250] CPU: 4 PID: 3642 Comm: client Not tainted 4.15.0-rc1-test #15
+[ 339.868498] Hardware name: LENOVO 10FGS0VA1L/30BC, BIOS FWKT68A 01/19/2017
+[ 339.885060] Call Trace:
+[ 339.896875] <IRQ>
+[ 339.908103] dump_stack+0x63/0x87
+[ 339.920645] panic+0xe8/0x248
+[ 339.932668] ? ip_push_pending_frames+0x33/0x40
+[ 339.946328] ? icmp_send+0x525/0x530
+[ 339.958861] ? kfree_skbmem+0x60/0x70
+[ 339.971431] __stack_chk_fail+0x1b/0x20
+[ 339.984049] icmp_send+0x525/0x530
+[ 339.996205] ? netlbl_skbuff_err+0x36/0x40
+[ 340.008997] ? selinux_netlbl_err+0x11/0x20
+[ 340.021816] ? selinux_socket_sock_rcv_skb+0x211/0x230
+[ 340.035529] ? security_sock_rcv_skb+0x3b/0x50
+[ 340.048471] ? sk_filter_trim_cap+0x44/0x1c0
+[ 340.061246] ? tcp_v4_inbound_md5_hash+0x69/0x1b0
+[ 340.074562] ? tcp_filter+0x2c/0x40
+[ 340.086400] ? tcp_v4_rcv+0x820/0xa20
+[ 340.098329] ? ip_local_deliver_finish+0x71/0x1a0
+[ 340.111279] ? ip_local_deliver+0x6f/0xe0
+[ 340.123535] ? ip_rcv_finish+0x3a0/0x3a0
+[ 340.135523] ? ip_rcv_finish+0xdb/0x3a0
+[ 340.147442] ? ip_rcv+0x27c/0x3c0
+[ 340.158668] ? inet_del_offload+0x40/0x40
+[ 340.170580] ? __netif_receive_skb_core+0x4ac/0x900
+[ 340.183285] ? rcu_accelerate_cbs+0x5b/0x80
+[ 340.195282] ? __netif_receive_skb+0x18/0x60
+[ 340.207288] ? process_backlog+0x95/0x140
+[ 340.218948] ? net_rx_action+0x26c/0x3b0
+[ 340.230416] ? __do_softirq+0xc9/0x26a
+[ 340.241625] ? do_softirq_own_stack+0x2a/0x40
+[ 340.253368] </IRQ>
+[ 340.262673] ? do_softirq+0x50/0x60
+[ 340.273450] ? __local_bh_enable_ip+0x57/0x60
+[ 340.285045] ? ip_finish_output2+0x175/0x350
+[ 340.296403] ? ip_finish_output+0x127/0x1d0
+[ 340.307665] ? nf_hook_slow+0x3c/0xb0
+[ 340.318230] ? ip_output+0x72/0xe0
+[ 340.328524] ? ip_fragment.constprop.54+0x80/0x80
+[ 340.340070] ? ip_local_out+0x35/0x40
+[ 340.350497] ? ip_queue_xmit+0x15c/0x3f0
+[ 340.361060] ? __kmalloc_reserve.isra.40+0x31/0x90
+[ 340.372484] ? __skb_clone+0x2e/0x130
+[ 340.382633] ? tcp_transmit_skb+0x558/0xa10
+[ 340.393262] ? tcp_connect+0x938/0xad0
+[ 340.403370] ? ktime_get_with_offset+0x4c/0xb0
+[ 340.414206] ? tcp_v4_connect+0x457/0x4e0
+[ 340.424471] ? __inet_stream_connect+0xb3/0x300
+[ 340.435195] ? inet_stream_connect+0x3b/0x60
+[ 340.445607] ? SYSC_connect+0xd9/0x110
+[ 340.455455] ? __audit_syscall_entry+0xaf/0x100
+[ 340.466112] ? syscall_trace_enter+0x1d0/0x2b0
+[ 340.476636] ? __audit_syscall_exit+0x209/0x290
+[ 340.487151] ? SyS_connect+0xe/0x10
+[ 340.496453] ? do_syscall_64+0x67/0x1b0
+[ 340.506078] ? entry_SYSCALL64_slow_path+0x25/0x25
+
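+For context, IPCB() and TCP_SKB_CB() are two views of the same
+skb->cb[] storage, which is why the placement of the fill/restore
+calls matters. Simplified from the mainline headers:
+
+  #define IPCB(skb)       ((struct inet_skb_parm *)((skb)->cb))
+  #define TCP_SKB_CB(skb) ((struct tcp_skb_cb *)&((skb)->cb[0]))
+
+  struct tcp_skb_cb {
+          __u32 seq;          /* overwrites the start of cb[], where */
+          __u32 end_seq;      /* the IPCB data used to live          */
+          /* ... */
+          union {
+                  struct inet_skb_parm  h4; /* fill_cb() parks the  */
+                  struct inet6_skb_parm h6; /* IP data here;        */
+          } header;                         /* restore_cb() moves   */
+  };                                        /* it back to cb[0]     */
+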
+Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: James Morris <james.l.morris@oracle.com>
+Tested-by: James Morris <james.l.morris@oracle.com>
+Tested-by: Casey Schaufler <casey@schaufler-ca.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_ipv4.c | 59 +++++++++++++++++++++++++++++++++++-----------------
+ net/ipv6/tcp_ipv6.c | 10 +++++---
+ 2 files changed, 46 insertions(+), 23 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -1587,6 +1587,34 @@ int tcp_filter(struct sock *sk, struct s
+ }
+ EXPORT_SYMBOL(tcp_filter);
+
++static void tcp_v4_restore_cb(struct sk_buff *skb)
++{
++ memmove(IPCB(skb), &TCP_SKB_CB(skb)->header.h4,
++ sizeof(struct inet_skb_parm));
++}
++
++static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
++ const struct tcphdr *th)
++{
++ /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
++ * barrier() makes sure compiler wont play fool^Waliasing games.
++ */
++ memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
++ sizeof(struct inet_skb_parm));
++ barrier();
++
++ TCP_SKB_CB(skb)->seq = ntohl(th->seq);
++ TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
++ skb->len - th->doff * 4);
++ TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
++ TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
++ TCP_SKB_CB(skb)->tcp_tw_isn = 0;
++ TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
++ TCP_SKB_CB(skb)->sacked = 0;
++ TCP_SKB_CB(skb)->has_rxtstamp =
++ skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
++}
++
+ /*
+ * From tcp_input.c
+ */
+@@ -1627,24 +1655,6 @@ int tcp_v4_rcv(struct sk_buff *skb)
+
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
+- /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
+- * barrier() makes sure compiler wont play fool^Waliasing games.
+- */
+- memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
+- sizeof(struct inet_skb_parm));
+- barrier();
+-
+- TCP_SKB_CB(skb)->seq = ntohl(th->seq);
+- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
+- skb->len - th->doff * 4);
+- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
+- TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
+- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
+- TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
+- TCP_SKB_CB(skb)->sacked = 0;
+- TCP_SKB_CB(skb)->has_rxtstamp =
+- skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
+-
+ lookup:
+ sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
+ th->dest, sdif, &refcounted);
+@@ -1675,14 +1685,19 @@ process:
+ sock_hold(sk);
+ refcounted = true;
+ nsk = NULL;
+- if (!tcp_filter(sk, skb))
++ if (!tcp_filter(sk, skb)) {
++ th = (const struct tcphdr *)skb->data;
++ iph = ip_hdr(skb);
++ tcp_v4_fill_cb(skb, iph, th);
+ nsk = tcp_check_req(sk, skb, req, false);
++ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_and_relse;
+ }
+ if (nsk == sk) {
+ reqsk_put(req);
++ tcp_v4_restore_cb(skb);
+ } else if (tcp_child_process(sk, nsk, skb)) {
+ tcp_v4_send_reset(nsk, skb);
+ goto discard_and_relse;
+@@ -1708,6 +1723,7 @@ process:
+ goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);
++ tcp_v4_fill_cb(skb, iph, th);
+
+ skb->dev = NULL;
+
+@@ -1738,6 +1754,8 @@ no_tcp_socket:
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard_it;
+
++ tcp_v4_fill_cb(skb, iph, th);
++
+ if (tcp_checksum_complete(skb)) {
+ csum_error:
+ __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
+@@ -1764,6 +1782,8 @@ do_time_wait:
+ goto discard_it;
+ }
+
++ tcp_v4_fill_cb(skb, iph, th);
++
+ if (tcp_checksum_complete(skb)) {
+ inet_twsk_put(inet_twsk(sk));
+ goto csum_error;
+@@ -1780,6 +1800,7 @@ do_time_wait:
+ if (sk2) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
++ tcp_v4_restore_cb(skb);
+ refcounted = false;
+ goto process;
+ }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1448,7 +1448,6 @@ process:
+ struct sock *nsk;
+
+ sk = req->rsk_listener;
+- tcp_v6_fill_cb(skb, hdr, th);
+ if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ sk_drops_add(sk, skb);
+ reqsk_put(req);
+@@ -1461,8 +1460,12 @@ process:
+ sock_hold(sk);
+ refcounted = true;
+ nsk = NULL;
+- if (!tcp_filter(sk, skb))
++ if (!tcp_filter(sk, skb)) {
++ th = (const struct tcphdr *)skb->data;
++ hdr = ipv6_hdr(skb);
++ tcp_v6_fill_cb(skb, hdr, th);
+ nsk = tcp_check_req(sk, skb, req, false);
++ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_and_relse;
+@@ -1486,8 +1489,6 @@ process:
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ goto discard_and_relse;
+
+- tcp_v6_fill_cb(skb, hdr, th);
+-
+ if (tcp_v6_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+
+@@ -1495,6 +1496,7 @@ process:
+ goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);
++ tcp_v6_fill_cb(skb, hdr, th);
+
+ skb->dev = NULL;
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 1 Dec 2017 10:06:56 -0800
+Subject: tcp/dccp: block bh before arming time_wait timer
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit cfac7f836a715b91f08c851df915d401a4d52783 ]
+
+Maciej Żenczykowski reported some panics in tcp_twsk_destructor()
+that might be caused by the following bug.
+
+The timewait timer is pinned to the cpu, because we want to transition
+the timewait refcount from 0 to 4 in one go, once everything has been
+initialized.
+
+At the time commit ed2e92394589 ("tcp/dccp: fix timewait races in timer
+handling") was merged, TCP was always running from the BH handler.
+
+After commit 5413d1babe8f ("net: do not block BH while processing
+socket backlog") we definitely can run tcp_time_wait() from process
+context.
+
+We need to block BH in the critical section so that the pinned timer
+still serves its purpose.
+
+This bug is more likely to happen under stress and when very small RTOs
+are used in datacenter flows.
+
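+As a generic sketch of the pattern (hypothetical helper names):
+disabling BH keeps the pinned timer handler off this CPU until the
+object is fully published.
+
+  void arm_pinned_timer(void *obj);  /* assumed */
+  void publish(void *obj);           /* assumed */
+
+  static void timewait_like(void *obj)
+  {
+          local_bh_disable();        /* softirq cannot preempt us now */
+          arm_pinned_timer(obj);     /* timer is pinned to this CPU   */
+          publish(obj);              /* linkage / refcount updates    */
+          local_bh_enable();         /* handler now sees a full obj   */
+  }
+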
+Fixes: 5413d1babe8f ("net: do not block BH while processing socket backlog")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Acked-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dccp/minisocks.c | 6 ++++++
+ net/ipv4/tcp_minisocks.c | 6 ++++++
+ 2 files changed, 12 insertions(+)
+
+--- a/net/dccp/minisocks.c
++++ b/net/dccp/minisocks.c
+@@ -57,10 +57,16 @@ void dccp_time_wait(struct sock *sk, int
+ if (state == DCCP_TIME_WAIT)
+ timeo = DCCP_TIMEWAIT_LEN;
+
++ /* tw_timer is pinned, so we need to make sure BH are disabled
++ * in following section, otherwise timer handler could run before
++ * we complete the initialization.
++ */
++ local_bh_disable();
+ inet_twsk_schedule(tw, timeo);
+ /* Linkage updates. */
+ __inet_twsk_hashdance(tw, sk, &dccp_hashinfo);
+ inet_twsk_put(tw);
++ local_bh_enable();
+ } else {
+ /* Sorry, if we're out of memory, just CLOSE this
+ * socket up. We've got bigger problems than
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -312,10 +312,16 @@ void tcp_time_wait(struct sock *sk, int
+ if (state == TCP_TIME_WAIT)
+ timeo = TCP_TIMEWAIT_LEN;
+
++ /* tw_timer is pinned, so we need to make sure BH are disabled
++ * in following section, otherwise timer handler could run before
++ * we complete the initialization.
++ */
++ local_bh_disable();
+ inet_twsk_schedule(tw, timeo);
+ /* Linkage updates. */
+ __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
+ inet_twsk_put(tw);
++ local_bh_enable();
+ } else {
+ /* Sorry, if we're out of memory, just CLOSE this
+ * socket up. We've got bigger problems than
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 29 Nov 2017 17:43:57 -0800
+Subject: tcp: remove buggy call to tcp_v6_restore_cb()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 3016dad75b48279e579117ee3ed566ba90a3b023 ]
+
+tcp_v6_send_reset() expects to receive an skb with skb->cb[] layout as
+used in TCP stack.
+MD5 lookup uses tcp_v6_iif() and tcp_v6_sdif() and thus
+TCP_SKB_CB(skb)->header.h6
+
+This patch probably fixes RST packets sent on behalf of a timewait md5
+ipv6 socket.
+
+Before Florian's patch, tcp_v6_restore_cb() was needed before jumping to
+no_tcp_socket label.
+
+Fixes: 271c3b9b7bda ("tcp: honour SO_BINDTODEVICE for TW_RST case too")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Florian Westphal <fw@strlen.de>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/tcp_ipv6.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1585,7 +1585,6 @@ do_time_wait:
+ tcp_v6_timewait_ack(sk, skb);
+ break;
+ case TCP_TW_RST:
+- tcp_v6_restore_cb(skb);
+ tcp_v6_send_reset(sk, skb);
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ goto discard_it;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 6 Dec 2017 11:08:19 -0800
+Subject: tcp: use current time in tcp_rcv_space_adjust()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8632385022f2b05a6ca0b9e0f95575865de0e2ce ]
+
+When I switched rcv_rtt_est to high resolution timestamps, I forgot
+that tp->tcp_mstamp needed to be refreshed in tcp_rcv_space_adjust()
+
+Using an old timestamp leads to autotuning lags.
+
+Fixes: 645f4c6f2ebd ("tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Wei Wang <weiwan@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -592,6 +592,7 @@ void tcp_rcv_space_adjust(struct sock *s
+ int time;
+ int copied;
+
++ tcp_mstamp_refresh(tp);
+ time = tcp_stamp_us_delta(tp->tcp_mstamp, tp->rcvq_space.time);
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
+ return;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: David Ahern <dsahern@gmail.com>
+Date: Sun, 3 Dec 2017 09:33:00 -0800
+Subject: tcp: use IPCB instead of TCP_SKB_CB in inet_exact_dif_match()
+
+From: David Ahern <dsahern@gmail.com>
+
+
+[ Upstream commit b4d1605a8ea608fd7dc45b926a05d75d340bde4b ]
+
+After this fix : ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"),
+socket lookups happen while skb->cb[] has not been mangled yet by TCP.
+
+Fixes: a04a480d4392 ("net: Require exact match for TCP socket lookups if dif is l3mdev")
+Signed-off-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -874,12 +874,11 @@ static inline int tcp_v6_sdif(const stru
+ }
+ #endif
+
+-/* TCP_SKB_CB reference means this can not be used from early demux */
+ static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
+ {
+ #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
+- skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
++ skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
+ return true;
+ #endif
+ return false;
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Neal Cardwell <ncardwell@google.com>
+Date: Fri, 17 Nov 2017 21:06:14 -0500
+Subject: tcp: when scheduling TLP, time of RTO should account for current ACK
+
+From: Neal Cardwell <ncardwell@google.com>
+
+
+[ Upstream commit ed66dfaf236c04d414de1d218441296e57fb2bd2 ]
+
+Fix the TLP scheduling logic so that when scheduling a TLP probe, we
+ensure that the estimated time at which an RTO would fire accounts for
+the fact that ACKs indicating forward progress should push back RTO
+times.
+
+After the following fix:
+
+df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed")
+
+we had an unintentional behavior change in the following kind of
+scenario: suppose the RTT variance has been very low recently. Then
+suppose we send out a flight of N packets and our RTT is 100ms:
+
+t=0: send a flight of N packets
+t=100ms: receive an ACK for N-1 packets
+
+The response before df92c8394e6e was:
+ -> schedule a TLP for now + RTO_interval
+
+The response after df92c8394e6e is:
+ -> schedule a TLP for t=0 + RTO_interval
+
+Since RTO_interval = srtt + RTT_variance, this means that we have
+scheduled a TLP timer at a point in the future that only accounts for
+RTT_variance. If the RTT_variance term is small, this means that the
+timer fires soon.
+
+Before df92c8394e6e this would not happen, because in that code, when
+we receive an ACK for a prefix of the flight, we did:
+
+ 1) Near the top of tcp_ack(), switch from TLP timer to RTO
+ at write_queue_head->packet_tx_time + RTO_interval:
+ if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+ tcp_rearm_rto(sk);
+
+ 2) In tcp_clean_rtx_queue(), update the RTO to now + RTO_interval:
+ if (flag & FLAG_ACKED) {
+ tcp_rearm_rto(sk);
+
+ 3) In tcp_ack() after tcp_fastretrans_alert() switch from RTO
+ to TLP at now + RTO_interval:
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS)
+ tcp_schedule_loss_probe(sk);
+
+In df92c8394e6e we removed that 3-phase dance and instead directly
+set the TLP timer once, in cases like this to
+write_queue_head->packet_tx_time + RTO_interval. So if the RTT
+variance is small, then this means that this is setting the TLP timer
+to fire quite soon. This means if the ACK for the tail of the flight
+takes longer than an RTT to arrive (often due to delayed ACKs), then
+the TLP timer fires too quickly.
+
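+Plugging the example numbers into the commit's own formula makes the
+regression visible (illustrative values only):
+
+   srtt = 100ms, RTT_variance ~ 0   =>  RTO_interval ~ 100ms
+
+   before df92c8394e6e: TLP at now (t=100ms) + 100ms  =>  t = 200ms
+   after  df92c8394e6e: TLP at t=0 + 100ms            =>  t = 100ms
+
+With the buggy scheduling the probe is armed for essentially "now",
+so it fires as soon as the tail ACK is even slightly delayed.
+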
+Fixes: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed")
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h | 2 +-
+ net/ipv4/tcp_input.c | 2 +-
+ net/ipv4/tcp_output.c | 8 +++++---
+ 3 files changed, 7 insertions(+), 5 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -563,7 +563,7 @@ void tcp_push_one(struct sock *, unsigne
+ void tcp_send_ack(struct sock *sk);
+ void tcp_send_delayed_ack(struct sock *sk);
+ void tcp_send_loss_probe(struct sock *sk);
+-bool tcp_schedule_loss_probe(struct sock *sk);
++bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto);
+ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
+ const struct sk_buff *next_skb);
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3021,7 +3021,7 @@ void tcp_rearm_rto(struct sock *sk)
+ /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+ static void tcp_set_xmit_timer(struct sock *sk)
+ {
+- if (!tcp_schedule_loss_probe(sk))
++ if (!tcp_schedule_loss_probe(sk, true))
+ tcp_rearm_rto(sk);
+ }
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2337,7 +2337,7 @@ repair:
+
+ /* Send one loss probe per tail loss episode. */
+ if (push_one != 2)
+- tcp_schedule_loss_probe(sk);
++ tcp_schedule_loss_probe(sk, false);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ tcp_cwnd_validate(sk, is_cwnd_limited);
+ return false;
+@@ -2345,7 +2345,7 @@ repair:
+ return !tp->packets_out && tcp_send_head(sk);
+ }
+
+-bool tcp_schedule_loss_probe(struct sock *sk)
++bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
+ {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+@@ -2384,7 +2384,9 @@ bool tcp_schedule_loss_probe(struct sock
+ }
+
+ /* If the RTO formula yields an earlier time, then use that time. */
+- rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
++ rto_delta_us = advancing_rto ?
++ jiffies_to_usecs(inet_csk(sk)->icsk_rto) :
++ tcp_rto_delta_us(sk); /* How far in future is RTO? */
+ if (rto_delta_us > 0)
+ timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+Date: Wed, 29 Nov 2017 12:48:42 +0200
+Subject: tipc: call tipc_rcv() only if bearer is up in tipc_udp_recv()
+
+From: Tommi Rantala <tommi.t.rantala@nokia.com>
+
+
+[ Upstream commit c7799c067c2ae33e348508c8afec354f3257ff25 ]
+
+Remove the second tipc_rcv() call in tipc_udp_recv(). We have just
+checked that the bearer is not up, and calling tipc_rcv() with a bearer
+that is not up leads to a TIPC div-by-zero crash in
+tipc_node_calculate_timer(). The crash is rare in practice, but can
+happen like this:
+
+ We're enabling a bearer, but it's not yet up and fully initialized.
+ At the same time we receive a discovery packet, and in tipc_udp_recv()
+ we end up calling tipc_rcv() with the not-yet-initialized bearer,
+ later causing the div-by-zero crash in tipc_node_calculate_timer().
+
+Jon Maloy explains the impact of removing the second tipc_rcv() call:
+ "link setup in the worst case will be delayed until the next arriving
+ discovery messages, 1 sec later, and this is an acceptable delay."
+
+As the tipc_rcv() call is removed, just leave the function via the
+rcu_out label, so that we will kfree_skb().
+
+[ 12.590450] Own node address <1.1.1>, network identity 1
+[ 12.668088] divide error: 0000 [#1] SMP
+[ 12.676952] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.14.2-dirty #1
+[ 12.679225] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014
+[ 12.682095] task: ffff8c2a761edb80 task.stack: ffffa41cc0cac000
+[ 12.684087] RIP: 0010:tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc]
+[ 12.686486] RSP: 0018:ffff8c2a7fc838a0 EFLAGS: 00010246
+[ 12.688451] RAX: 0000000000000000 RBX: ffff8c2a5b382600 RCX: 0000000000000000
+[ 12.691197] RDX: 0000000000000000 RSI: ffff8c2a5b382600 RDI: ffff8c2a5b382600
+[ 12.693945] RBP: ffff8c2a7fc838b0 R08: 0000000000000001 R09: 0000000000000001
+[ 12.696632] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2a5d8949d8
+[ 12.699491] R13: ffffffff95ede400 R14: 0000000000000000 R15: ffff8c2a5d894800
+[ 12.702338] FS: 0000000000000000(0000) GS:ffff8c2a7fc80000(0000) knlGS:0000000000000000
+[ 12.705099] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 12.706776] CR2: 0000000001bb9440 CR3: 00000000bd009001 CR4: 00000000003606e0
+[ 12.708847] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 12.711016] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 12.712627] Call Trace:
+[ 12.713390] <IRQ>
+[ 12.714011] tipc_node_check_dest+0x2e8/0x350 [tipc]
+[ 12.715286] tipc_disc_rcv+0x14d/0x1d0 [tipc]
+[ 12.716370] tipc_rcv+0x8b0/0xd40 [tipc]
+[ 12.717396] ? minmax_running_min+0x2f/0x60
+[ 12.718248] ? dst_alloc+0x4c/0xa0
+[ 12.718964] ? tcp_ack+0xaf1/0x10b0
+[ 12.719658] ? tipc_udp_is_known_peer+0xa0/0xa0 [tipc]
+[ 12.720634] tipc_udp_recv+0x71/0x1d0 [tipc]
+[ 12.721459] ? dst_alloc+0x4c/0xa0
+[ 12.722130] udp_queue_rcv_skb+0x264/0x490
+[ 12.722924] __udp4_lib_rcv+0x21e/0x990
+[ 12.723670] ? ip_route_input_rcu+0x2dd/0xbf0
+[ 12.724442] ? tcp_v4_rcv+0x958/0xa40
+[ 12.725039] udp_rcv+0x1a/0x20
+[ 12.725587] ip_local_deliver_finish+0x97/0x1d0
+[ 12.726323] ip_local_deliver+0xaf/0xc0
+[ 12.726959] ? ip_route_input_noref+0x19/0x20
+[ 12.727689] ip_rcv_finish+0xdd/0x3b0
+[ 12.728307] ip_rcv+0x2ac/0x360
+[ 12.728839] __netif_receive_skb_core+0x6fb/0xa90
+[ 12.729580] ? udp4_gro_receive+0x1a7/0x2c0
+[ 12.730274] __netif_receive_skb+0x1d/0x60
+[ 12.730953] ? __netif_receive_skb+0x1d/0x60
+[ 12.731637] netif_receive_skb_internal+0x37/0xd0
+[ 12.732371] napi_gro_receive+0xc7/0xf0
+[ 12.732920] receive_buf+0x3c3/0xd40
+[ 12.733441] virtnet_poll+0xb1/0x250
+[ 12.733944] net_rx_action+0x23e/0x370
+[ 12.734476] __do_softirq+0xc5/0x2f8
+[ 12.734922] irq_exit+0xfa/0x100
+[ 12.735315] do_IRQ+0x4f/0xd0
+[ 12.735680] common_interrupt+0xa2/0xa2
+[ 12.736126] </IRQ>
+[ 12.736416] RIP: 0010:native_safe_halt+0x6/0x10
+[ 12.736925] RSP: 0018:ffffa41cc0cafe90 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff4d
+[ 12.737756] RAX: 0000000000000000 RBX: ffff8c2a761edb80 RCX: 0000000000000000
+[ 12.738504] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+[ 12.739258] RBP: ffffa41cc0cafe90 R08: 0000014b5b9795e5 R09: ffffa41cc12c7e88
+[ 12.740118] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
+[ 12.740964] R13: ffff8c2a761edb80 R14: 0000000000000000 R15: 0000000000000000
+[ 12.741831] default_idle+0x2a/0x100
+[ 12.742323] arch_cpu_idle+0xf/0x20
+[ 12.742796] default_idle_call+0x28/0x40
+[ 12.743312] do_idle+0x179/0x1f0
+[ 12.743761] cpu_startup_entry+0x1d/0x20
+[ 12.744291] start_secondary+0x112/0x120
+[ 12.744816] secondary_startup_64+0xa5/0xa5
+[ 12.745367] Code: b9 f4 01 00 00 48 89 c2 48 c1 ea 02 48 3d d3 07 00
+00 48 0f 47 d1 49 8b 0c 24 48 39 d1 76 07 49 89 14 24 48 89 d1 31 d2 48
+89 df <48> f7 f1 89 c6 e8 81 6e ff ff 5b 41 5c 5d c3 66 90 66 2e 0f 1f
+[ 12.747527] RIP: tipc_node_calculate_timer.isra.12+0x45/0x60 [tipc] RSP: ffff8c2a7fc838a0
+[ 12.748555] ---[ end trace 1399ab83390650fd ]---
+[ 12.749296] Kernel panic - not syncing: Fatal exception in interrupt
+[ 12.750123] Kernel Offset: 0x13200000 from 0xffffffff82000000
+(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+[ 12.751215] Rebooting in 60 seconds..
+
+Fixes: c9b64d492b1f ("tipc: add replicast peer discovery")
+Signed-off-by: Tommi Rantala <tommi.t.rantala@nokia.com>
+Cc: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/udp_media.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/net/tipc/udp_media.c
++++ b/net/tipc/udp_media.c
+@@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk
+ goto rcu_out;
+ }
+
+- tipc_rcv(sock_net(sk), skb, b);
+- rcu_read_unlock();
+- return 0;
+-
+ rcu_out:
+ rcu_read_unlock();
+ out:
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Jon Maloy <jon.maloy@ericsson.com>
+Date: Mon, 4 Dec 2017 22:00:20 +0100
+Subject: tipc: fix memory leak in tipc_accept_from_sock()
+
+From: Jon Maloy <jon.maloy@ericsson.com>
+
+
+[ Upstream commit a7d5f107b4978e08eeab599ee7449af34d034053 ]
+
+When the function tipc_accept_from_sock() fails to create an instance of
+struct tipc_subscriber, it omits to free the already created
+struct tipc_conn instance before it returns.
+
+We fix that with this commit.
+
+Reported-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/server.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/tipc/server.c
++++ b/net/tipc/server.c
+@@ -313,6 +313,7 @@ static int tipc_accept_from_sock(struct
+ newcon->usr_data = s->tipc_conn_new(newcon->conid);
+ if (!newcon->usr_data) {
+ sock_release(newsock);
++ conn_put(newcon);
+ return -ENOMEM;
+ }
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sun, 19 Nov 2017 19:31:04 +0800
+Subject: tun: fix rcu_read_lock imbalance in tun_build_skb
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 654d573845f35017dc397840fa03610fef3d08b0 ]
+
+rcu_read_lock in tun_build_skb is used to rcu_dereference tun->xdp_prog
+safely, so rcu_read_unlock must be done on every return path.
+
+Currently one place is missing it: the NULL return in the XDP_REDIRECT
+switch case. Another place wrongly calls rcu_read_lock instead of
+rcu_read_unlock: the NULL return in the if (xdp_xmit) chunk.
+
+So fix both in this patch.
+
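+The invariant, as a small sketch with hypothetical helpers:
+
+  struct ctx;
+  bool fast_path(struct ctx *c);     /* assumed */
+  void *slow_path(struct ctx *c);    /* assumed */
+
+  static void *build_like(struct ctx *c)
+  {
+          void *ret;
+
+          rcu_read_lock();
+          if (fast_path(c)) {
+                  rcu_read_unlock(); /* early return must unlock too */
+                  return NULL;
+          }
+          ret = slow_path(c);
+          rcu_read_unlock();         /* exactly one unlock per exit  */
+          return ret;
+  }
+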
+Fixes: 761876c857cb ("tap: XDP support")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1326,6 +1326,7 @@ static struct sk_buff *tun_build_skb(str
+ err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+ if (err)
+ goto err_redirect;
++ rcu_read_unlock();
+ return NULL;
+ case XDP_TX:
+ xdp_xmit = true;
+@@ -1358,7 +1359,7 @@ static struct sk_buff *tun_build_skb(str
+ if (xdp_xmit) {
+ skb->dev = tun->dev;
+ generic_xdp_tx(skb, xdp_prog);
+- rcu_read_lock();
++ rcu_read_unlock();
+ return NULL;
+ }
+
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:37 -0500
+Subject: tun: free skb in early errors
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit c33ee15b3820a03cf8229ba9415084197b827f8c ]
+
+tun_recvmsg() supports accepting an skb via msg_control since
+commit ac77cfd4258f ("tun: support receiving skb through msg_control");
+the skb, if present, should be freed no matter how far it gets,
+otherwise it is leaked.
+
+This patch fixes several missed cases.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1734,8 +1734,11 @@ static ssize_t tun_do_read(struct tun_st
+
+ tun_debug(KERN_INFO, tun, "tun_do_read\n");
+
+- if (!iov_iter_count(to))
++ if (!iov_iter_count(to)) {
++ if (skb)
++ kfree_skb(skb);
+ return 0;
++ }
+
+ if (!skb) {
+ /* Read frames from ring */
+@@ -1851,22 +1854,24 @@ static int tun_recvmsg(struct socket *so
+ {
+ struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+ struct tun_struct *tun = __tun_get(tfile);
++ struct sk_buff *skb = m->msg_control;
+ int ret;
+
+- if (!tun)
+- return -EBADFD;
++ if (!tun) {
++ ret = -EBADFD;
++ goto out_free_skb;
++ }
+
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
+ ret = -EINVAL;
+- goto out;
++ goto out_put_tun;
+ }
+ if (flags & MSG_ERRQUEUE) {
+ ret = sock_recv_errqueue(sock->sk, m, total_len,
+ SOL_PACKET, TUN_TX_TIMESTAMP);
+ goto out;
+ }
+- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
+- m->msg_control);
++ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+ if (ret > (ssize_t)total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+@@ -1874,6 +1879,13 @@ static int tun_recvmsg(struct socket *so
+ out:
+ tun_put(tun);
+ return ret;
++
++out_put_tun:
++ tun_put(tun);
++out_free_skb:
++ if (skb)
++ kfree_skb(skb);
++ return ret;
+ }
+
+ static int tun_peek_len(struct socket *sock)
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Bjørn Mork <bjorn@mork.no>
+Date: Wed, 6 Dec 2017 20:21:24 +0100
+Subject: usbnet: fix alignment for frames with no ethernet header
+
+From: Bjørn Mork <bjorn@mork.no>
+
+
+[ Upstream commit a4abd7a80addb4a9547f7dfc7812566b60ec505c ]
+
+The qmi_wwan minidriver supports a 'raw-ip' mode where frames are
+received without any ethernet header. This causes alignment issues
+because the skbs allocated by usbnet are "IP aligned".
+
+Fix by allowing minidrivers to disable the additional alignment
+offset. This is implemented using a per-device flag, since the same
+minidriver also supports 'ethernet' mode.
+
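+For background, "IP aligned" means reserving NET_IP_ALIGN bytes (2 on
+most architectures) so that after a 14-byte ethernet header the IP
+header sits on a 4-byte boundary; with no ethernet header, that same
+offset misaligns the IP header instead. Simplified from the mainline
+helper:
+
+  static inline struct sk_buff *
+  __netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int len,
+                              gfp_t gfp)
+  {
+          struct sk_buff *skb;
+
+          skb = __netdev_alloc_skb(dev, len + NET_IP_ALIGN, gfp);
+          if (NET_IP_ALIGN && skb)
+                  skb_reserve(skb, NET_IP_ALIGN); /* shift data 2 bytes */
+          return skb;
+  }
+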
+Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode")
+Reported-and-tested-by: Jay Foster <jay@systech.com>
+Signed-off-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/qmi_wwan.c | 2 ++
+ drivers/net/usb/usbnet.c | 5 ++++-
+ include/linux/usb/usbnet.h | 1 +
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/usb/qmi_wwan.c
++++ b/drivers/net/usb/qmi_wwan.c
+@@ -261,9 +261,11 @@ static void qmi_wwan_netdev_setup(struct
+ net->hard_header_len = 0;
+ net->addr_len = 0;
+ net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
++ set_bit(EVENT_NO_IP_ALIGN, &dev->flags);
+ netdev_dbg(net, "mode: raw IP\n");
+ } else if (!net->header_ops) { /* don't bother if already set */
+ ether_setup(net);
++ clear_bit(EVENT_NO_IP_ALIGN, &dev->flags);
+ netdev_dbg(net, "mode: Ethernet\n");
+ }
+
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -484,7 +484,10 @@ static int rx_submit (struct usbnet *dev
+ return -ENOLINK;
+ }
+
+- skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
++ if (test_bit(EVENT_NO_IP_ALIGN, &dev->flags))
++ skb = __netdev_alloc_skb(dev->net, size, flags);
++ else
++ skb = __netdev_alloc_skb_ip_align(dev->net, size, flags);
+ if (!skb) {
+ netif_dbg(dev, rx_err, dev->net, "no rx skb\n");
+ usbnet_defer_kevent (dev, EVENT_RX_MEMORY);
+--- a/include/linux/usb/usbnet.h
++++ b/include/linux/usb/usbnet.h
+@@ -81,6 +81,7 @@ struct usbnet {
+ # define EVENT_RX_KILL 10
+ # define EVENT_LINK_CHANGE 11
+ # define EVENT_SET_RX_MODE 12
++# define EVENT_NO_IP_ALIGN 13
+ };
+
+ static inline struct usb_driver *driver_of(struct usb_interface *intf)
--- /dev/null
+From foo@baz Thu Dec 14 11:45:40 CET 2017
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:36 -0500
+Subject: vhost: fix skb leak in handle_rx()
+
+From: Wei Xu <wexu@redhat.com>
+
+
+[ Upstream commit 6e474083f3daf3a3546737f5d7d502ad12eb257c ]
+
+Matthew found a roughly 40% tcp throughput regression with commit
+c67df11f ("vhost_net: try batch dequing from skb array") as discussed
+in the following thread:
+https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html
+
+Eventually we figured out that it was an skb leak in handle_rx()
+when sending packets to the VM. This usually happens when the guest
+cannot drain the vq as fast as vhost fills it; the resulting
+traffic jam leaks skbs, because there is no headcount left on the
+vq for vhost to send them.
+
+This can be avoided by making sure we have enough headcount before
+actually consuming an skb from the batched rx array, which is simply
+done by moving the zero-headcount check a bit earlier.
+
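+Schematically, the fix is only an ordering change (sketch using the
+function's own names):
+
+  /* before: consume from the ring, then notice headcount == 0 and
+   *         bail out -- the consumed skb is leaked
+   * after:  check every bail-out condition first, consume last     */
+  if (!headcount)
+          goto out;                      /* nothing consumed yet */
+  if (nvq->rx_array)
+          msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+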
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/vhost/net.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -782,16 +782,6 @@ static void handle_rx(struct vhost_net *
+ /* On error, stop handling until the next kick. */
+ if (unlikely(headcount < 0))
+ goto out;
+- if (nvq->rx_array)
+- msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+- /* On overrun, truncate and discard */
+- if (unlikely(headcount > UIO_MAXIOV)) {
+- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+- err = sock->ops->recvmsg(sock, &msg,
+- 1, MSG_DONTWAIT | MSG_TRUNC);
+- pr_debug("Discarded rx packet: len %zd\n", sock_len);
+- continue;
+- }
+ /* OK, now we need to know about added descriptors. */
+ if (!headcount) {
+ if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+@@ -804,6 +794,16 @@ static void handle_rx(struct vhost_net *
+ * they refilled. */
+ goto out;
+ }
++ if (nvq->rx_array)
++ msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
++ /* On overrun, truncate and discard */
++ if (unlikely(headcount > UIO_MAXIOV)) {
++ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
++ err = sock->ops->recvmsg(sock, &msg,
++ 1, MSG_DONTWAIT | MSG_TRUNC);
++ pr_debug("Discarded rx packet: len %zd\n", sock_len);
++ continue;
++ }
+ /* We don't need to be notified again. */
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+ fixup = msg.msg_iter;