--- /dev/null
+From dd95a78e85e04332f38d10bdf0a207475d26cd70 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben@decadent.org.uk>
+Date: Sun, 27 Oct 2013 21:02:39 +0000
+Subject: cxgb3: Fix length calculation in write_ofld_wr() on 32-bit architectures
+
+From: Ben Hutchings <ben@decadent.org.uk>
+
+[ Upstream commit 262e827fe745642589450ae241b7afd3912c3f25 ]
+
+The length calculation here is now invalid on 32-bit architectures,
+since sk_buff::tail is a pointer and sk_buff::transport_header is
+an integer offset:
+
+drivers/net/ethernet/chelsio/cxgb3/sge.c: In function 'write_ofld_wr':
+drivers/net/ethernet/chelsio/cxgb3/sge.c:1603:9: warning: passing argument 4 of 'make_sgl' makes integer from pointer without a cast [enabled by default]
+ adap->pdev);
+ ^
+drivers/net/ethernet/chelsio/cxgb3/sge.c:964:28: note: expected 'unsigned int' but argument is of type 'sk_buff_data_t'
+ static inline unsigned int make_sgl(const struct sk_buff *skb,
+ ^
+
+Use the appropriate skb accessor functions.
+
+Compile-tested only.
+
+Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
+Fixes: 1a37e412a022 ('net: Use 16bits for *_headers fields of struct skbuff')
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/chelsio/cxgb3/sge.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
++++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
+@@ -1599,7 +1599,8 @@ static void write_ofld_wr(struct adapter
+ flits = skb_transport_offset(skb) / 8;
+ sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
+ sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+- skb->tail - skb->transport_header,
++ skb_tail_pointer(skb) -
++ skb_transport_header(skb),
+ adap->pdev);
+ if (need_skb_unmap()) {
+ setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
--- /dev/null
+From 755fca6c4f87642528fac2fc898a06568dcfacdd Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Thu, 24 Oct 2013 07:48:24 +0200
+Subject: ipv6: ip6_dst_check needs to check for expired dst_entries
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit e3bc10bd95d7fcc3f2ac690c6ff22833ea6781d6 ]
+
+On receiving a packet too big icmp error we check if our current cached
+dst_entry in the socket is still valid. This validation check did not
+care about the expiration of the (cached) route.
+
+The error path I traced down:
+The socket receives a packet too big mtu notification. It still has a
+valid dst_entry and thus issues the ip6_rt_pmtu_update on this dst_entry,
+setting RTF_EXPIRES and updating the dst.expires value (which could
+fail because of not up-to-date expiration values, see previous patch).
+
+In some rare cases we race with a) the ip6_fib gc or b) another routing
+lookup which would result in a recreation of the cached rt6_info from its
+parent non-cached rt6_info. While copying the rt6_info we reinitialize the
+metrics store by copying it over from the parent thus invalidating the
+just installed pmtu update (both dsts use the same key to the inetpeer
+storage). The dst_entry with the just invalidated metrics data would
+just get its RTF_EXPIRES flag cleared and would continue to stay valid
+for the socket.
+
+We should not have issued the pmtu update on the already expired dst_entry
+in the first place. By checking the expiration on the dst entry and
+doing a relookup in case it is out of date we close the race because
+we would install a new rt6_info into the fib before we issue the pmtu
+update, thus closing this race.
+
+Not reliably updating the dst.expire value was fixed by the patch "ipv6:
+reset dst.expires value when clearing expire flag".
+
+Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
+Reported-by: Valentijn Sessink <valentyn@blub.net>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Valentijn Sessink <valentyn@blub.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(s
+ if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
+ return NULL;
+
+- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+- return dst;
++ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
++ return NULL;
++
++ if (rt6_check_expired(rt))
++ return NULL;
+
+- return NULL;
++ return dst;
+ }
+
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
--- /dev/null
+From 36baf2ba73e632dc049e19cd742e57aba9714a1a Mon Sep 17 00:00:00 2001
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Date: Thu, 24 Oct 2013 10:14:27 +0200
+Subject: ipv6: reset dst.expires value when clearing expire flag
+
+From: Hannes Frederic Sowa <hannes@stressinduktion.org>
+
+[ Upstream commit 01ba16d6ec85a1ec4669c75513a76b61ec53ee50 ]
+
+On receiving a packet too big icmp error we update the expire value by
+calling rt6_update_expires. This function uses dst_set_expires which is
+implemented such that it can only reduce the expiration value of the dst entry.
+
+If we insert new routing non-expiry information into the ipv6 fib where
+we already have a matching rt6_info we only clear the RTF_EXPIRES flag
+in rt6i_flags and leave the dst.expires value as is.
+
+When new mtu information arrives for that cached dst_entry we again
+call dst_set_expires. This time it won't update the dst.expires value
+because we left the dst.expires value intact from the last update. So
+dst_set_expires won't touch dst.expires.
+
+Fix this by resetting dst.expires when clearing the RTF_EXPIRES flag.
+dst_set_expires checks for a zero expiration and updates the
+dst.expires.
+
+In the past this (not updating dst.expires) was necessary because
+dst.expires was placed in a union with the dst_entry *from reference
+and rt6_clean_expires did assign NULL to it. This split happened in
+ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition
+regarding dst->expires and dst->from").
+
+Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
+Reported-by: Valentijn Sessink <valentyn@blub.net>
+Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Tested-by: Valentijn Sessink <valentyn@blub.net>
+Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/ip6_fib.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_
+ static inline void rt6_clean_expires(struct rt6_info *rt)
+ {
+ rt->rt6i_flags &= ~RTF_EXPIRES;
++ rt->dst.expires = 0;
+ }
+
+ static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires)
--- /dev/null
+From 3fd3ed1e1b114b720da50addbe28a27df95a1cb8 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 1 Nov 2013 15:01:10 +0800
+Subject: net: flow_dissector: fail on evil iph->ihl
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit 6f092343855a71e03b8d209815d8c45bf3a27fcd ]
+
+We don't validate iph->ihl, which may lead to a dead loop if we meet an IPIP
+skb whose iph->ihl is zero. Fix this by failing immediately when iph->ihl
+is evil (less than 5).
+
+This issue was introduced by commit ec5efe7946280d1e84603389a1030ccec0a767ae
+(rps: support IPIP encapsulation).
+
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Petr Matousek <pmatouse@redhat.com>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Cc: Daniel Borkmann <dborkman@redhat.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/flow_dissector.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -40,7 +40,7 @@ again:
+ struct iphdr _iph;
+ ip:
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+- if (!iph)
++ if (!iph || iph->ihl < 5)
+ return false;
+
+ if (ip_is_fragment(iph))
--- /dev/null
+From 305a0e486650924657bb34d35e58311373a54d2e Mon Sep 17 00:00:00 2001
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Date: Sun, 3 Nov 2013 10:04:07 +0200
+Subject: net/mlx4_core: Fix call to __mlx4_unregister_mac
+
+From: Jack Morgenstein <jackm@dev.mellanox.co.il>
+
+[ Upstream commit c32b7dfbb1dfb3f0a68f250deff65103c8bb704a ]
+
+In function mlx4_master_deactivate_admin_state() __mlx4_unregister_mac was
+called using the MAC index. It should be called with the value of the MAC itself.
+
+Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
+@@ -1691,7 +1691,7 @@ static void mlx4_master_deactivate_admin
+ vp_oper->vlan_idx = NO_INDX;
+ }
+ if (NO_INDX != vp_oper->mac_idx) {
+- __mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx);
++ __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac);
+ vp_oper->mac_idx = NO_INDX;
+ }
+ }
--- /dev/null
+From 22d32d9d4843a02fbe5410ec7ba70e97f2caf1d8 Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <dborkman@redhat.com>
+Date: Thu, 31 Oct 2013 09:13:32 +0100
+Subject: net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb
+
+From: Daniel Borkmann <dborkman@redhat.com>
+
+[ Upstream commit 7926c1d5be0b7cbe5b8d5c788d7d39237e7b212c ]
+
+Introduced in f9e42b853523 ("net: sctp: sideeffect: throw BUG if
+primary_path is NULL"), we intended to find a buggy assoc that's
+part of the assoc hash table with a primary_path that is NULL.
+However, we better remove the BUG_ON for now and find a more
+suitable place to assert for these things as Mark reports that
+this also triggers the bug when duplication cookie processing
+happens, and the assoc is not part of the hash table (so all
+good in this case). Such a situation can for example easily be
+reproduced by:
+
+ tc qdisc add dev eth0 root handle 1: prio bands 2 priomap 1 1 1 1 1 1
+ tc qdisc add dev eth0 parent 1:2 handle 20: netem loss 20%
+ tc filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip \
+ protocol 132 0xff match u8 0x0b 0xff at 32 flowid 1:2
+
+This drops 20% of COOKIE-ACK packets. After some follow-up
+discussion with Vlad we came to the conclusion that for now we
+should still better remove this BUG_ON() assertion, and come up
+with two follow-ups later on, that is, i) find a more suitable
+place for this assertion, and possibly ii) have a special
+allocator/initializer for such kind of temporary assocs.
+
+Reported-by: Mark Thomas <Mark.Thomas@metaswitch.com>
+Signed-off-by: Vlad Yasevich <vyasevich@gmail.com>
+Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sm_sideeffect.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/net/sctp/sm_sideeffect.c
++++ b/net/sctp/sm_sideeffect.c
+@@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd
+ (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK))
+ return;
+
+- BUG_ON(asoc->peer.primary_path == NULL);
+ sctp_unhash_established(asoc);
+ sctp_association_free(asoc);
+ }
--- /dev/null
+net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch
+net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch
+net-flow_dissector-fail-on-evil-iph-ihl.patch
+virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch
+xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch
+cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch
+tcp-gso-fix-truesize-tracking.patch
+tcp-fix-synack-rtt-estimation-in-fast-open.patch
+tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch
+tcp-do-not-rearm-rto-when-future-data-are-sacked.patch
+ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch
+ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch
--- /dev/null
+From 22b6cfcd281c80e5bf1d404f67c21303210bd32f Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Thu, 24 Oct 2013 08:59:27 -0700
+Subject: tcp: do not rearm RTO when future data are sacked
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit 2f715c1dde6e1760f3101358dc26f8c9489be0bf ]
+
+Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon
+obtaining a RTT sample from newly sacked data.
+
+But technically RTO should only be re-armed when the data sent before
+the last (re)transmission of write queue head are (s)acked. Otherwise
+the RTO may continue to extend during loss recovery on data sent
+in the future.
+
+Note that RTTs from ACK or timestamps do not have this problem, as the RTT
+source must be from data sent before.
+
+The new RTO re-arm policy is
+1) Always re-arm RTO if SND.UNA is advanced
+2) Re-arm RTO if sack RTT is available, provided the sacked data was
+ sent before the last time write_queue_head was sent.
+
+Signed-off-by: Larry Brakmo <brakmo@google.com>
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2987,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct so
+ s32 seq_rtt = -1;
+ s32 ca_seq_rtt = -1;
+ ktime_t last_ackt = net_invalid_timestamp();
++ bool rtt_update;
+
+ while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+@@ -3063,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct so
+ if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+ flag |= FLAG_SACK_RENEGING;
+
+- if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+- (flag & FLAG_ACKED))
+- tcp_rearm_rto(sk);
++ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+
+ if (flag & FLAG_ACKED) {
+ const struct tcp_congestion_ops *ca_ops
+ = inet_csk(sk)->icsk_ca_ops;
+
++ tcp_rearm_rto(sk);
+ if (unlikely(icsk->icsk_mtup.probe_size &&
+ !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
+ tcp_mtup_probe_success(sk);
+@@ -3109,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct so
+
+ ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
+ }
++ } else if (skb && rtt_update && sack_rtt >= 0 &&
++ sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
++ /* Do not re-arm RTO if the sack RTT is measured from data sent
++ * after when the head was last (re)transmitted. Otherwise the
++ * timeout may continue to extend in loss recovery.
++ */
++ tcp_rearm_rto(sk);
+ }
+
+ #if FASTRETRANS_DEBUG > 0
--- /dev/null
+From 2a8aaa341a6aed901a25e55447a879ef77b48386 Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Thu, 24 Oct 2013 08:44:25 -0700
+Subject: tcp: fix SYNACK RTT estimation in Fast Open
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit bc15afa39ecc16f01c3389d15d8f6015a427fe85 ]
+
+tp->lsndtime may not always be the SYNACK timestamp if a passive
+Fast Open socket sends data before handshake completes. And if the
+remote acknowledges both the data and the SYNACK, the RTT sample
+is already taken in tcp_ack(), so no need to call
+tcp_ack_update_rtt() in tcp_synack_rtt_meas() again.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2871,14 +2871,19 @@ static inline bool tcp_ack_update_rtt(st
+ }
+
+ /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
++static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
+ {
+ struct tcp_sock *tp = tcp_sk(sk);
+ s32 seq_rtt = -1;
+
+- if (tp->lsndtime && !tp->total_retrans)
+- seq_rtt = tcp_time_stamp - tp->lsndtime;
+- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
++ if (synack_stamp && !tp->total_retrans)
++ seq_rtt = tcp_time_stamp - synack_stamp;
++
++ /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
++ * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
++ */
++ if (!tp->srtt)
++ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+ }
+
+ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
+@@ -5587,6 +5592,7 @@ int tcp_rcv_state_process(struct sock *s
+ struct request_sock *req;
+ int queued = 0;
+ bool acceptable;
++ u32 synack_stamp;
+
+ tp->rx_opt.saw_tstamp = 0;
+
+@@ -5669,9 +5675,11 @@ int tcp_rcv_state_process(struct sock *s
+ * so release it.
+ */
+ if (req) {
++ synack_stamp = tcp_rsk(req)->snt_synack;
+ tp->total_retrans = req->num_retrans;
+ reqsk_fastopen_remove(sk, req, false);
+ } else {
++ synack_stamp = tp->lsndtime;
+ /* Make sure socket is routed, for correct metrics. */
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_congestion_control(sk);
+@@ -5694,7 +5702,7 @@ int tcp_rcv_state_process(struct sock *s
+ tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+ tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+- tcp_synack_rtt_meas(sk, req);
++ tcp_synack_rtt_meas(sk, synack_stamp);
+
+ if (tp->rx_opt.tstamp_ok)
+ tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
--- /dev/null
+From 4db90cba8f8c482183e9d4c6da83ecd36cb8ef17 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 25 Oct 2013 17:26:17 -0700
+Subject: tcp: gso: fix truesize tracking
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 0d08c42cf9a71530fef5ebcfe368f38f2dd0476f ]
+
+commit 6ff50cd55545 ("tcp: gso: do not generate out of order packets")
+had a heuristic that can trigger a warning in skb_try_coalesce(),
+because skb->truesize of the gso segments was set to exactly mss.
+
+This breaks the requirement that
+
+skb->truesize >= skb->len + truesizeof(struct sk_buff);
+
+It can trivially be reproduced by :
+
+ifconfig lo mtu 1500
+ethtool -K lo tso off
+netperf
+
+As the skbs are looped into the TCP networking stack, skb_try_coalesce()
+warns us of these skb under-estimating their truesize.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Alexei Starovoitov <ast@plumgrid.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_offload.c | 13 +++++--------
+ 1 file changed, 5 insertions(+), 8 deletions(-)
+
+--- a/net/ipv4/tcp_offload.c
++++ b/net/ipv4/tcp_offload.c
+@@ -18,6 +18,7 @@ struct sk_buff *tcp_tso_segment(struct s
+ netdev_features_t features)
+ {
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
++ unsigned int sum_truesize = 0;
+ struct tcphdr *th;
+ unsigned int thlen;
+ unsigned int seq;
+@@ -102,13 +103,7 @@ struct sk_buff *tcp_tso_segment(struct s
+ if (copy_destructor) {
+ skb->destructor = gso_skb->destructor;
+ skb->sk = gso_skb->sk;
+- /* {tcp|sock}_wfree() use exact truesize accounting :
+- * sum(skb->truesize) MUST be exactly be gso_skb->truesize
+- * So we account mss bytes of 'true size' for each segment.
+- * The last segment will contain the remaining.
+- */
+- skb->truesize = mss;
+- gso_skb->truesize -= mss;
++ sum_truesize += skb->truesize;
+ }
+ skb = skb->next;
+ th = tcp_hdr(skb);
+@@ -125,7 +120,9 @@ struct sk_buff *tcp_tso_segment(struct s
+ if (copy_destructor) {
+ swap(gso_skb->sk, skb->sk);
+ swap(gso_skb->destructor, skb->destructor);
+- swap(gso_skb->truesize, skb->truesize);
++ sum_truesize += skb->truesize;
++ atomic_add(sum_truesize - gso_skb->truesize,
++ &skb->sk->sk_wmem_alloc);
+ }
+
+ delta = htonl(oldlen + (skb_tail_pointer(skb) -
--- /dev/null
+From 49627d9ffd019bffa9613a2dcfb34f3e85adcf6c Mon Sep 17 00:00:00 2001
+From: Yuchung Cheng <ycheng@google.com>
+Date: Thu, 24 Oct 2013 08:55:25 -0700
+Subject: tcp: only take RTT from timestamps if new data is acked
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit 2909d874f34eae157aecab0af27c6dc4a1751f8f ]
+
+Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug that
+it does not check if the ACK acknowledges new data before taking
+the RTT sample from TCP timestamps. This patch adds the check
+back as required by the RFC.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2856,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(st
+ * left edge of the send window.
+ * See draft-ietf-tcplw-high-performance-00, section 3.3.
+ */
+- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
++ if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
++ flag & FLAG_ACKED)
+ seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+
+ if (seq_rtt < 0)
--- /dev/null
+From c483901d63b5856e9fd03f654a8917b7ed88246c Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 29 Oct 2013 15:11:07 +0800
+Subject: virtio-net: correctly handle cpu hotplug notifier during resuming
+
+From: Jason Wang <jasowang@redhat.com>
+
+[ Upstream commit ec9debbd9a88d8ea86c488d6ffcac419ee7d46d9 ]
+
+commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to
+cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug
+notifier by checking config_enable and doing nothing if it is false. So it
+needs to take the config_lock mutex, which may happen in an atomic
+context and leads to the following warnings:
+
+[ 622.944441] CPU0 attaching NULL sched-domain.
+[ 622.944446] CPU1 attaching NULL sched-domain.
+[ 622.944485] CPU0 attaching NULL sched-domain.
+[ 622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616
+[ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1
+[ 622.950796] no locks held by migration/1/10.
+[ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317
+[ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+[ 622.950802] 0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70
+[ 622.950803] ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed
+[ 622.950805] 0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000
+[ 622.950805] Call Trace:
+[ 622.950810] [<ffffffff81a32f22>] dump_stack+0x54/0x74
+[ 622.950815] [<ffffffff810edb02>] __might_sleep+0x112/0x114
+[ 622.950817] [<ffffffff81a396ed>] mutex_lock_nested+0x3c/0x3c6
+[ 622.950818] [<ffffffff810e861d>] ? up+0x39/0x3e
+[ 622.950821] [<ffffffff8153ea7c>] ? acpi_os_signal_semaphore+0x21/0x2d
+[ 622.950824] [<ffffffff81565ed1>] ? acpi_ut_release_mutex+0x5e/0x62
+[ 622.950828] [<ffffffff816d04ec>] virtnet_cpu_callback+0x33/0x87
+[ 622.950830] [<ffffffff81a42576>] notifier_call_chain+0x3c/0x5e
+[ 622.950832] [<ffffffff810e86a8>] __raw_notifier_call_chain+0xe/0x10
+[ 622.950835] [<ffffffff810c5556>] __cpu_notify+0x20/0x37
+[ 622.950836] [<ffffffff810c5580>] cpu_notify+0x13/0x15
+[ 622.950838] [<ffffffff81a237cd>] take_cpu_down+0x27/0x3a
+[ 622.950841] [<ffffffff81136289>] stop_machine_cpu_stop+0x93/0xf1
+[ 622.950842] [<ffffffff81136167>] cpu_stopper_thread+0xa0/0x12f
+[ 622.950844] [<ffffffff811361f6>] ? cpu_stopper_thread+0x12f/0x12f
+[ 622.950847] [<ffffffff81119710>] ? lock_release_holdtime.part.7+0xa3/0xa8
+[ 622.950848] [<ffffffff81135e4b>] ? cpu_stop_should_run+0x3f/0x47
+[ 622.950850] [<ffffffff810ea9b0>] smpboot_thread_fn+0x1c5/0x1e3
+[ 622.950852] [<ffffffff810ea7eb>] ? lg_global_unlock+0x67/0x67
+[ 622.950854] [<ffffffff810e36b7>] kthread+0xd8/0xe0
+[ 622.950857] [<ffffffff81a3bfad>] ? wait_for_common+0x12f/0x164
+[ 622.950859] [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
+[ 622.950861] [<ffffffff81a45ffc>] ret_from_fork+0x7c/0xb0
+[ 622.950862] [<ffffffff810e35df>] ? kthread_create_on_node+0x124/0x124
+[ 622.950876] smpboot: CPU 1 is now offline
+[ 623.194556] SMP alternatives: lockdep: fixing up alternatives
+[ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1
+...
+
+A correct fix is to unregister the hotcpu notifier during freeze and register
+it again during restore.
+
+Reported-by: Fengguang Wu <fengguang.wu@intel.com>
+Tested-by: Fengguang Wu <fengguang.wu@intel.com>
+Cc: Wanlong Gao <gaowanlong@cn.fujitsu.com>
+Cc: Rusty Russell <rusty@rustcorp.com.au>
+Cc: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Wanlong Gao <gaowanlong@cn.fujitsu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct n
+ {
+ struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
+
+- mutex_lock(&vi->config_lock);
+-
+- if (!vi->config_enable)
+- goto done;
+-
+ switch(action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_DOWN_FAILED:
+@@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct n
+ break;
+ }
+
+-done:
+- mutex_unlock(&vi->config_lock);
+ return NOTIFY_OK;
+ }
+
+@@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_
+ struct virtnet_info *vi = vdev->priv;
+ int i;
+
++ unregister_hotcpu_notifier(&vi->nb);
++
+ /* Prevent config work handler from accessing the device */
+ mutex_lock(&vi->config_lock);
+ vi->config_enable = false;
+@@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio
+ virtnet_set_queues(vi, vi->curr_queue_pairs);
+ rtnl_unlock();
+
++ err = register_hotcpu_notifier(&vi->nb);
++ if (err)
++ return err;
++
+ return 0;
+ }
+ #endif
--- /dev/null
+From 7aa17751a3b382398a1db036ea6b1c3ab58392df Mon Sep 17 00:00:00 2001
+From: Wei Liu <wei.liu2@citrix.com>
+Date: Mon, 28 Oct 2013 12:07:57 +0000
+Subject: xen-netback: use jiffies_64 value to calculate credit timeout
+
+From: Wei Liu <wei.liu2@citrix.com>
+
+[ Upstream commit 059dfa6a93b779516321e5112db9d7621b1367ba ]
+
+time_after_eq() only works if the delta is < MAX_ULONG/2.
+
+For a 32bit Dom0, if netfront sends packets at a very low rate, the time
+between subsequent calls to tx_credit_exceeded() may exceed MAX_ULONG/2
+and the test for time_after_eq() will be incorrect. Credit will not be
+replenished and the guest may become unable to send packets (e.g., if
+prior to the long gap, all credit was exhausted).
+
+Use jiffies_64 variant to mitigate this problem for 32bit Dom0.
+
+Suggested-by: Jan Beulich <jbeulich@suse.com>
+Signed-off-by: Wei Liu <wei.liu2@citrix.com>
+Reviewed-by: David Vrabel <david.vrabel@citrix.com>
+Cc: Ian Campbell <ian.campbell@citrix.com>
+Cc: Jason Luan <jianhai.luan@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/xen-netback/common.h | 1 +
+ drivers/net/xen-netback/interface.c | 3 +--
+ drivers/net/xen-netback/netback.c | 10 +++++-----
+ 3 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/xen-netback/common.h
++++ b/drivers/net/xen-netback/common.h
+@@ -163,6 +163,7 @@ struct xenvif {
+ unsigned long credit_usec;
+ unsigned long remaining_credit;
+ struct timer_list credit_timeout;
++ u64 credit_window_start;
+
+ /* Statistics */
+ unsigned long rx_gso_checksum_fixup;
+--- a/drivers/net/xen-netback/interface.c
++++ b/drivers/net/xen-netback/interface.c
+@@ -312,8 +312,7 @@ struct xenvif *xenvif_alloc(struct devic
+ vif->credit_bytes = vif->remaining_credit = ~0UL;
+ vif->credit_usec = 0UL;
+ init_timer(&vif->credit_timeout);
+- /* Initialize 'expires' now: it's used to track the credit window. */
+- vif->credit_timeout.expires = jiffies;
++ vif->credit_window_start = get_jiffies_64();
+
+ dev->netdev_ops = &xenvif_netdev_ops;
+ dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+--- a/drivers/net/xen-netback/netback.c
++++ b/drivers/net/xen-netback/netback.c
+@@ -1185,9 +1185,8 @@ out:
+
+ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
+ {
+- unsigned long now = jiffies;
+- unsigned long next_credit =
+- vif->credit_timeout.expires +
++ u64 now = get_jiffies_64();
++ u64 next_credit = vif->credit_window_start +
+ msecs_to_jiffies(vif->credit_usec / 1000);
+
+ /* Timer could already be pending in rare cases. */
+@@ -1195,8 +1194,8 @@ static bool tx_credit_exceeded(struct xe
+ return true;
+
+ /* Passed the point where we can replenish credit? */
+- if (time_after_eq(now, next_credit)) {
+- vif->credit_timeout.expires = now;
++ if (time_after_eq64(now, next_credit)) {
++ vif->credit_window_start = now;
+ tx_add_credit(vif);
+ }
+
+@@ -1208,6 +1207,7 @@ static bool tx_credit_exceeded(struct xe
+ tx_credit_callback;
+ mod_timer(&vif->credit_timeout,
+ next_credit);
++ vif->credit_window_start = next_credit;
+
+ return true;
+ }