From: Greg Kroah-Hartman Date: Thu, 14 Nov 2013 03:14:54 +0000 (+0900) Subject: 3.12-stable patches X-Git-Tag: v3.4.70~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1d11b4f0acd84f9f0939ffb0c6b216a2f465ea4b;p=thirdparty%2Fkernel%2Fstable-queue.git 3.12-stable patches added patches: cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch net-flow_dissector-fail-on-evil-iph-ihl.patch net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch tcp-do-not-rearm-rto-when-future-data-are-sacked.patch tcp-fix-synack-rtt-estimation-in-fast-open.patch tcp-gso-fix-truesize-tracking.patch tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch --- diff --git a/queue-3.12/cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch b/queue-3.12/cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch new file mode 100644 index 00000000000..68eab457c61 --- /dev/null +++ b/queue-3.12/cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch @@ -0,0 +1,45 @@ +From dd95a78e85e04332f38d10bdf0a207475d26cd70 Mon Sep 17 00:00:00 2001 +From: Ben Hutchings +Date: Sun, 27 Oct 2013 21:02:39 +0000 +Subject: cxgb3: Fix length calculation in write_ofld_wr() on 32-bit architectures + +From: Ben Hutchings + +[ Upstream commit 262e827fe745642589450ae241b7afd3912c3f25 ] + +The length calculation here is now invalid on 32-bit architectures, +since sk_buff::tail is a pointer and sk_buff::transport_header is +an integer offset: + +drivers/net/ethernet/chelsio/cxgb3/sge.c: In function 'write_ofld_wr': +drivers/net/ethernet/chelsio/cxgb3/sge.c:1603:9: warning: passing argument 4 of 'make_sgl' makes 
integer from pointer without a cast [enabled by default] + adap->pdev); + ^ +drivers/net/ethernet/chelsio/cxgb3/sge.c:964:28: note: expected 'unsigned int' but argument is of type 'sk_buff_data_t' + static inline unsigned int make_sgl(const struct sk_buff *skb, + ^ + +Use the appropriate skb accessor functions. + +Compile-tested only. + +Signed-off-by: Ben Hutchings +Fixes: 1a37e412a022 ('net: Use 16bits for *_headers fields of struct skbuff') +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb3/sge.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c ++++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c +@@ -1599,7 +1599,8 @@ static void write_ofld_wr(struct adapter + flits = skb_transport_offset(skb) / 8; + sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; + sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), +- skb->tail - skb->transport_header, ++ skb_tail_pointer(skb) - ++ skb_transport_header(skb), + adap->pdev); + if (need_skb_unmap()) { + setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); diff --git a/queue-3.12/ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch b/queue-3.12/ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch new file mode 100644 index 00000000000..13e4c4652c2 --- /dev/null +++ b/queue-3.12/ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch @@ -0,0 +1,68 @@ +From 755fca6c4f87642528fac2fc898a06568dcfacdd Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Thu, 24 Oct 2013 07:48:24 +0200 +Subject: ipv6: ip6_dst_check needs to check for expired dst_entries + +From: Hannes Frederic Sowa + +[ Upstream commit e3bc10bd95d7fcc3f2ac690c6ff22833ea6781d6 ] + +On receiving a packet too big icmp error we check if our current cached +dst_entry in the socket is still valid. This validation check did not +care about the expiration of the (cached) route. 
+ +The error path I traced down: +The socket receives a packet too big mtu notification. It still has a +valid dst_entry and thus issues the ip6_rt_pmtu_update on this dst_entry, +setting RTF_EXPIRES and updating the dst.expires value (which could +fail because of not up-to-date expiration values, see previous patch). + +In some rare cases we race with a) the ip6_fib gc or b) another routing +lookup which would result in a recreation of the cached rt6_info from its +parent non-cached rt6_info. While copying the rt6_info we reinitialize the +metrics store by copying it over from the parent thus invalidating the +just installed pmtu update (both dsts use the same key to the inetpeer +storage). The dst_entry with the just invalidated metrics data would +just get its RTF_EXPIRES flag cleared and would continue to stay valid +for the socket. + +We should not have issued the pmtu update on the already expired dst_entry +in the first place. By checking the expiration on the dst entry and +doing a relookup in case it is out of date we close the race because +we would install a new rt6_info into the fib before we issue the pmtu +update, thus closing this race. + +Not reliably updating the dst.expires value was fixed by the patch "ipv6: +reset dst.expires value when clearing expire flag". + +Reported-by: Steinar H. Gunderson +Reported-by: Valentijn Sessink +Cc: YOSHIFUJI Hideaki +Signed-off-by: Hannes Frederic Sowa +Reviewed-by: Eric Dumazet +Tested-by: Valentijn Sessink +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(s + if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) + return NULL; + +- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) +- return dst; ++ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) ++ return NULL; ++ ++ if (rt6_check_expired(rt)) ++ return NULL; + +- return NULL; ++ return dst; + } + + static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) diff --git a/queue-3.12/ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch b/queue-3.12/ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch new file mode 100644 index 00000000000..8f608c42213 --- /dev/null +++ b/queue-3.12/ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch @@ -0,0 +1,54 @@ +From 36baf2ba73e632dc049e19cd742e57aba9714a1a Mon Sep 17 00:00:00 2001 +From: Hannes Frederic Sowa +Date: Thu, 24 Oct 2013 10:14:27 +0200 +Subject: ipv6: reset dst.expires value when clearing expire flag + +From: Hannes Frederic Sowa + +[ Upstream commit 01ba16d6ec85a1ec4669c75513a76b61ec53ee50 ] + +On receiving a packet too big icmp error we update the expire value by +calling rt6_update_expires. This function uses dst_set_expires which is +implemented that it can only reduce the expiration value of the dst entry. + +If we insert new routing non-expiry information into the ipv6 fib where +we already have a matching rt6_info we only clear the RTF_EXPIRES flag +in rt6i_flags and leave the dst.expires value as is. + +When new mtu information arrives for that cached dst_entry we again +call dst_set_expires. This time it won't update the dst.expire value +because we left the dst.expire value intact from the last update. So +dst_set_expires won't touch dst.expires. 
+ +Fix this by resetting dst.expires when clearing the RTF_EXPIRES flag. +dst_set_expires checks for a zero expiration and updates the +dst.expires. + +In the past this (not updating dst.expires) was necessary because +dst.expires was placed in a union with the dst_entry *from reference +and rt6_clean_expires did assign NULL to it. This split happened in +ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition +regarding dst->expires and dst->from"). + +Reported-by: Steinar H. Gunderson +Reported-by: Valentijn Sessink +Cc: YOSHIFUJI Hideaki +Acked-by: Eric Dumazet +Tested-by: Valentijn Sessink +Signed-off-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_fib.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_ + static inline void rt6_clean_expires(struct rt6_info *rt) + { + rt->rt6i_flags &= ~RTF_EXPIRES; ++ rt->dst.expires = 0; + } + + static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) diff --git a/queue-3.12/net-flow_dissector-fail-on-evil-iph-ihl.patch b/queue-3.12/net-flow_dissector-fail-on-evil-iph-ihl.patch new file mode 100644 index 00000000000..b18ecdc589d --- /dev/null +++ b/queue-3.12/net-flow_dissector-fail-on-evil-iph-ihl.patch @@ -0,0 +1,39 @@ +From 3fd3ed1e1b114b720da50addbe28a27df95a1cb8 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Fri, 1 Nov 2013 15:01:10 +0800 +Subject: net: flow_dissector: fail on evil iph->ihl + +From: Jason Wang + +[ Upstream commit 6f092343855a71e03b8d209815d8c45bf3a27fcd ] + +We don't validate iph->ihl which may lead to a dead loop if we meet an IPIP +skb whose iph->ihl is zero. Fix this by failing immediately when iph->ihl +is evil (less than 5). + +This issue was introduced by commit ec5efe7946280d1e84603389a1030ccec0a767ae +(rps: support IPIP encapsulation).
+ +Signed-off-by: Jason Wang +Cc: Eric Dumazet +Cc: Petr Matousek +Cc: Michael S. Tsirkin +Cc: Daniel Borkmann +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -40,7 +40,7 @@ again: + struct iphdr _iph; + ip: + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); +- if (!iph) ++ if (!iph || iph->ihl < 5) + return false; + + if (ip_is_fragment(iph)) diff --git a/queue-3.12/net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch b/queue-3.12/net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch new file mode 100644 index 00000000000..686fdd7d6ed --- /dev/null +++ b/queue-3.12/net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch @@ -0,0 +1,31 @@ +From 305a0e486650924657bb34d35e58311373a54d2e Mon Sep 17 00:00:00 2001 +From: Jack Morgenstein +Date: Sun, 3 Nov 2013 10:04:07 +0200 +Subject: net/mlx4_core: Fix call to __mlx4_unregister_mac + +From: Jack Morgenstein + +[ Upstream commit c32b7dfbb1dfb3f0a68f250deff65103c8bb704a ] + +In function mlx4_master_deactivate_admin_state() __mlx4_unregister_mac was +called using the MAC index. It should be called with the value of the MAC itself. + +Signed-off-by: Jack Morgenstein +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/cmd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c ++++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c +@@ -1691,7 +1691,7 @@ static void mlx4_master_deactivate_admin + vp_oper->vlan_idx = NO_INDX; + } + if (NO_INDX != vp_oper->mac_idx) { +- __mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx); ++ __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); + vp_oper->mac_idx = NO_INDX; + } + } diff --git a/queue-3.12/net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch b/queue-3.12/net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch new file mode 100644 index 00000000000..4fdb6fc7a2b --- /dev/null +++ b/queue-3.12/net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch @@ -0,0 +1,51 @@ +From 22d32d9d4843a02fbe5410ec7ba70e97f2caf1d8 Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 31 Oct 2013 09:13:32 +0100 +Subject: net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb + +From: Daniel Borkmann + +[ Upstream commit 7926c1d5be0b7cbe5b8d5c788d7d39237e7b212c ] + +Introduced in f9e42b853523 ("net: sctp: sideeffect: throw BUG if +primary_path is NULL"), we intended to find a buggy assoc that's +part of the assoc hash table with a primary_path that is NULL. +However, we better remove the BUG_ON for now and find a more +suitable place to assert for these things as Mark reports that +this also triggers the bug when duplication cookie processing +happens, and the assoc is not part of the hash table (so all +good in this case). Such a situation can for example easily be +reproduced by: + + tc qdisc add dev eth0 root handle 1: prio bands 2 priomap 1 1 1 1 1 1 + tc qdisc add dev eth0 parent 1:2 handle 20: netem loss 20% + tc filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip \ + protocol 132 0xff match u8 0x0b 0xff at 32 flowid 1:2 + +This drops 20% of COOKIE-ACK packets. 
After some follow-up +discussion with Vlad we came to the conclusion that for now we +should still better remove this BUG_ON() assertion, and come up +with two follow-ups later on, that is, i) find a more suitable +place for this assertion, and possibly ii) have a special +allocator/initializer for such kind of temporary assocs. + +Reported-by: Mark Thomas +Signed-off-by: Vlad Yasevich +Signed-off-by: Daniel Borkmann +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_sideeffect.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/sctp/sm_sideeffect.c ++++ b/net/sctp/sm_sideeffect.c +@@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd + (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) + return; + +- BUG_ON(asoc->peer.primary_path == NULL); + sctp_unhash_established(asoc); + sctp_association_free(asoc); + } diff --git a/queue-3.12/series b/queue-3.12/series new file mode 100644 index 00000000000..48386cee931 --- /dev/null +++ b/queue-3.12/series @@ -0,0 +1,12 @@ +net-mlx4_core-fix-call-to-__mlx4_unregister_mac.patch +net-sctp-do-not-trigger-bug_on-in-sctp_cmd_delete_tcb.patch +net-flow_dissector-fail-on-evil-iph-ihl.patch +virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch +xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch +cxgb3-fix-length-calculation-in-write_ofld_wr-on-32-bit-architectures.patch +tcp-gso-fix-truesize-tracking.patch +tcp-fix-synack-rtt-estimation-in-fast-open.patch +tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch +tcp-do-not-rearm-rto-when-future-data-are-sacked.patch +ipv6-ip6_dst_check-needs-to-check-for-expired-dst_entries.patch +ipv6-reset-dst.expires-value-when-clearing-expire-flag.patch diff --git a/queue-3.12/tcp-do-not-rearm-rto-when-future-data-are-sacked.patch b/queue-3.12/tcp-do-not-rearm-rto-when-future-data-are-sacked.patch new file mode 100644 index 00000000000..7013abeeb32 --- /dev/null +++ 
b/queue-3.12/tcp-do-not-rearm-rto-when-future-data-are-sacked.patch @@ -0,0 +1,76 @@ +From 22b6cfcd281c80e5bf1d404f67c21303210bd32f Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Thu, 24 Oct 2013 08:59:27 -0700 +Subject: tcp: do not rearm RTO when future data are sacked + +From: Yuchung Cheng + +[ Upstream commit 2f715c1dde6e1760f3101358dc26f8c9489be0bf ] + +Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon +obtaining a RTT sample from newly sacked data. + +But technically RTO should only be re-armed when the data sent before +the last (re)transmission of write queue head are (s)acked. Otherwise +the RTO may continue to extend during loss recovery on data sent +in the future. + +Note that RTTs from ACK or timestamps do not have this problem, as the RTT +source must be from data sent before. + +The new RTO re-arm policy is +1) Always re-arm RTO if SND.UNA is advanced +2) Re-arm RTO if sack RTT is available, provided the sacked data was + sent before the last time write_queue_head was sent. + +Signed-off-by: Larry Brakmo +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2987,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct so + s32 seq_rtt = -1; + s32 ca_seq_rtt = -1; + ktime_t last_ackt = net_invalid_timestamp(); ++ bool rtt_update; + + while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); +@@ -3063,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct so + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + flag |= FLAG_SACK_RENEGING; + +- if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) || +- (flag & FLAG_ACKED)) +- tcp_rearm_rto(sk); ++ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); + + if (flag & FLAG_ACKED) { + const struct tcp_congestion_ops *ca_ops + = inet_csk(sk)->icsk_ca_ops; + ++ tcp_rearm_rto(sk); + if (unlikely(icsk->icsk_mtup.probe_size && + !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { + tcp_mtup_probe_success(sk); +@@ -3109,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct so + + ca_ops->pkts_acked(sk, pkts_acked, rtt_us); + } ++ } else if (skb && rtt_update && sack_rtt >= 0 && ++ sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { ++ /* Do not re-arm RTO if the sack RTT is measured from data sent ++ * after when the head was last (re)transmitted. Otherwise the ++ * timeout may continue to extend in loss recovery. 
++ */ ++ tcp_rearm_rto(sk); + } + + #if FASTRETRANS_DEBUG > 0 diff --git a/queue-3.12/tcp-fix-synack-rtt-estimation-in-fast-open.patch b/queue-3.12/tcp-fix-synack-rtt-estimation-in-fast-open.patch new file mode 100644 index 00000000000..5ee110eb022 --- /dev/null +++ b/queue-3.12/tcp-fix-synack-rtt-estimation-in-fast-open.patch @@ -0,0 +1,79 @@ +From 2a8aaa341a6aed901a25e55447a879ef77b48386 Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Thu, 24 Oct 2013 08:44:25 -0700 +Subject: tcp: fix SYNACK RTT estimation in Fast Open + +From: Yuchung Cheng + +[ Upstream commit bc15afa39ecc16f01c3389d15d8f6015a427fe85 ] + +tp->lsndtime may not always be the SYNACK timestamp if a passive +Fast Open socket sends data before handshake completes. And if the +remote acknowledges both the data and the SYNACK, the RTT sample +is already taken in tcp_ack(), so no need to call +tcp_ack_update_rtt() in tcp_synack_rtt_meas() again. + +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2871,14 +2871,19 @@ static inline bool tcp_ack_update_rtt(st + } + + /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS.
*/ +-static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) ++static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) + { + struct tcp_sock *tp = tcp_sk(sk); + s32 seq_rtt = -1; + +- if (tp->lsndtime && !tp->total_retrans) +- seq_rtt = tcp_time_stamp - tp->lsndtime; +- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); ++ if (synack_stamp && !tp->total_retrans) ++ seq_rtt = tcp_time_stamp - synack_stamp; ++ ++ /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets ++ * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() ++ */ ++ if (!tp->srtt) ++ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + } + + static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) +@@ -5587,6 +5592,7 @@ int tcp_rcv_state_process(struct sock *s + struct request_sock *req; + int queued = 0; + bool acceptable; ++ u32 synack_stamp; + + tp->rx_opt.saw_tstamp = 0; + +@@ -5669,9 +5675,11 @@ int tcp_rcv_state_process(struct sock *s + * so release it. + */ + if (req) { ++ synack_stamp = tcp_rsk(req)->snt_synack; + tp->total_retrans = req->num_retrans; + reqsk_fastopen_remove(sk, req, false); + } else { ++ synack_stamp = tp->lsndtime; + /* Make sure socket is routed, for correct metrics. 
*/ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); +@@ -5694,7 +5702,7 @@ int tcp_rcv_state_process(struct sock *s + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); +- tcp_synack_rtt_meas(sk, req); ++ tcp_synack_rtt_meas(sk, synack_stamp); + + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/queue-3.12/tcp-gso-fix-truesize-tracking.patch b/queue-3.12/tcp-gso-fix-truesize-tracking.patch new file mode 100644 index 00000000000..23bea74c407 --- /dev/null +++ b/queue-3.12/tcp-gso-fix-truesize-tracking.patch @@ -0,0 +1,70 @@ +From 4db90cba8f8c482183e9d4c6da83ecd36cb8ef17 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 25 Oct 2013 17:26:17 -0700 +Subject: tcp: gso: fix truesize tracking + +From: Eric Dumazet + +[ Upstream commit 0d08c42cf9a71530fef5ebcfe368f38f2dd0476f ] + +commit 6ff50cd55545 ("tcp: gso: do not generate out of order packets") +had an heuristic that can trigger a warning in skb_try_coalesce(), +because skb->truesize of the gso segments were exactly set to mss. + +This breaks the requirement that + +skb->truesize >= skb->len + truesizeof(struct sk_buff); + +It can trivially be reproduced by : + +ifconfig lo mtu 1500 +ethtool -K lo tso off +netperf + +As the skbs are looped into the TCP networking stack, skb_try_coalesce() +warns us of these skb under-estimating their truesize. + +Signed-off-by: Eric Dumazet +Reported-by: Alexei Starovoitov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_offload.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -18,6 +18,7 @@ struct sk_buff *tcp_tso_segment(struct s + netdev_features_t features) + { + struct sk_buff *segs = ERR_PTR(-EINVAL); ++ unsigned int sum_truesize = 0; + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; +@@ -102,13 +103,7 @@ struct sk_buff *tcp_tso_segment(struct s + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; +- /* {tcp|sock}_wfree() use exact truesize accounting : +- * sum(skb->truesize) MUST be exactly be gso_skb->truesize +- * So we account mss bytes of 'true size' for each segment. +- * The last segment will contain the remaining. +- */ +- skb->truesize = mss; +- gso_skb->truesize -= mss; ++ sum_truesize += skb->truesize; + } + skb = skb->next; + th = tcp_hdr(skb); +@@ -125,7 +120,9 @@ struct sk_buff *tcp_tso_segment(struct s + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); +- swap(gso_skb->truesize, skb->truesize); ++ sum_truesize += skb->truesize; ++ atomic_add(sum_truesize - gso_skb->truesize, ++ &skb->sk->sk_wmem_alloc); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - diff --git a/queue-3.12/tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch b/queue-3.12/tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch new file mode 100644 index 00000000000..07e336d2973 --- /dev/null +++ b/queue-3.12/tcp-only-take-rtt-from-timestamps-if-new-data-is-acked.patch @@ -0,0 +1,35 @@ +From 49627d9ffd019bffa9613a2dcfb34f3e85adcf6c Mon Sep 17 00:00:00 2001 +From: Yuchung Cheng +Date: Thu, 24 Oct 2013 08:55:25 -0700 +Subject: tcp: only take RTT from timestamps if new data is acked + +From: Yuchung Cheng + +[ Upstream commit 2909d874f34eae157aecab0af27c6dc4a1751f8f ] + +Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug 
that +it does not check if the ACK acknowledges new data before taking +the RTT sample from TCP timestamps. This patch adds the check +back as required by the RFC. + +Signed-off-by: Yuchung Cheng +Acked-by: Neal Cardwell +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2856,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(st + * left edge of the send window. + * See draft-ietf-tcplw-high-performance-00, section 3.3. + */ +- if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) ++ if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && ++ flag & FLAG_ACKED) + seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + + if (seq_rtt < 0) diff --git a/queue-3.12/virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch b/queue-3.12/virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch new file mode 100644 index 00000000000..6c3c092347e --- /dev/null +++ b/queue-3.12/virtio-net-correctly-handle-cpu-hotplug-notifier-during-resuming.patch @@ -0,0 +1,116 @@ +From c483901d63b5856e9fd03f654a8917b7ed88246c Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 29 Oct 2013 15:11:07 +0800 +Subject: virtio-net: correctly handle cpu hotplug notifier during resuming + +From: Jason Wang + +[ Upstream commit ec9debbd9a88d8ea86c488d6ffcac419ee7d46d9 ] + +commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to +cpu hotplug notifier if we're not ready) tries to bypass the cpu hotplug +notifier by checking the config_enable and does nothing if it was false. So it +needs to try to hold the config_lock mutex which may happen in atomic +environment which leads to the following warnings: + +[ 622.944441] CPU0 attaching NULL sched-domain. +[ 622.944446] CPU1 attaching NULL sched-domain. +[ 622.944485] CPU0 attaching NULL sched-domain.
+[ 622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616 +[ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1 +[ 622.950796] no locks held by migration/1/10. +[ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317 +[ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 +[ 622.950802] 0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70 +[ 622.950803] ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed +[ 622.950805] 0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000 +[ 622.950805] Call Trace: +[ 622.950810] [] dump_stack+0x54/0x74 +[ 622.950815] [] __might_sleep+0x112/0x114 +[ 622.950817] [] mutex_lock_nested+0x3c/0x3c6 +[ 622.950818] [] ? up+0x39/0x3e +[ 622.950821] [] ? acpi_os_signal_semaphore+0x21/0x2d +[ 622.950824] [] ? acpi_ut_release_mutex+0x5e/0x62 +[ 622.950828] [] virtnet_cpu_callback+0x33/0x87 +[ 622.950830] [] notifier_call_chain+0x3c/0x5e +[ 622.950832] [] __raw_notifier_call_chain+0xe/0x10 +[ 622.950835] [] __cpu_notify+0x20/0x37 +[ 622.950836] [] cpu_notify+0x13/0x15 +[ 622.950838] [] take_cpu_down+0x27/0x3a +[ 622.950841] [] stop_machine_cpu_stop+0x93/0xf1 +[ 622.950842] [] cpu_stopper_thread+0xa0/0x12f +[ 622.950844] [] ? cpu_stopper_thread+0x12f/0x12f +[ 622.950847] [] ? lock_release_holdtime.part.7+0xa3/0xa8 +[ 622.950848] [] ? cpu_stop_should_run+0x3f/0x47 +[ 622.950850] [] smpboot_thread_fn+0x1c5/0x1e3 +[ 622.950852] [] ? lg_global_unlock+0x67/0x67 +[ 622.950854] [] kthread+0xd8/0xe0 +[ 622.950857] [] ? wait_for_common+0x12f/0x164 +[ 622.950859] [] ? kthread_create_on_node+0x124/0x124 +[ 622.950861] [] ret_from_fork+0x7c/0xb0 +[ 622.950862] [] ? kthread_create_on_node+0x124/0x124 +[ 622.950876] smpboot: CPU 1 is now offline +[ 623.194556] SMP alternatives: lockdep: fixing up alternatives +[ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1 +... 
+ +A correct fix is to unregister the hotcpu notifier during freeze and register a +new one in restore. + +Reported-by: Fengguang Wu +Tested-by: Fengguang Wu +Cc: Wanlong Gao +Cc: Rusty Russell +Cc: Michael S. Tsirkin +Signed-off-by: Jason Wang +Acked-by: Michael S. Tsirkin +Reviewed-by: Wanlong Gao +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct n + { + struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); + +- mutex_lock(&vi->config_lock); +- +- if (!vi->config_enable) +- goto done; +- + switch(action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: +@@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct n + break; + } + +-done: +- mutex_unlock(&vi->config_lock); + return NOTIFY_OK; + } + +@@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_ + struct virtnet_info *vi = vdev->priv; + int i; + ++ unregister_hotcpu_notifier(&vi->nb); ++ + /* Prevent config work handler from accessing the device */ + mutex_lock(&vi->config_lock); + vi->config_enable = false; +@@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio + virtnet_set_queues(vi, vi->curr_queue_pairs); + rtnl_unlock(); + ++ err = register_hotcpu_notifier(&vi->nb); ++ if (err) ++ return err; ++ + return 0; + } + #endif diff --git a/queue-3.12/xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch b/queue-3.12/xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch new file mode 100644 index 00000000000..98708adad71 --- /dev/null +++ b/queue-3.12/xen-netback-use-jiffies_64-value-to-calculate-credit-timeout.patch @@ -0,0 +1,87 @@ +From 7aa17751a3b382398a1db036ea6b1c3ab58392df Mon Sep 17 00:00:00 2001 +From: Wei Liu +Date: Mon, 28 Oct 2013 12:07:57 +0000 +Subject: xen-netback: use
jiffies_64 value to calculate credit timeout + +From: Wei Liu + +[ Upstream commit 059dfa6a93b779516321e5112db9d7621b1367ba ] + +time_after_eq() only works if the delta is < MAX_ULONG/2. + +For a 32bit Dom0, if netfront sends packets at a very low rate, the time +between subsequent calls to tx_credit_exceeded() may exceed MAX_ULONG/2 +and the test for time_after_eq() will be incorrect. Credit will not be +replenished and the guest may become unable to send packets (e.g., if +prior to the long gap, all credit was exhausted). + +Use the jiffies_64 variant to mitigate this problem for 32bit Dom0. + +Suggested-by: Jan Beulich +Signed-off-by: Wei Liu +Reviewed-by: David Vrabel +Cc: Ian Campbell +Cc: Jason Luan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netback/common.h | 1 + + drivers/net/xen-netback/interface.c | 3 +-- + drivers/net/xen-netback/netback.c | 10 +++++----- + 3 files changed, 7 insertions(+), 7 deletions(-) + +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -163,6 +163,7 @@ struct xenvif { + unsigned long credit_usec; + unsigned long remaining_credit; + struct timer_list credit_timeout; ++ u64 credit_window_start; + + /* Statistics */ + unsigned long rx_gso_checksum_fixup; +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -312,8 +312,7 @@ struct xenvif *xenvif_alloc(struct devic + vif->credit_bytes = vif->remaining_credit = ~0UL; + vif->credit_usec = 0UL; + init_timer(&vif->credit_timeout); +- /* Initialize 'expires' now: it's used to track the credit window.
*/ +- vif->credit_timeout.expires = jiffies; ++ vif->credit_window_start = get_jiffies_64(); + + dev->netdev_ops = &xenvif_netdev_ops; + dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -1185,9 +1185,8 @@ out: + + static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) + { +- unsigned long now = jiffies; +- unsigned long next_credit = +- vif->credit_timeout.expires + ++ u64 now = get_jiffies_64(); ++ u64 next_credit = vif->credit_window_start + + msecs_to_jiffies(vif->credit_usec / 1000); + + /* Timer could already be pending in rare cases. */ +@@ -1195,8 +1194,8 @@ static bool tx_credit_exceeded(struct xe + return true; + + /* Passed the point where we can replenish credit? */ +- if (time_after_eq(now, next_credit)) { +- vif->credit_timeout.expires = now; ++ if (time_after_eq64(now, next_credit)) { ++ vif->credit_window_start = now; + tx_add_credit(vif); + } + +@@ -1208,6 +1207,7 @@ static bool tx_credit_exceeded(struct xe + tx_credit_callback; + mod_timer(&vif->credit_timeout, + next_credit); ++ vif->credit_window_start = next_credit; + + return true; + }