From: Greg Kroah-Hartman Date: Wed, 7 Dec 2011 22:10:08 +0000 (-0800) Subject: 3.1 patches X-Git-Tag: v3.0.13~1 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7d5e607557bf41c23bd4bbd2691e842581dfac11;p=thirdparty%2Fkernel%2Fstable-queue.git 3.1 patches added patches: ah-correctly-pass-error-codes-in-ahash-output-callback.patch ah-read-nexthdr-value-before-overwriting-it-in-ahash-input-callback.patch inet-add-a-redirect-generation-id-in-inetpeer.patch ipv4-avoid-useless-call-of-the-function-check_peer_pmtu.patch ipv4-fix-for-ip_options_rcv_srr-daddr-update.patch ipv4-fix-lockdep-splat-in-rt_cache_seq_show.patch ipv4-fix-redirect-handling.patch ipv4-make-sure-rto_onlink-is-saved-in-routing-cache.patch ipv4-perform-peer-validation-on-cached-route-lookup.patch qeth-l3-fix-rcu-splat-in-xmit.patch route-fix-icmp-redirect-validation.patch sch_red-fix-red_calc_qavg_from_idle_time.patch sch_red-fix-red_change.patch sch_teql-fix-lockdep-splat.patch --- diff --git a/queue-3.1/ah-correctly-pass-error-codes-in-ahash-output-callback.patch b/queue-3.1/ah-correctly-pass-error-codes-in-ahash-output-callback.patch new file mode 100644 index 00000000000..7ee968b1043 --- /dev/null +++ b/queue-3.1/ah-correctly-pass-error-codes-in-ahash-output-callback.patch @@ -0,0 +1,49 @@ +From e5853df2d49d47aca870bd28636d25515d3c3b70 Mon Sep 17 00:00:00 2001 +From: Nick Bowler +Date: Tue, 8 Nov 2011 12:12:44 +0000 +Subject: [PATCH 01/15] ah: Correctly pass error codes in ahash output callback. + + +From: Nick Bowler + +[ Upstream commit 069294e813ed5f27f82613b027609bcda5f1b914 ] + +The AH4/6 ahash output callbacks pass nexthdr to xfrm_output_resume +instead of the error code. This appears to be a copy+paste error from +the input case, where nexthdr is expected. 
This causes the driver to +continuously add AH headers to the datagram until either an allocation +fails and the packet is dropped or the ahash driver hits a synchronous +fallback and the resulting monstrosity is transmitted. + +Correct this issue by simply passing the error code unadulterated. + +Signed-off-by: Nick Bowler +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ah4.c | 2 -- + net/ipv6/ah6.c | 2 -- + 2 files changed, 4 deletions(-) + +--- a/net/ipv4/ah4.c ++++ b/net/ipv4/ah4.c +@@ -136,8 +136,6 @@ static void ah_output_done(struct crypto + memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); + } + +- err = ah->nexthdr; +- + kfree(AH_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); + } +--- a/net/ipv6/ah6.c ++++ b/net/ipv6/ah6.c +@@ -324,8 +324,6 @@ static void ah6_output_done(struct crypt + #endif + } + +- err = ah->nexthdr; +- + kfree(AH_SKB_CB(skb)->tmp); + xfrm_output_resume(skb, err); + } diff --git a/queue-3.1/ah-read-nexthdr-value-before-overwriting-it-in-ahash-input-callback.patch b/queue-3.1/ah-read-nexthdr-value-before-overwriting-it-in-ahash-input-callback.patch new file mode 100644 index 00000000000..468f5464ef2 --- /dev/null +++ b/queue-3.1/ah-read-nexthdr-value-before-overwriting-it-in-ahash-input-callback.patch @@ -0,0 +1,56 @@ +From 5c99ab7a217277a2e37e661294d985a4f9244a28 Mon Sep 17 00:00:00 2001 +From: Nick Bowler +Date: Tue, 8 Nov 2011 12:12:45 +0000 +Subject: ah: Read nexthdr value before overwriting it in ahash input callback. + + +From: Nick Bowler + +[ Upstream commit b7ea81a58adc123a4e980cb0eff9eb5c144b5dc7 ] + +The AH4/6 ahash input callbacks read out the nexthdr field from the AH +header *after* they overwrite that header. This is obviously not going +to end well. Fix it up. + +Signed-off-by: Nick Bowler +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ah4.c | 4 ++-- + net/ipv6/ah6.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ah4.c ++++ b/net/ipv4/ah4.c +@@ -262,12 +262,12 @@ static void ah_input_done(struct crypto_ + if (err) + goto out; + ++ err = ah->nexthdr; ++ + skb->network_header += ah_hlen; + memcpy(skb_network_header(skb), work_iph, ihl); + __skb_pull(skb, ah_hlen + ihl); + skb_set_transport_header(skb, -ihl); +- +- err = ah->nexthdr; + out: + kfree(AH_SKB_CB(skb)->tmp); + xfrm_input_resume(skb, err); +--- a/net/ipv6/ah6.c ++++ b/net/ipv6/ah6.c +@@ -464,12 +464,12 @@ static void ah6_input_done(struct crypto + if (err) + goto out; + ++ err = ah->nexthdr; ++ + skb->network_header += ah_hlen; + memcpy(skb_network_header(skb), work_iph, hdr_len); + __skb_pull(skb, ah_hlen + hdr_len); + skb_set_transport_header(skb, -hdr_len); +- +- err = ah->nexthdr; + out: + kfree(AH_SKB_CB(skb)->tmp); + xfrm_input_resume(skb, err); diff --git a/queue-3.1/inet-add-a-redirect-generation-id-in-inetpeer.patch b/queue-3.1/inet-add-a-redirect-generation-id-in-inetpeer.patch new file mode 100644 index 00000000000..873fa435335 --- /dev/null +++ b/queue-3.1/inet-add-a-redirect-generation-id-in-inetpeer.patch @@ -0,0 +1,89 @@ +From 8d27f50ab132d14d0f6cca7cc23a52829b89fdd0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Sat, 26 Nov 2011 12:13:44 +0000 +Subject: inet: add a redirect generation id in inetpeer + + +From: Eric Dumazet + +[ Upstream commit de68dca1816660b0d3ac89fa59ffb410007a143f ] + +Now inetpeer is the place where we cache redirect information for ipv4 +destinations, we must be able to invalidate informations when a route is +added/removed on host. + +As inetpeer is not yet namespace aware, this patch adds a shared +redirect_genid, and a per inetpeer redirect_genid. This might be changed +later if inetpeer becomes ns aware. 
+ +Cache information for one inetpeer is valid as long as its +redirect_genid has the same value as the global redirect_genid. + +Reported-by: Arkadiusz Miśkiewicz +Tested-by: Arkadiusz Miśkiewicz +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inetpeer.h | 1 + + net/ipv4/route.c | 10 +++++++++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/include/net/inetpeer.h ++++ b/include/net/inetpeer.h +@@ -35,6 +35,7 @@ struct inet_peer { + + u32 metrics[RTAX_MAX]; + u32 rate_tokens; /* rate limiting for ICMP */ ++ int redirect_genid; + unsigned long rate_last; + unsigned long pmtu_expires; + u32 pmtu_orig; +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -132,6 +132,7 @@ static int ip_rt_mtu_expires __read_most + static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; + static int ip_rt_min_advmss __read_mostly = 256; + static int rt_chain_length_max __read_mostly = 20; ++static int redirect_genid; + + /* + * Interface to generic destination cache. 
+@@ -842,6 +843,7 @@ static void rt_cache_invalidate(struct n + + get_random_bytes(&shuffle, sizeof(shuffle)); + atomic_add(shuffle + 1U, &net->ipv4.rt_genid); ++ redirect_genid++; + } + + /* +@@ -1396,8 +1398,10 @@ void ip_rt_redirect(__be32 old_gw, __be3 + + peer = rt->peer; + if (peer) { +- if (peer->redirect_learned.a4 != new_gw) { ++ if (peer->redirect_learned.a4 != new_gw || ++ peer->redirect_genid != redirect_genid) { + peer->redirect_learned.a4 = new_gw; ++ peer->redirect_genid = redirect_genid; + atomic_inc(&__rt_peer_genid); + } + check_peer_redir(&rt->dst, peer); +@@ -1706,6 +1710,8 @@ static struct dst_entry *ipv4_dst_check( + if (peer) { + check_peer_pmtu(dst, peer); + ++ if (peer->redirect_genid != redirect_genid) ++ peer->redirect_learned.a4 = 0; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) +@@ -1857,6 +1863,8 @@ static void rt_init_metrics(struct rtabl + dst_init_metrics(&rt->dst, peer->metrics, false); + + check_peer_pmtu(&rt->dst, peer); ++ if (peer->redirect_genid != redirect_genid) ++ peer->redirect_learned.a4 = 0; + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + rt->rt_gateway = peer->redirect_learned.a4; diff --git a/queue-3.1/ipv4-avoid-useless-call-of-the-function-check_peer_pmtu.patch b/queue-3.1/ipv4-avoid-useless-call-of-the-function-check_peer_pmtu.patch new file mode 100644 index 00000000000..4423ef919b7 --- /dev/null +++ b/queue-3.1/ipv4-avoid-useless-call-of-the-function-check_peer_pmtu.patch @@ -0,0 +1,36 @@ +From b62f373eb69e44af4230d89ff40595f5f28fc39e Mon Sep 17 00:00:00 2001 +From: Gao feng +Date: Wed, 19 Oct 2011 15:34:09 +0000 +Subject: ipv4: avoid useless call of the function check_peer_pmtu + + +From: Gao feng + +[ Upstream commit 59445b6b1f90b97c4e28062b96306bacfa4fb170 ] + +In func ipv4_dst_check,check_peer_pmtu should be called only when peer is updated. 
+So, if the peer is not updated in ip_rt_frag_needed, we should not increment __rt_peer_genid. + +Signed-off-by: Gao feng +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1572,11 +1572,10 @@ unsigned short ip_rt_frag_needed(struct + est_mtu = mtu; + peer->pmtu_learned = mtu; + peer->pmtu_expires = pmtu_expires; ++ atomic_inc(&__rt_peer_genid); + } + + inet_putpeer(peer); +- +- atomic_inc(&__rt_peer_genid); + } + return est_mtu ? : new_mtu; + } diff --git a/queue-3.1/ipv4-fix-for-ip_options_rcv_srr-daddr-update.patch b/queue-3.1/ipv4-fix-for-ip_options_rcv_srr-daddr-update.patch new file mode 100644 index 00000000000..13651083151 --- /dev/null +++ b/queue-3.1/ipv4-fix-for-ip_options_rcv_srr-daddr-update.patch @@ -0,0 +1,31 @@ +From dfeb0f8f5d4efa4ef513c9d7e69f31b5251b084d Mon Sep 17 00:00:00 2001 +From: Li Wei +Date: Tue, 8 Nov 2011 21:39:28 +0000 +Subject: ipv4: fix for ip_options_rcv_srr() daddr update. + + +From: Li Wei + +[ Upstream commit b12f62efb8ec0b9523bdb6c2d412c07193086de9 ] + +When opt->srr_is_hit is set, skb_rtable(skb) has been updated for +'nexthop' and iph->daddr should always equal skb_rtable->rt_dst, +so we need to update iph->daddr as well. + +Signed-off-by: Li Wei +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_options.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/ip_options.c ++++ b/net/ipv4/ip_options.c +@@ -640,6 +640,7 @@ int ip_options_rcv_srr(struct sk_buff *s + } + if (srrptr <= srrspace) { + opt->srr_is_hit = 1; ++ iph->daddr = nexthop; + opt->is_changed = 1; + } + return 0; diff --git a/queue-3.1/ipv4-fix-lockdep-splat-in-rt_cache_seq_show.patch b/queue-3.1/ipv4-fix-lockdep-splat-in-rt_cache_seq_show.patch new file mode 100644 index 00000000000..fcf09d758c8 --- /dev/null +++ b/queue-3.1/ipv4-fix-lockdep-splat-in-rt_cache_seq_show.patch @@ -0,0 +1,48 @@ +From bf0ced8a65c0afcb569f19550cb86d7237fc12a4 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 29 Nov 2011 20:05:55 +0000 +Subject: ipv4: fix lockdep splat in rt_cache_seq_show + + +From: Eric Dumazet + +[ Upstream commit 218fa90f072e4aeff9003d57e390857f4f35513e ] + +After commit f2c31e32b378 (fix NULL dereferences in check_peer_redir()), +dst_get_neighbour() should be guarded by rcu_read_lock() / +rcu_read_unlock() section. + +Reported-by: Miles Lane +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -417,9 +417,13 @@ static int rt_cache_seq_show(struct seq_ + else { + struct rtable *r = v; + struct neighbour *n; +- int len; ++ int len, HHUptod; + ++ rcu_read_lock(); + n = dst_get_neighbour(&r->dst); ++ HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; ++ rcu_read_unlock(); ++ + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" + "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", + r->dst.dev ? r->dst.dev->name : "*", +@@ -433,7 +437,7 @@ static int rt_cache_seq_show(struct seq_ + dst_metric(&r->dst, RTAX_RTTVAR)), + r->rt_key_tos, + -1, +- (n && (n->nud_state & NUD_CONNECTED)) ? 
1 : 0, ++ HHUptod, + r->rt_spec_dst, &len); + + seq_printf(seq, "%*s\n", 127 - len, ""); diff --git a/queue-3.1/ipv4-fix-redirect-handling.patch b/queue-3.1/ipv4-fix-redirect-handling.patch new file mode 100644 index 00000000000..2a6d0be68e8 --- /dev/null +++ b/queue-3.1/ipv4-fix-redirect-handling.patch @@ -0,0 +1,180 @@ +From 08d066eaef55a609e2232786d5d03420a9e7dedc Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 18 Nov 2011 15:24:32 -0500 +Subject: ipv4: fix redirect handling + + +From: Eric Dumazet + +[ Upstream commit 9cc20b268a5a14f5e57b8ad405a83513ab0d78dc ] + +commit f39925dbde77 (ipv4: Cache learned redirect information in +inetpeer.) introduced a regression in ICMP redirect handling. + +It assumed ipv4_dst_check() would be called because all possible routes +were attached to the inetpeer we modify in ip_rt_redirect(), but thats +not true. + +commit 7cc9150ebe (route: fix ICMP redirect validation) tried to fix +this but solution was not complete. (It fixed only one route) + +So we must lookup existing routes (including different TOS values) and +call check_peer_redir() on them. + +Reported-by: Ivan Zahariev +Signed-off-by: Eric Dumazet +CC: Flavio Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 114 +++++++++++++++++++++++++++++-------------------------- + 1 file changed, 61 insertions(+), 53 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1309,16 +1309,42 @@ static void rt_del(unsigned hash, struct + spin_unlock_bh(rt_hash_lock_addr(hash)); + } + ++static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) ++{ ++ struct rtable *rt = (struct rtable *) dst; ++ __be32 orig_gw = rt->rt_gateway; ++ struct neighbour *n, *old_n; ++ ++ dst_confirm(&rt->dst); ++ ++ rt->rt_gateway = peer->redirect_learned.a4; ++ ++ n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); ++ if (IS_ERR(n)) ++ return PTR_ERR(n); ++ old_n = xchg(&rt->dst._neighbour, n); ++ if (old_n) ++ neigh_release(old_n); ++ if (!n || !(n->nud_state & NUD_VALID)) { ++ if (n) ++ neigh_event_send(n, NULL); ++ rt->rt_gateway = orig_gw; ++ return -EAGAIN; ++ } else { ++ rt->rt_flags |= RTCF_REDIRECTED; ++ call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); ++ } ++ return 0; ++} ++ + /* called in rcu_read_lock() section */ + void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, + __be32 saddr, struct net_device *dev) + { + int s, i; + struct in_device *in_dev = __in_dev_get_rcu(dev); +- struct rtable *rt; + __be32 skeys[2] = { saddr, 0 }; + int ikeys[2] = { dev->ifindex, 0 }; +- struct flowi4 fl4; + struct inet_peer *peer; + struct net *net; + +@@ -1341,33 +1367,42 @@ void ip_rt_redirect(__be32 old_gw, __be3 + goto reject_redirect; + } + +- memset(&fl4, 0, sizeof(fl4)); +- fl4.daddr = daddr; + for (s = 0; s < 2; s++) { + for (i = 0; i < 2; i++) { +- fl4.flowi4_oif = ikeys[i]; +- fl4.saddr = skeys[s]; +- rt = __ip_route_output_key(net, &fl4); +- if (IS_ERR(rt)) +- continue; +- +- if (rt->dst.error || rt->dst.dev != dev || +- rt->rt_gateway != old_gw) { +- ip_rt_put(rt); +- continue; +- } +- +- if (!rt->peer) +- rt_bind_peer(rt, rt->rt_dst, 1); +- +- peer = rt->peer; +- if (peer) { +- 
peer->redirect_learned.a4 = new_gw; +- atomic_inc(&__rt_peer_genid); ++ unsigned int hash; ++ struct rtable __rcu **rthp; ++ struct rtable *rt; ++ ++ hash = rt_hash(daddr, skeys[s], ikeys[i], rt_genid(net)); ++ ++ rthp = &rt_hash_table[hash].chain; ++ ++ while ((rt = rcu_dereference(*rthp)) != NULL) { ++ rthp = &rt->dst.rt_next; ++ ++ if (rt->rt_key_dst != daddr || ++ rt->rt_key_src != skeys[s] || ++ rt->rt_oif != ikeys[i] || ++ rt_is_input_route(rt) || ++ rt_is_expired(rt) || ++ !net_eq(dev_net(rt->dst.dev), net) || ++ rt->dst.error || ++ rt->dst.dev != dev || ++ rt->rt_gateway != old_gw) ++ continue; ++ ++ if (!rt->peer) ++ rt_bind_peer(rt, rt->rt_dst, 1); ++ ++ peer = rt->peer; ++ if (peer) { ++ if (peer->redirect_learned.a4 != new_gw) { ++ peer->redirect_learned.a4 = new_gw; ++ atomic_inc(&__rt_peer_genid); ++ } ++ check_peer_redir(&rt->dst, peer); ++ } + } +- +- ip_rt_put(rt); +- return; + } + } + return; +@@ -1654,33 +1689,6 @@ static void ip_rt_update_pmtu(struct dst + } + } + +-static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +-{ +- struct rtable *rt = (struct rtable *) dst; +- __be32 orig_gw = rt->rt_gateway; +- struct neighbour *n, *old_n; +- +- dst_confirm(&rt->dst); +- +- rt->rt_gateway = peer->redirect_learned.a4; +- +- n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); +- if (IS_ERR(n)) +- return PTR_ERR(n); +- old_n = xchg(&rt->dst._neighbour, n); +- if (old_n) +- neigh_release(old_n); +- if (!n || !(n->nud_state & NUD_VALID)) { +- if (n) +- neigh_event_send(n, NULL); +- rt->rt_gateway = orig_gw; +- return -EAGAIN; +- } else { +- rt->rt_flags |= RTCF_REDIRECTED; +- call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); +- } +- return 0; +-} + + static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) + { diff --git a/queue-3.1/ipv4-make-sure-rto_onlink-is-saved-in-routing-cache.patch b/queue-3.1/ipv4-make-sure-rto_onlink-is-saved-in-routing-cache.patch new file mode 100644 index 00000000000..1f598db48e5 --- 
/dev/null +++ b/queue-3.1/ipv4-make-sure-rto_onlink-is-saved-in-routing-cache.patch @@ -0,0 +1,74 @@ +From 4be7ca112141a0bfcfe8a29890743de0f313abf1 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Fri, 2 Dec 2011 11:39:42 +0000 +Subject: ipv4: make sure RTO_ONLINK is saved in routing cache + + +From: Julian Anastasov + +[ Upstream commit f61759e6b831a55b89e584b198c3da325e2bc379 ] + +__mkroute_output fails to work with the original tos +and uses value with stripped RTO_ONLINK bit. Make sure we put +the original TOS bits into rt_key_tos because it used to match +cached route. + +Signed-off-by: Julian Anastasov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -112,7 +112,7 @@ + #include + + #define RT_FL_TOS(oldflp4) \ +- ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) ++ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) + + #define IP_MAX_MTU 0xFFF0 + +@@ -2430,11 +2430,11 @@ EXPORT_SYMBOL(ip_route_input_common); + static struct rtable *__mkroute_output(const struct fib_result *res, + const struct flowi4 *fl4, + __be32 orig_daddr, __be32 orig_saddr, +- int orig_oif, struct net_device *dev_out, ++ int orig_oif, __u8 orig_rtos, ++ struct net_device *dev_out, + unsigned int flags) + { + struct fib_info *fi = res->fi; +- u32 tos = RT_FL_TOS(fl4); + struct in_device *in_dev; + u16 type = res->type; + struct rtable *rth; +@@ -2485,7 +2485,7 @@ static struct rtable *__mkroute_output(c + rth->rt_genid = rt_genid(dev_net(dev_out)); + rth->rt_flags = flags; + rth->rt_type = type; +- rth->rt_key_tos = tos; ++ rth->rt_key_tos = orig_rtos; + rth->rt_dst = fl4->daddr; + rth->rt_src = fl4->saddr; + rth->rt_route_iif = 0; +@@ -2535,7 +2535,7 @@ static struct rtable *__mkroute_output(c + static struct rtable *ip_route_output_slow(struct net *net, struct flowi4 *fl4) + { + struct net_device *dev_out 
= NULL; +- u32 tos = RT_FL_TOS(fl4); ++ __u8 tos = RT_FL_TOS(fl4); + unsigned int flags = 0; + struct fib_result res; + struct rtable *rth; +@@ -2711,7 +2711,7 @@ static struct rtable *ip_route_output_sl + + make_route: + rth = __mkroute_output(&res, fl4, orig_daddr, orig_saddr, orig_oif, +- dev_out, flags); ++ tos, dev_out, flags); + if (!IS_ERR(rth)) { + unsigned int hash; + diff --git a/queue-3.1/ipv4-perform-peer-validation-on-cached-route-lookup.patch b/queue-3.1/ipv4-perform-peer-validation-on-cached-route-lookup.patch new file mode 100644 index 00000000000..002bda810f5 --- /dev/null +++ b/queue-3.1/ipv4-perform-peer-validation-on-cached-route-lookup.patch @@ -0,0 +1,121 @@ +From 7b2bd8228c9b1ae5dad95d23f4af7213d88d5ff5 Mon Sep 17 00:00:00 2001 +From: "David S. Miller" +Date: Thu, 1 Dec 2011 13:38:59 -0500 +Subject: ipv4: Perform peer validation on cached route lookup. + + +From: "David S. Miller" + +[ Upstream commit efbc368dcc6426d5430b9b8eeda944cf2cb74b8c, + incorporating a follow-on change to prevent an OOPS ] + +Otherwise we won't notice the peer GENID change. + +Reported-by: Steffen Klassert +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 41 ++++++++++++++++++++++------------------- + 1 file changed, 22 insertions(+), 19 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1311,7 +1311,7 @@ static void rt_del(unsigned hash, struct + spin_unlock_bh(rt_hash_lock_addr(hash)); + } + +-static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) ++static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) + { + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; +@@ -1322,21 +1322,19 @@ static int check_peer_redir(struct dst_e + rt->rt_gateway = peer->redirect_learned.a4; + + n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway); +- if (IS_ERR(n)) +- return PTR_ERR(n); ++ if (IS_ERR(n)) { ++ rt->rt_gateway = orig_gw; ++ return; ++ } + old_n = xchg(&rt->dst._neighbour, n); + if (old_n) + neigh_release(old_n); +- if (!n || !(n->nud_state & NUD_VALID)) { +- if (n) +- neigh_event_send(n, NULL); +- rt->rt_gateway = orig_gw; +- return -EAGAIN; ++ if (!(n->nud_state & NUD_VALID)) { ++ neigh_event_send(n, NULL); + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); + } +- return 0; + } + + /* called in rcu_read_lock() section */ +@@ -1694,12 +1692,8 @@ static void ip_rt_update_pmtu(struct dst + } + + +-static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) ++static void ipv4_validate_peer(struct rtable *rt) + { +- struct rtable *rt = (struct rtable *) dst; +- +- if (rt_is_expired(rt)) +- return NULL; + if (rt->rt_peer_genid != rt_peer_genid()) { + struct inet_peer *peer; + +@@ -1708,19 +1702,26 @@ static struct dst_entry *ipv4_dst_check( + + peer = rt->peer; + if (peer) { +- check_peer_pmtu(dst, peer); ++ check_peer_pmtu(&rt->dst, peer); + + if (peer->redirect_genid != redirect_genid) + peer->redirect_learned.a4 = 0; + if (peer->redirect_learned.a4 && +- peer->redirect_learned.a4 != rt->rt_gateway) { +- if 
(check_peer_redir(dst, peer)) +- return NULL; +- } ++ peer->redirect_learned.a4 != rt->rt_gateway) ++ check_peer_redir(&rt->dst, peer); + } + + rt->rt_peer_genid = rt_peer_genid(); + } ++} ++ ++static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) ++{ ++ struct rtable *rt = (struct rtable *) dst; ++ ++ if (rt_is_expired(rt)) ++ return NULL; ++ ipv4_validate_peer(rt); + return dst; + } + +@@ -2370,6 +2371,7 @@ int ip_route_input_common(struct sk_buff + rth->rt_mark == skb->mark && + net_eq(dev_net(rth->dst.dev), net) && + !rt_is_expired(rth)) { ++ ipv4_validate_peer(rth); + if (noref) { + dst_use_noref(&rth->dst, jiffies); + skb_dst_set_noref(skb, &rth->dst); +@@ -2745,6 +2747,7 @@ struct rtable *__ip_route_output_key(str + (IPTOS_RT_MASK | RTO_ONLINK)) && + net_eq(dev_net(rth->dst.dev), net) && + !rt_is_expired(rth)) { ++ ipv4_validate_peer(rth); + dst_use(&rth->dst, jiffies); + RT_CACHE_STAT_INC(out_hit); + rcu_read_unlock_bh(); diff --git a/queue-3.1/qeth-l3-fix-rcu-splat-in-xmit.patch b/queue-3.1/qeth-l3-fix-rcu-splat-in-xmit.patch new file mode 100644 index 00000000000..d48ea9a2837 --- /dev/null +++ b/queue-3.1/qeth-l3-fix-rcu-splat-in-xmit.patch @@ -0,0 +1,62 @@ +From 4e0608b6d27cdf8255385558d5fe98d05e0afd35 Mon Sep 17 00:00:00 2001 +From: Frank Blaschka +Date: Tue, 15 Nov 2011 02:31:15 +0000 +Subject: qeth: l3 fix rcu splat in xmit + + +From: Frank Blaschka + +[ Upstream commit 1d36cb479f204a0fedc1a3e7ce7b32c0a2c48769 ] + +when use dst_get_neighbour to get neighbour, we need +rcu_read_lock to protect, since dst_get_neighbour uses +rcu_dereference. + +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/net/qeth_l3_main.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/drivers/s390/net/qeth_l3_main.c ++++ b/drivers/s390/net/qeth_l3_main.c +@@ -2740,11 +2740,13 @@ int inline qeth_l3_get_cast_type(struct + struct neighbour *n = NULL; + struct dst_entry *dst; + ++ rcu_read_lock(); + dst = skb_dst(skb); + if (dst) + n = dst_get_neighbour(dst); + if (n) { + cast_type = n->type; ++ rcu_read_unlock(); + if ((cast_type == RTN_BROADCAST) || + (cast_type == RTN_MULTICAST) || + (cast_type == RTN_ANYCAST)) +@@ -2752,6 +2754,8 @@ int inline qeth_l3_get_cast_type(struct + else + return RTN_UNSPEC; + } ++ rcu_read_unlock(); ++ + /* try something else */ + if (skb->protocol == ETH_P_IPV6) + return (skb_network_header(skb)[24] == 0xff) ? +@@ -2807,6 +2811,8 @@ static void qeth_l3_fill_header(struct q + } + + hdr->hdr.l3.length = skb->len - sizeof(struct qeth_hdr); ++ ++ rcu_read_lock(); + dst = skb_dst(skb); + if (dst) + n = dst_get_neighbour(dst); +@@ -2853,6 +2859,7 @@ static void qeth_l3_fill_header(struct q + QETH_CAST_UNICAST | QETH_HDR_PASSTHRU; + } + } ++ rcu_read_unlock(); + } + + static inline void qeth_l3_hdr_csum(struct qeth_card *card, diff --git a/queue-3.1/route-fix-icmp-redirect-validation.patch b/queue-3.1/route-fix-icmp-redirect-validation.patch new file mode 100644 index 00000000000..a5e000b12ff --- /dev/null +++ b/queue-3.1/route-fix-icmp-redirect-validation.patch @@ -0,0 +1,86 @@ +From 679d2016a0aadeea3e2135dd145b39df1ae1a2dc Mon Sep 17 00:00:00 2001 +From: Flavio Leitner +Date: Mon, 24 Oct 2011 02:56:38 -0400 +Subject: route: fix ICMP redirect validation + + +From: Flavio Leitner + +[ Upstream commit 7cc9150ebe8ec06cafea9f1c10d92ddacf88d8ae ] + +The commit f39925dbde7788cfb96419c0f092b086aa325c0f +(ipv4: Cache learned redirect information in inetpeer.) +removed some ICMP packet validations which are required by +RFC 1122, section 3.2.2.2: +... 
+ A Redirect message SHOULD be silently discarded if the new + gateway address it specifies is not on the same connected + (sub-) net through which the Redirect arrived [INTRO:2, + Appendix A], or if the source of the Redirect is not the + current first-hop gateway for the specified destination (see + Section 3.3.1). + +Signed-off-by: Flavio Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 40 +++++++++++++++++++++++++++++++++------- + 1 file changed, 33 insertions(+), 7 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1313,7 +1313,12 @@ static void rt_del(unsigned hash, struct + void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, + __be32 saddr, struct net_device *dev) + { ++ int s, i; + struct in_device *in_dev = __in_dev_get_rcu(dev); ++ struct rtable *rt; ++ __be32 skeys[2] = { saddr, 0 }; ++ int ikeys[2] = { dev->ifindex, 0 }; ++ struct flowi4 fl4; + struct inet_peer *peer; + struct net *net; + +@@ -1336,13 +1341,34 @@ void ip_rt_redirect(__be32 old_gw, __be3 + goto reject_redirect; + } + +- peer = inet_getpeer_v4(daddr, 1); +- if (peer) { +- peer->redirect_learned.a4 = new_gw; +- +- inet_putpeer(peer); +- +- atomic_inc(&__rt_peer_genid); ++ memset(&fl4, 0, sizeof(fl4)); ++ fl4.daddr = daddr; ++ for (s = 0; s < 2; s++) { ++ for (i = 0; i < 2; i++) { ++ fl4.flowi4_oif = ikeys[i]; ++ fl4.saddr = skeys[s]; ++ rt = __ip_route_output_key(net, &fl4); ++ if (IS_ERR(rt)) ++ continue; ++ ++ if (rt->dst.error || rt->dst.dev != dev || ++ rt->rt_gateway != old_gw) { ++ ip_rt_put(rt); ++ continue; ++ } ++ ++ if (!rt->peer) ++ rt_bind_peer(rt, rt->rt_dst, 1); ++ ++ peer = rt->peer; ++ if (peer) { ++ peer->redirect_learned.a4 = new_gw; ++ atomic_inc(&__rt_peer_genid); ++ } ++ ++ ip_rt_put(rt); ++ return; ++ } + } + return; + diff --git a/queue-3.1/sch_red-fix-red_calc_qavg_from_idle_time.patch b/queue-3.1/sch_red-fix-red_calc_qavg_from_idle_time.patch new file mode 100644 index 
00000000000..49324f4f881 --- /dev/null +++ b/queue-3.1/sch_red-fix-red_calc_qavg_from_idle_time.patch @@ -0,0 +1,75 @@ +From e4b18d138408631e1e5b3f9a22c583f1680d29d8 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 30 Nov 2011 12:10:53 +0000 +Subject: sch_red: fix red_calc_qavg_from_idle_time + + +From: Eric Dumazet + +[ Upstream commit ea6a5d3b97b768561db6358f15e4c84ced0f4f7e ] + +Since commit a4a710c4a7490587 (pkt_sched: Change PSCHED_SHIFT from 10 to +6) it seems RED/GRED are broken. + +red_calc_qavg_from_idle_time() computes a delay in us units, but this +delay is now 16 times bigger than real delay, so the final qavg result +smaller than expected. + +Use standard kernel time services since there is no need to obfuscate +them. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/red.h | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +--- a/include/net/red.h ++++ b/include/net/red.h +@@ -116,7 +116,7 @@ struct red_parms { + u32 qR; /* Cached random number */ + + unsigned long qavg; /* Average queue length: A scaled */ +- psched_time_t qidlestart; /* Start of current idle period */ ++ ktime_t qidlestart; /* Start of current idle period */ + }; + + static inline u32 red_rmask(u8 Plog) +@@ -148,17 +148,17 @@ static inline void red_set_parms(struct + + static inline int red_is_idling(struct red_parms *p) + { +- return p->qidlestart != PSCHED_PASTPERFECT; ++ return p->qidlestart.tv64 != 0; + } + + static inline void red_start_of_idle_period(struct red_parms *p) + { +- p->qidlestart = psched_get_time(); ++ p->qidlestart = ktime_get(); + } + + static inline void red_end_of_idle_period(struct red_parms *p) + { +- p->qidlestart = PSCHED_PASTPERFECT; ++ p->qidlestart.tv64 = 0; + } + + static inline void red_restart(struct red_parms *p) +@@ -170,13 +170,10 @@ static inline void red_restart(struct re + + static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms 
*p) + { +- psched_time_t now; +- long us_idle; ++ s64 delta = ktime_us_delta(ktime_get(), p->qidlestart); ++ long us_idle = min_t(s64, delta, p->Scell_max); + int shift; + +- now = psched_get_time(); +- us_idle = psched_tdiff_bounded(now, p->qidlestart, p->Scell_max); +- + /* + * The problem: ideally, average length queue recalcultion should + * be done over constant clock intervals. This is too expensive, so diff --git a/queue-3.1/sch_red-fix-red_change.patch b/queue-3.1/sch_red-fix-red_change.patch new file mode 100644 index 00000000000..5211423cfe6 --- /dev/null +++ b/queue-3.1/sch_red-fix-red_change.patch @@ -0,0 +1,64 @@ +From cc0683457108aa68db39aa51caf530beebc6cb85 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 1 Dec 2011 11:06:34 +0000 +Subject: sch_red: fix red_change + + +From: Eric Dumazet + +[ Upstream commit 1ee5fa1e9970a16036e37c7b9d5ce81c778252fc ] + +Le mercredi 30 novembre 2011 à 14:36 -0800, Stephen Hemminger a écrit : + +> (Almost) nobody uses RED because they can't figure it out. +> According to Wikipedia, VJ says that: +> "there are not one, but two bugs in classic RED." + +RED is useful for high throughput routers, I doubt many linux machines +act as such devices. + +I was considering adding Adaptative RED (Sally Floyd, Ramakrishna +Gummadi, Scott Shender), August 2001 + +In this version, maxp is dynamic (from 1% to 50%), and user only have to +setup min_th (target average queue size) +(max_th and wq (burst in linux RED) are automatically setup) + +By the way it seems we have a small bug in red_change() + +if (skb_queue_empty(&sch->q)) + red_end_of_idle_period(&q->parms); + +First, if queue is empty, we should call +red_start_of_idle_period(&q->parms); + +Second, since we dont use anymore sch->q, but q->qdisc, the test is +meaningless. + +Oh well... + +[PATCH] sch_red: fix red_change() + +Now RED is classful, we must check q->qdisc->q.qlen, and if queue is empty, +we start an idle period, not end it. 
+ +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_red.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_red.c ++++ b/net/sched/sch_red.c +@@ -209,8 +209,8 @@ static int red_change(struct Qdisc *sch, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_RED_STAB])); + +- if (skb_queue_empty(&sch->q)) +- red_end_of_idle_period(&q->parms); ++ if (!q->qdisc->q.qlen) ++ red_start_of_idle_period(&q->parms); + + sch_tree_unlock(sch); + return 0; diff --git a/queue-3.1/sch_teql-fix-lockdep-splat.patch b/queue-3.1/sch_teql-fix-lockdep-splat.patch new file mode 100644 index 00000000000..ca9db285c46 --- /dev/null +++ b/queue-3.1/sch_teql-fix-lockdep-splat.patch @@ -0,0 +1,83 @@ +From 2373518d60447e8916db600ae52150a3f1601611 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 30 Nov 2011 04:08:58 +0000 +Subject: sch_teql: fix lockdep splat + + +From: Eric Dumazet + +[ Upstream commit f7e57044eeb1841847c24aa06766c8290c202583 ] + +We need rcu_read_lock() protection before using dst_get_neighbour(), and +we must cache its value (pass it to __teql_resolve()) + +teql_master_xmit() is called under rcu_read_lock_bh() protection, its +not enough. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_teql.c | 31 ++++++++++++++++++++----------- + 1 file changed, 20 insertions(+), 11 deletions(-) + +--- a/net/sched/sch_teql.c ++++ b/net/sched/sch_teql.c +@@ -225,11 +225,11 @@ static int teql_qdisc_init(struct Qdisc + + + static int +-__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) ++__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, ++ struct net_device *dev, struct netdev_queue *txq, ++ struct neighbour *mn) + { +- struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); +- struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc); +- struct neighbour *mn = dst_get_neighbour(skb_dst(skb)); ++ struct teql_sched_data *q = qdisc_priv(txq->qdisc); + struct neighbour *n = q->ncache; + + if (mn->tbl == NULL) +@@ -262,17 +262,26 @@ __teql_resolve(struct sk_buff *skb, stru + } + + static inline int teql_resolve(struct sk_buff *skb, +- struct sk_buff *skb_res, struct net_device *dev) ++ struct sk_buff *skb_res, ++ struct net_device *dev, ++ struct netdev_queue *txq) + { +- struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); ++ struct dst_entry *dst = skb_dst(skb); ++ struct neighbour *mn; ++ int res; ++ + if (txq->qdisc == &noop_qdisc) + return -ENODEV; + +- if (dev->header_ops == NULL || +- skb_dst(skb) == NULL || +- dst_get_neighbour(skb_dst(skb)) == NULL) ++ if (!dev->header_ops || !dst) + return 0; +- return __teql_resolve(skb, skb_res, dev); ++ ++ rcu_read_lock(); ++ mn = dst_get_neighbour(dst); ++ res = mn ? 
__teql_resolve(skb, skb_res, dev, txq, mn) : 0; ++ rcu_read_unlock(); ++ ++ return res; + } + + static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) +@@ -307,7 +316,7 @@ restart: + continue; + } + +- switch (teql_resolve(skb, skb_res, slave)) { ++ switch (teql_resolve(skb, skb_res, slave, slave_txq)) { + case 0: + if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); diff --git a/queue-3.1/series b/queue-3.1/series index c3ea4388184..ee27099b979 100644 --- a/queue-3.1/series +++ b/queue-3.1/series @@ -102,3 +102,17 @@ jump_label-jump_label_inc-may-return-before-the-code-is-patched.patch oprofile-fix-crash-when-unloading-module-hr-timer-mode.patch clocksource-fix-bug-with-max_deferment-margin-calculation.patch clockevents-set-noop-handler-in-clockevents_exchange_device.patch +ah-correctly-pass-error-codes-in-ahash-output-callback.patch +ah-read-nexthdr-value-before-overwriting-it-in-ahash-input-callback.patch +ipv4-fix-for-ip_options_rcv_srr-daddr-update.patch +ipv4-fix-lockdep-splat-in-rt_cache_seq_show.patch +qeth-l3-fix-rcu-splat-in-xmit.patch +sch_teql-fix-lockdep-splat.patch +ipv4-avoid-useless-call-of-the-function-check_peer_pmtu.patch +route-fix-icmp-redirect-validation.patch +ipv4-fix-redirect-handling.patch +inet-add-a-redirect-generation-id-in-inetpeer.patch +ipv4-perform-peer-validation-on-cached-route-lookup.patch +ipv4-make-sure-rto_onlink-is-saved-in-routing-cache.patch +sch_red-fix-red_calc_qavg_from_idle_time.patch +sch_red-fix-red_change.patch