From 795bb09fcdf3ea34901c6f2c487b8618af889703 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 7 Aug 2014 20:48:44 -0700 Subject: [PATCH] 3.4-stable patches added patches: inetpeer-get-rid-of-ip_id_count.patch iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch ip-make-ip-identifiers-less-predictable.patch macvlan-initialize-vlan_features-to-turn-on-offload-support.patch net-correctly-set-segment-mac_len-in-skb_segment.patch net-sctp-inherit-auth_capable-on-init-collisions.patch net-sendmsg-fix-null-pointer-dereference.patch sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch tcp-fix-integer-overflow-in-tcp-vegas.patch tcp-fix-integer-overflows-in-tcp-veno.patch --- .../inetpeer-get-rid-of-ip_id_count.patch | 516 ++++++++++++++++++ ...ants-anything-in-memcpy_fromiovecend.patch | 32 ++ ...make-ip-identifiers-less-predictable.patch | 149 +++++ ..._features-to-turn-on-offload-support.patch | 31 ++ ...y-set-segment-mac_len-in-skb_segment.patch | 63 +++ ...erit-auth_capable-on-init-collisions.patch | 182 ++++++ ...sendmsg-fix-null-pointer-dereference.patch | 127 +++++ ...ock-seadlock-in-sctp_packet_transmit.patch | 71 +++ queue-3.4/series | 10 + ...cp-fix-integer-overflow-in-tcp-vegas.patch | 40 ++ ...cp-fix-integer-overflows-in-tcp-veno.patch | 36 ++ 11 files changed, 1257 insertions(+) create mode 100644 queue-3.4/inetpeer-get-rid-of-ip_id_count.patch create mode 100644 queue-3.4/iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch create mode 100644 queue-3.4/ip-make-ip-identifiers-less-predictable.patch create mode 100644 queue-3.4/macvlan-initialize-vlan_features-to-turn-on-offload-support.patch create mode 100644 queue-3.4/net-correctly-set-segment-mac_len-in-skb_segment.patch create mode 100644 queue-3.4/net-sctp-inherit-auth_capable-on-init-collisions.patch create mode 100644 queue-3.4/net-sendmsg-fix-null-pointer-dereference.patch create mode 100644 queue-3.4/sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch create mode 100644 queue-3.4/series create mode 100644 queue-3.4/tcp-fix-integer-overflow-in-tcp-vegas.patch create mode 100644 queue-3.4/tcp-fix-integer-overflows-in-tcp-veno.patch diff --git a/queue-3.4/inetpeer-get-rid-of-ip_id_count.patch b/queue-3.4/inetpeer-get-rid-of-ip_id_count.patch new file mode 100644 index 00000000000..48c4164f9c4 --- /dev/null +++ b/queue-3.4/inetpeer-get-rid-of-ip_id_count.patch @@ -0,0 +1,516 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Eric Dumazet +Date: Mon, 2 Jun 2014 05:26:03 -0700 +Subject: inetpeer: get rid of ip_id_count + +From: Eric Dumazet + +[ Upstream commit 73f156a6e8c1074ac6327e0abd1169e95eb66463 ] + +Ideally, we would need to generate IP ID using a per destination IP +generator. + +linux kernels used inet_peer cache for this purpose, but this had a huge +cost on servers disabling MTU discovery. + +1) each inet_peer struct consumes 192 bytes + +2) inetpeer cache uses a binary tree of inet_peer structs, + with a nominal size of ~66000 elements under load. + +3) lookups in this tree are hitting a lot of cache lines, as tree depth + is about 20. + +4) If server deals with many tcp flows, we have a high probability of + not finding the inet_peer, allocating a fresh one, inserting it in + the tree with same initial ip_id_count, (cf secure_ip_id()) + +5) We garbage collect inet_peer aggressively. 
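
The scheme this patch moves to, described just below, replaces the per-destination inet_peer counter with a small hashed array of ID generators. A minimal standalone C sketch of the idea (simplified on purpose: a stand-in mixer instead of jhash_1word(), plain counters instead of atomic_t; the real implementation is in the net/ipv4/route.c hunk further down):

#include <stdint.h>
#include <stdio.h>

#define IP_IDENTS_SZ 2048u

static uint32_t ip_idents[IP_IDENTS_SZ]; /* kernel: array of atomic_t */
static uint32_t ip_idents_hashrnd;       /* kernel: get_random_bytes() */

/* Stand-in for jhash_1word(); any keyed 32-bit mixer illustrates the point. */
static uint32_t mix(uint32_t v, uint32_t key)
{
        v ^= key;
        v *= 0x9e3779b1u;
        return v ^ (v >> 16);
}

/* Reserve 'segs' consecutive IDs from the bucket picked by the destination
 * address and return the first one, mirroring ip_idents_reserve() below
 * (which uses atomic_add_return() so concurrent senders stay correct). */
static uint32_t ip_idents_reserve_sketch(uint32_t daddr, int segs)
{
        uint32_t *id = &ip_idents[mix(daddr, ip_idents_hashrnd) % IP_IDENTS_SZ];
        uint32_t first = *id;

        *id += (uint32_t)segs;
        return first;
}

int main(void)
{
        ip_idents_hashrnd = 0x12345678; /* fixed key only for the demo */

        /* A 4-segment GSO packet consumes four consecutive IDs. */
        printf("id=%u\n", (unsigned)(ip_idents_reserve_sketch(0x0a000001, 4) & 0xffff));
        printf("id=%u\n", (unsigned)(ip_idents_reserve_sketch(0x0a000001, 1) & 0xffff));
        return 0;
}

The follow-up ip-make-ip-identifiers-less-predictable.patch in this series hardens the same reserve step by adding a random, idle-time-scaled increment per bucket.
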
+ +IP ID generation do not have to be 'perfect' + +Goal is trying to avoid duplicates in a short period of time, +so that reassembly units have a chance to complete reassembly of +fragments belonging to one message before receiving other fragments +with a recycled ID. + +We simply use an array of generators, and a Jenkin hash using the dst IP +as a key. + +ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it +belongs (it is only used from this file) + +secure_ip_id() and secure_ipv6_id() no longer are needed. + +Rename ip_select_ident_more() to ip_select_ident_segs() to avoid +unnecessary decrement/increment of the number of segments. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pptp.c | 2 - + include/net/inetpeer.h | 14 +--------- + include/net/ip.h | 40 ++++++++++++++++------------ + include/net/ipip.h | 2 - + include/net/ipv6.h | 9 +++++- + include/net/secure_seq.h | 2 - + net/core/secure_seq.c | 23 ---------------- + net/ipv4/igmp.c | 4 +- + net/ipv4/inetpeer.c | 18 ------------ + net/ipv4/ip_output.c | 7 ++--- + net/ipv4/ipmr.c | 2 - + net/ipv4/raw.c | 2 - + net/ipv4/route.c | 56 +++++++++++++--------------------------- + net/ipv4/xfrm4_mode_tunnel.c | 2 - + net/ipv6/ip6_output.c | 23 ++++++---------- + net/netfilter/ipvs/ip_vs_xmit.c | 2 - + 16 files changed, 71 insertions(+), 137 deletions(-) + +--- a/drivers/net/ppp/pptp.c ++++ b/drivers/net/ppp/pptp.c +@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel + nf_reset(skb); + + skb->ip_summed = CHECKSUM_NONE; +- ip_select_ident(skb, &rt->dst, NULL); ++ ip_select_ident(skb, NULL); + ip_send_check(iph); + + ip_local_out(skb); +--- a/include/net/inetpeer.h ++++ b/include/net/inetpeer.h +@@ -46,13 +46,12 @@ struct inet_peer { + }; + /* + * Once inet_peer is queued for deletion (refcnt == -1), following fields +- * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp ++ * are not available: rid, tcp_ts, tcp_ts_stamp + * We can share memory with rcu_head to help keep inet_peer small. 
+ */ + union { + struct { + atomic_t rid; /* Frag reception counter */ +- atomic_t ip_id_count; /* IP ID for the next packet */ + __u32 tcp_ts; + __u32 tcp_ts_stamp; + }; +@@ -102,7 +101,7 @@ extern bool inet_peer_xrlim_allow(struct + extern void inetpeer_invalidate_tree(int family); + + /* +- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, ++ * temporary check to make sure we dont access rid, tcp_ts, + * tcp_ts_stamp if no refcount is taken on inet_peer + */ + static inline void inet_peer_refcheck(const struct inet_peer *p) +@@ -110,13 +109,4 @@ static inline void inet_peer_refcheck(co + WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); + } + +- +-/* can be called with or without local BH being disabled */ +-static inline int inet_getid(struct inet_peer *p, int more) +-{ +- more++; +- inet_peer_refcheck(p); +- return atomic_add_return(more, &p->ip_id_count) - more; +-} +- + #endif /* _NET_INETPEER_H */ +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -264,9 +264,19 @@ int ip_dont_fragment(struct sock *sk, st + !(dst_metric_locked(dst, RTAX_MTU))); + } + +-extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); ++#define IP_IDENTS_SZ 2048u ++extern atomic_t *ip_idents; + +-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) ++static inline u32 ip_idents_reserve(u32 hash, int segs) ++{ ++ atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; ++ ++ return atomic_add_return(segs, id_ptr) - segs; ++} ++ ++void __ip_select_ident(struct iphdr *iph, int segs); ++ ++static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) + { + struct iphdr *iph = ip_hdr(skb); + +@@ -276,24 +286,20 @@ static inline void ip_select_ident(struc + * does not change, they drop every other packet in + * a TCP stream using header compression. + */ +- iph->id = (sk && inet_sk(sk)->inet_daddr) ? 
+- htons(inet_sk(sk)->inet_id++) : 0; +- } else +- __ip_select_ident(iph, dst, 0); +-} +- +-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) +-{ +- struct iphdr *iph = ip_hdr(skb); +- +- if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) { + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); +- inet_sk(sk)->inet_id += 1 + more; +- } else ++ inet_sk(sk)->inet_id += segs; ++ } else { + iph->id = 0; +- } else +- __ip_select_ident(iph, dst, more); ++ } ++ } else { ++ __ip_select_ident(iph, segs); ++ } ++} ++ ++static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) ++{ ++ ip_select_ident_segs(skb, sk, 1); + } + + /* +--- a/include/net/ipip.h ++++ b/include/net/ipip.h +@@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry { + int pkt_len = skb->len - skb_transport_offset(skb); \ + \ + skb->ip_summed = CHECKSUM_NONE; \ +- ip_select_ident(skb, &rt->dst, NULL); \ ++ ip_select_ident(skb, NULL); \ + \ + err = ip_local_out(skb); \ + if (likely(net_xmit_eval(err) == 0)) { \ +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -392,14 +392,19 @@ void ip6_frag_init(struct inet_frag_queu + int ip6_frag_match(struct inet_frag_queue *q, void *a); + + /* more secured version of ipv6_addr_hash() */ +-static inline u32 ipv6_addr_jhash(const struct in6_addr *a) ++static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) + { + u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; + + return jhash_3words(v, + (__force u32)a->s6_addr32[2], + (__force u32)a->s6_addr32[3], +- ipv6_hash_secret); ++ initval); ++} ++ ++static inline u32 ipv6_addr_jhash(const struct in6_addr *a) ++{ ++ return __ipv6_addr_jhash(a, ipv6_hash_secret); + } + + static inline int ipv6_addr_any(const struct in6_addr *a) +--- a/include/net/secure_seq.h ++++ b/include/net/secure_seq.h +@@ -3,8 +3,6 @@ + + #include + +-extern __u32 secure_ip_id(__be32 daddr); +-extern __u32 secure_ipv6_id(const __be32 daddr[4]); + extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); + extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -79,29 +79,6 @@ u32 secure_ipv6_port_ephemeral(const __b + #endif + + #ifdef CONFIG_INET +-__u32 secure_ip_id(__be32 daddr) +-{ +- u32 hash[MD5_DIGEST_WORDS]; +- +- hash[0] = (__force __u32) daddr; +- hash[1] = net_secret[13]; +- hash[2] = net_secret[14]; +- hash[3] = net_secret[15]; +- +- md5_transform(hash, net_secret); +- +- return hash[0]; +-} +- +-__u32 secure_ipv6_id(const __be32 daddr[4]) +-{ +- __u32 hash[4]; +- +- memcpy(hash, daddr, 16); +- md5_transform(hash, net_secret); +- +- return hash[0]; +-} + + __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -343,7 +343,7 @@ static struct sk_buff *igmpv3_newpack(st + pip->saddr = fl4.saddr; + pip->protocol = IPPROTO_IGMP; + pip->tot_len = 0; /* filled in later */ +- ip_select_ident(skb, &rt->dst, NULL); ++ ip_select_ident(skb, NULL); + ((u8*)&pip[1])[0] = IPOPT_RA; + ((u8*)&pip[1])[1] = 4; + ((u8*)&pip[1])[2] = 0; +@@ -687,7 +687,7 @@ static int igmp_send_report(struct in_de + iph->daddr = dst; + iph->saddr = fl4.saddr; + iph->protocol = IPPROTO_IGMP; +- ip_select_ident(skb, &rt->dst, NULL); ++ ip_select_ident(skb, NULL); + ((u8*)&iph[1])[0] = IPOPT_RA; + ((u8*)&iph[1])[1] = 4; + ((u8*)&iph[1])[2] = 0; +--- 
a/net/ipv4/inetpeer.c ++++ b/net/ipv4/inetpeer.c +@@ -26,20 +26,7 @@ + * Theory of operations. + * We keep one entry for each peer IP address. The nodes contains long-living + * information about the peer which doesn't depend on routes. +- * At this moment this information consists only of ID field for the next +- * outgoing IP packet. This field is incremented with each packet as encoded +- * in inet_getid() function (include/net/inetpeer.h). +- * At the moment of writing this notes identifier of IP packets is generated +- * to be unpredictable using this code only for packets subjected +- * (actually or potentially) to defragmentation. I.e. DF packets less than +- * PMTU in size when local fragmentation is disabled use a constant ID and do +- * not use this code (see ip_select_ident() in include/net/ip.h). + * +- * Route cache entries hold references to our nodes. +- * New cache entries get references via lookup by destination IP address in +- * the avl tree. The reference is grabbed only when it's needed i.e. only +- * when we try to output IP packet which needs an unpredictable ID (see +- * __ip_select_ident() in net/ipv4/route.c). + * Nodes are removed only when reference counter goes to 0. + * When it's happened the node may be removed when a sufficient amount of + * time has been passed since its last use. The less-recently-used entry can +@@ -62,7 +49,6 @@ + * refcnt: atomically against modifications on other CPU; + * usually under some other lock to prevent node disappearing + * daddr: unchangeable +- * ip_id_count: atomic value (no lock needed) + */ + + static struct kmem_cache *peer_cachep __read_mostly; +@@ -488,10 +474,6 @@ relookup: + p->daddr = *daddr; + atomic_set(&p->refcnt, 1); + atomic_set(&p->rid, 0); +- atomic_set(&p->ip_id_count, +- (daddr->family == AF_INET) ? +- secure_ip_id(daddr->addr.a4) : +- secure_ipv6_id(daddr->addr.a6)); + p->tcp_ts_stamp = 0; + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -161,7 +161,7 @@ int ip_build_and_send_pkt(struct sk_buff + iph->daddr = (opt && opt->opt.srr ? 
opt->opt.faddr : daddr); + iph->saddr = saddr; + iph->protocol = sk->sk_protocol; +- ip_select_ident(skb, &rt->dst, sk); ++ ip_select_ident(skb, sk); + + if (opt && opt->opt.optlen) { + iph->ihl += opt->opt.optlen>>2; +@@ -403,8 +403,7 @@ packet_routed: + ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); + } + +- ip_select_ident_more(skb, &rt->dst, sk, +- (skb_shinfo(skb)->gso_segs ?: 1) - 1); ++ ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1); + + skb->priority = sk->sk_priority; + skb->mark = sk->sk_mark; +@@ -1347,7 +1346,7 @@ struct sk_buff *__ip_make_skb(struct soc + iph->ihl = 5; + iph->tos = inet->tos; + iph->frag_off = df; +- ip_select_ident(skb, &rt->dst, sk); ++ ip_select_ident(skb, sk); + iph->ttl = ttl; + iph->protocol = sk->sk_protocol; + ip_copy_addrs(iph, fl4); +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -1576,7 +1576,7 @@ static void ip_encap(struct sk_buff *skb + iph->protocol = IPPROTO_IPIP; + iph->ihl = 5; + iph->tot_len = htons(skb->len); +- ip_select_ident(skb, skb_dst(skb), NULL); ++ ip_select_ident(skb, NULL); + ip_send_check(iph); + + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); +--- a/net/ipv4/raw.c ++++ b/net/ipv4/raw.c +@@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock * + iph->check = 0; + iph->tot_len = htons(length); + if (!iph->id) +- ip_select_ident(skb, &rt->dst, NULL); ++ ip_select_ident(skb, NULL); + + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + } +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1341,46 +1341,23 @@ void rt_bind_peer(struct rtable *rt, __b + rt->rt_peer_genid = rt_peer_genid(); + } + +-/* +- * Peer allocation may fail only in serious out-of-memory conditions. However +- * we still can generate some output. +- * Random ID selection looks a bit dangerous because we have no chances to +- * select ID being unique in a reasonable period of time. +- * But broken packet identifier may be better than no packet at all. +- */ +-static void ip_select_fb_ident(struct iphdr *iph) +-{ +- static DEFINE_SPINLOCK(ip_fb_id_lock); +- static u32 ip_fallback_id; +- u32 salt; ++atomic_t *ip_idents __read_mostly; ++EXPORT_SYMBOL(ip_idents); + +- spin_lock_bh(&ip_fb_id_lock); +- salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr); +- iph->id = htons(salt & 0xFFFF); +- ip_fallback_id = salt; +- spin_unlock_bh(&ip_fb_id_lock); +-} +- +-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) ++void __ip_select_ident(struct iphdr *iph, int segs) + { +- struct rtable *rt = (struct rtable *) dst; ++ static u32 ip_idents_hashrnd __read_mostly; ++ static bool hashrnd_initialized = false; ++ u32 hash, id; + +- if (rt && !(rt->dst.flags & DST_NOPEER)) { +- if (rt->peer == NULL) +- rt_bind_peer(rt, rt->rt_dst, 1); +- +- /* If peer is attached to destination, it is never detached, +- so that we need not to grab a lock to dereference it. 
+- */ +- if (rt->peer) { +- iph->id = htons(inet_getid(rt->peer, more)); +- return; +- } +- } else if (!rt) +- printk(KERN_DEBUG "rt_bind_peer(0) @%p\n", +- __builtin_return_address(0)); ++ if (unlikely(!hashrnd_initialized)) { ++ hashrnd_initialized = true; ++ get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); ++ } + +- ip_select_fb_ident(iph); ++ hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); ++ id = ip_idents_reserve(hash, segs); ++ iph->id = htons(id); + } + EXPORT_SYMBOL(__ip_select_ident); + +@@ -3009,7 +2986,6 @@ static int rt_fill_info(struct net *net, + error = rt->dst.error; + if (peer) { + inet_peer_refcheck(rt->peer); +- id = atomic_read(&peer->ip_id_count) & 0xffff; + if (peer->tcp_ts_stamp) { + ts = peer->tcp_ts; + tsage = get_seconds() - peer->tcp_ts_stamp; +@@ -3441,6 +3417,12 @@ int __init ip_rt_init(void) + { + int rc = 0; + ++ ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); ++ if (!ip_idents) ++ panic("IP: failed to allocate ip_idents\n"); ++ ++ get_random_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); ++ + #ifdef CONFIG_IP_ROUTE_CLASSID + ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); + if (!ip_rt_acct) +--- a/net/ipv4/xfrm4_mode_tunnel.c ++++ b/net/ipv4/xfrm4_mode_tunnel.c +@@ -54,12 +54,12 @@ static int xfrm4_mode_tunnel_output(stru + + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); +- ip_select_ident(skb, dst->child, NULL); + + top_iph->ttl = ip4_dst_hoplimit(dst->child); + + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; ++ ip_select_ident(skb, NULL); + + return 0; + } +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -599,22 +599,17 @@ int ip6_find_1stfragopt(struct sk_buff * + + void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) + { +- static atomic_t ipv6_fragmentation_id; +- int ident; ++ static u32 ip6_idents_hashrnd __read_mostly; ++ static bool hashrnd_initialized = false; ++ u32 hash, id; + +- if (rt && !(rt->dst.flags & DST_NOPEER)) { +- struct inet_peer *peer; +- +- if (!rt->rt6i_peer) +- rt6_bind_peer(rt, 1); +- peer = rt->rt6i_peer; +- if (peer) { +- fhdr->identification = htonl(inet_getid(peer, 0)); +- return; +- } ++ if (unlikely(!hashrnd_initialized)) { ++ hashrnd_initialized = true; ++ get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + } +- ident = atomic_inc_return(&ipv6_fragmentation_id); +- fhdr->identification = htonl(ident); ++ hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); ++ id = ip_idents_reserve(hash, 1); ++ fhdr->identification = htonl(id); + } + + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +--- a/net/netfilter/ipvs/ip_vs_xmit.c ++++ b/net/netfilter/ipvs/ip_vs_xmit.c +@@ -853,7 +853,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s + iph->daddr = cp->daddr.ip; + iph->saddr = saddr; + iph->ttl = old_iph->ttl; +- ip_select_ident(skb, &rt->dst, NULL); ++ ip_select_ident(skb, NULL); + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; diff --git a/queue-3.4/iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch b/queue-3.4/iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch new file mode 100644 index 00000000000..cc8d11177cf --- /dev/null +++ b/queue-3.4/iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch @@ -0,0 +1,32 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 
+From: Sasha Levin +Date: Thu, 31 Jul 2014 23:00:35 -0400 +Subject: iovec: make sure the caller actually wants anything in memcpy_fromiovecend + +From: Sasha Levin + +[ Upstream commit 06ebb06d49486676272a3c030bfeef4bd969a8e6 ] + +Check for cases when the caller requests 0 bytes instead of running off +and dereferencing potentially invalid iovecs. + +Signed-off-by: Sasha Levin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/iovec.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/core/iovec.c ++++ b/net/core/iovec.c +@@ -157,6 +157,10 @@ EXPORT_SYMBOL(memcpy_fromiovec); + int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, + int offset, int len) + { ++ /* No data? Done! */ ++ if (len == 0) ++ return 0; ++ + /* Skip over the finished iovecs */ + while (offset >= iov->iov_len) { + offset -= iov->iov_len; diff --git a/queue-3.4/ip-make-ip-identifiers-less-predictable.patch b/queue-3.4/ip-make-ip-identifiers-less-predictable.patch new file mode 100644 index 00000000000..6a31c1c120c --- /dev/null +++ b/queue-3.4/ip-make-ip-identifiers-less-predictable.patch @@ -0,0 +1,149 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Eric Dumazet +Date: Sat, 26 Jul 2014 08:58:10 +0200 +Subject: ip: make IP identifiers less predictable + +From: Eric Dumazet + +[ Upstream commit 04ca6973f7c1a0d8537f2d9906a0cf8e69886d75 ] + +In "Counting Packets Sent Between Arbitrary Internet Hosts", Jeffrey and +Jedidiah describe ways exploiting linux IP identifier generation to +infer whether two machines are exchanging packets. + +With commit 73f156a6e8c1 ("inetpeer: get rid of ip_id_count"), we +changed IP id generation, but this does not really prevent this +side-channel technique. + +This patch adds a random amount of perturbation so that IP identifiers +for a given destination [1] are no longer monotonically increasing after +an idle period. + +Note that prandom_u32_max(1) returns 0, so if generator is used at most +once per jiffy, this patch inserts no hole in the ID suite and do not +increase collision probability. + +This is jiffies based, so in the worst case (HZ=1000), the id can +rollover after ~65 seconds of idle time, which should be fine. + +We also change the hash used in __ip_select_ident() to not only hash +on daddr, but also saddr and protocol, so that ICMP probes can not be +used to infer information for other protocols. + +For IPv6, adds saddr into the hash as well, but not nexthdr. + +If I ping the patched target, we can see ID are now hard to predict. + +21:57:11.008086 IP (...) + A > target: ICMP echo request, seq 1, length 64 +21:57:11.010752 IP (... id 2081 ...) + target > A: ICMP echo reply, seq 1, length 64 + +21:57:12.013133 IP (...) + A > target: ICMP echo request, seq 2, length 64 +21:57:12.015737 IP (... id 3039 ...) + target > A: ICMP echo reply, seq 2, length 64 + +21:57:13.016580 IP (...) + A > target: ICMP echo request, seq 3, length 64 +21:57:13.019251 IP (... id 3437 ...) + target > A: ICMP echo reply, seq 3, length 64 + +[1] TCP sessions uses a per flow ID generator not changed by this patch. + +Signed-off-by: Eric Dumazet +Reported-by: Jeffrey Knockel +Reported-by: Jedidiah R. Crandall +Cc: Willy Tarreau +Cc: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip.h | 11 +---------- + net/ipv4/route.c | 36 +++++++++++++++++++++++++++++++++--- + net/ipv6/ip6_output.c | 2 ++ + 3 files changed, 36 insertions(+), 13 deletions(-) + +--- a/include/net/ip.h ++++ b/include/net/ip.h +@@ -264,16 +264,7 @@ int ip_dont_fragment(struct sock *sk, st + !(dst_metric_locked(dst, RTAX_MTU))); + } + +-#define IP_IDENTS_SZ 2048u +-extern atomic_t *ip_idents; +- +-static inline u32 ip_idents_reserve(u32 hash, int segs) +-{ +- atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; +- +- return atomic_add_return(segs, id_ptr) - segs; +-} +- ++u32 ip_idents_reserve(u32 hash, int segs); + void __ip_select_ident(struct iphdr *iph, int segs); + + static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1341,8 +1341,35 @@ void rt_bind_peer(struct rtable *rt, __b + rt->rt_peer_genid = rt_peer_genid(); + } + +-atomic_t *ip_idents __read_mostly; +-EXPORT_SYMBOL(ip_idents); ++#define IP_IDENTS_SZ 2048u ++struct ip_ident_bucket { ++ atomic_t id; ++ u32 stamp32; ++}; ++ ++static struct ip_ident_bucket *ip_idents __read_mostly; ++ ++/* In order to protect privacy, we add a perturbation to identifiers ++ * if one generator is seldom used. This makes hard for an attacker ++ * to infer how many packets were sent between two points in time. ++ */ ++u32 ip_idents_reserve(u32 hash, int segs) ++{ ++ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; ++ u32 old = ACCESS_ONCE(bucket->stamp32); ++ u32 now = (u32)jiffies; ++ u32 delta = 0; ++ ++ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) { ++ u64 x = random32(); ++ ++ x *= (now - old); ++ delta = (u32)(x >> 32); ++ } ++ ++ return atomic_add_return(segs + delta, &bucket->id) - segs; ++} ++EXPORT_SYMBOL(ip_idents_reserve); + + void __ip_select_ident(struct iphdr *iph, int segs) + { +@@ -1355,7 +1382,10 @@ void __ip_select_ident(struct iphdr *iph + get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + } + +- hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); ++ hash = jhash_3words((__force u32)iph->daddr, ++ (__force u32)iph->saddr, ++ iph->protocol, ++ ip_idents_hashrnd); + id = ip_idents_reserve(hash, segs); + iph->id = htons(id); + } +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -608,6 +608,8 @@ void ipv6_select_ident(struct frag_hdr * + get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + } + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); ++ hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); ++ + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); + } diff --git a/queue-3.4/macvlan-initialize-vlan_features-to-turn-on-offload-support.patch b/queue-3.4/macvlan-initialize-vlan_features-to-turn-on-offload-support.patch new file mode 100644 index 00000000000..111d967860c --- /dev/null +++ b/queue-3.4/macvlan-initialize-vlan_features-to-turn-on-offload-support.patch @@ -0,0 +1,31 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Vlad Yasevich +Date: Thu, 31 Jul 2014 10:30:25 -0400 +Subject: macvlan: Initialize vlan_features to turn on offload support. + +From: Vlad Yasevich + +[ Upstream commit 081e83a78db9b0ae1f5eabc2dedecc865f509b98 ] + +Macvlan devices do not initialize vlan_features. As a result, +any vlan devices configured on top of macvlans perform very poorly. +Initialize vlan_features based on the vlan features of the lower-level +device. 
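
The poor performance follows from feature intersection: a VLAN device stacked on top keeps only the offloads its lower device also advertises in vlan_features, so a zero vlan_features disables them all. A simplified illustrative sketch of that effect (hypothetical helper and a reduced feature set; this is not the 802.1q code):

#include <stdint.h>
#include <stdio.h>

#define NETIF_F_SG      (1u << 0) /* scatter/gather I/O */
#define NETIF_F_HW_CSUM (1u << 1) /* hardware checksumming */
#define NETIF_F_TSO     (1u << 2) /* TCP segmentation offload */

/* Offloads usable on the stacked device: supported in general AND for
 * VLAN-tagged frames. With macvlan leaving vlan_features at 0, this
 * intersection is empty and everything falls back to software. */
static uint32_t stacked_vlan_features(uint32_t lower_features,
                                      uint32_t lower_vlan_features)
{
        return lower_features & lower_vlan_features;
}

int main(void)
{
        uint32_t lower = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;

        printf("before fix: %#x\n", (unsigned)stacked_vlan_features(lower, 0));
        printf("after fix:  %#x\n", (unsigned)stacked_vlan_features(lower, lower));
        return 0;
}
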
+ +Signed-off-by: Vlad Yasevich +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvlan.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -455,6 +455,7 @@ static int macvlan_init(struct net_devic + (lowerdev->state & MACVLAN_STATE_MASK); + dev->features = lowerdev->features & MACVLAN_FEATURES; + dev->features |= NETIF_F_LLTX; ++ dev->vlan_features = lowerdev->vlan_features & MACVLAN_FEATURES; + dev->gso_max_size = lowerdev->gso_max_size; + dev->iflink = lowerdev->ifindex; + dev->hard_header_len = lowerdev->hard_header_len; diff --git a/queue-3.4/net-correctly-set-segment-mac_len-in-skb_segment.patch b/queue-3.4/net-correctly-set-segment-mac_len-in-skb_segment.patch new file mode 100644 index 00000000000..21516e0de64 --- /dev/null +++ b/queue-3.4/net-correctly-set-segment-mac_len-in-skb_segment.patch @@ -0,0 +1,63 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Vlad Yasevich +Date: Thu, 31 Jul 2014 10:33:06 -0400 +Subject: net: Correctly set segment mac_len in skb_segment(). + +From: Vlad Yasevich + +[ Upstream commit fcdfe3a7fa4cb74391d42b6a26dc07c20dab1d82 ] + +When performing segmentation, the mac_len value is copied right +out of the original skb. However, this value is not always set correctly +(like when the packet is VLAN-tagged) and we'll end up copying a bad +value. + +One way to demonstrate this is to configure a VM which tags +packets internally and turn off VLAN acceleration on the forwarding +bridge port. The packets show up corrupt like this: +16:18:24.985548 52:54:00:ab:be:25 > 52:54:00:26:ce:a3, ethertype 802.1Q +(0x8100), length 1518: vlan 100, p 0, ethertype 0x05e0, + 0x0000: 8cdb 1c7c 8cdb 0064 4006 b59d 0a00 6402 ...|...d@.....d. + 0x0010: 0a00 6401 9e0d b441 0a5e 64ec 0330 14fa ..d....A.^d..0.. + 0x0020: 29e3 01c9 f871 0000 0101 080a 000a e833)....q.........3 + 0x0030: 000f 8c75 6e65 7470 6572 6600 6e65 7470 ...unetperf.netp + 0x0040: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp + 0x0050: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp + 0x0060: 6572 6600 6e65 7470 6572 6600 6e65 7470 erf.netperf.netp + ... + +This also leads to awful throughput as GSO packets are dropped and +cause retransmissions. + +The solution is to set the mac_len using the values already available +in then new skb. We've already adjusted all of the header offset, so we +might as well correctly figure out the mac_len using skb_reset_mac_len(). +After this change, packets are segmented correctly and performance +is restored. + +CC: Eric Dumazet +Signed-off-by: Vlad Yasevich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -2748,7 +2748,6 @@ struct sk_buff *skb_segment(struct sk_bu + tail = nskb; + + __copy_skb_header(nskb, skb); +- nskb->mac_len = skb->mac_len; + + /* nskb and skb might have different headroom */ + if (nskb->ip_summed == CHECKSUM_PARTIAL) +@@ -2758,6 +2757,7 @@ struct sk_buff *skb_segment(struct sk_bu + skb_set_network_header(nskb, skb->mac_len); + nskb->transport_header = (nskb->network_header + + skb_network_header_len(skb)); ++ skb_reset_mac_len(nskb); + skb_copy_from_linear_data(skb, nskb->data, doffset); + + if (fskb != skb_shinfo(skb)->frag_list) diff --git a/queue-3.4/net-sctp-inherit-auth_capable-on-init-collisions.patch b/queue-3.4/net-sctp-inherit-auth_capable-on-init-collisions.patch new file mode 100644 index 00000000000..2cc716fd678 --- /dev/null +++ b/queue-3.4/net-sctp-inherit-auth_capable-on-init-collisions.patch @@ -0,0 +1,182 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Daniel Borkmann +Date: Tue, 22 Jul 2014 15:22:45 +0200 +Subject: net: sctp: inherit auth_capable on INIT collisions + +From: Daniel Borkmann + +[ Upstream commit 1be9a950c646c9092fb3618197f7b6bfb50e82aa ] + +Jason reported an oops caused by SCTP on his ARM machine with +SCTP authentication enabled: + +Internal error: Oops: 17 [#1] ARM +CPU: 0 PID: 104 Comm: sctp-test Not tainted 3.13.0-68744-g3632f30c9b20-dirty #1 +task: c6eefa40 ti: c6f52000 task.ti: c6f52000 +PC is at sctp_auth_calculate_hmac+0xc4/0x10c +LR is at sg_init_table+0x20/0x38 +pc : [] lr : [] psr: 40000013 +sp : c6f538e8 ip : 00000000 fp : c6f53924 +r10: c6f50d80 r9 : 00000000 r8 : 00010000 +r7 : 00000000 r6 : c7be4000 r5 : 00000000 r4 : c6f56254 +r3 : c00c8170 r2 : 00000001 r1 : 00000008 r0 : c6f1e660 +Flags: nZcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user +Control: 0005397f Table: 06f28000 DAC: 00000015 +Process sctp-test (pid: 104, stack limit = 0xc6f521c0) +Stack: (0xc6f538e8 to 0xc6f54000) +[...] +Backtrace: +[] (sctp_auth_calculate_hmac+0x0/0x10c) from [] (sctp_packet_transmit+0x33c/0x5c8) +[] (sctp_packet_transmit+0x0/0x5c8) from [] (sctp_outq_flush+0x7fc/0x844) +[] (sctp_outq_flush+0x0/0x844) from [] (sctp_outq_uncork+0x24/0x28) +[] (sctp_outq_uncork+0x0/0x28) from [] (sctp_side_effects+0x1134/0x1220) +[] (sctp_side_effects+0x0/0x1220) from [] (sctp_do_sm+0xac/0xd4) +[] (sctp_do_sm+0x0/0xd4) from [] (sctp_assoc_bh_rcv+0x118/0x160) +[] (sctp_assoc_bh_rcv+0x0/0x160) from [] (sctp_inq_push+0x6c/0x74) +[] (sctp_inq_push+0x0/0x74) from [] (sctp_rcv+0x7d8/0x888) + +While we already had various kind of bugs in that area +ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if +we/peer is AUTH capable") and b14878ccb7fa ("net: sctp: cache +auth_enable per endpoint"), this one is a bit of a different +kind. + +Giving a bit more background on why SCTP authentication is +needed can be found in RFC4895: + + SCTP uses 32-bit verification tags to protect itself against + blind attackers. These values are not changed during the + lifetime of an SCTP association. + + Looking at new SCTP extensions, there is the need to have a + method of proving that an SCTP chunk(s) was really sent by + the original peer that started the association and not by a + malicious attacker. 
+ +To cause this bug, we're triggering an INIT collision between +peers; normal SCTP handshake where both sides intent to +authenticate packets contains RANDOM; CHUNKS; HMAC-ALGO +parameters that are being negotiated among peers: + + ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> + <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- + -------------------- COOKIE-ECHO --------------------> + <-------------------- COOKIE-ACK --------------------- + +RFC4895 says that each endpoint therefore knows its own random +number and the peer's random number *after* the association +has been established. The local and peer's random number along +with the shared key are then part of the secret used for +calculating the HMAC in the AUTH chunk. + +Now, in our scenario, we have 2 threads with 1 non-blocking +SEQ_PACKET socket each, setting up common shared SCTP_AUTH_KEY +and SCTP_AUTH_ACTIVE_KEY properly, and each of them calling +sctp_bindx(3), listen(2) and connect(2) against each other, +thus the handshake looks similar to this, e.g.: + + ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> + <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- + <--------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------- + -------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------> + ... + +Since such collisions can also happen with verification tags, +the RFC4895 for AUTH rather vaguely says under section 6.1: + + In case of INIT collision, the rules governing the handling + of this Random Number follow the same pattern as those for + the Verification Tag, as explained in Section 5.2.4 of + RFC 2960 [5]. Therefore, each endpoint knows its own Random + Number and the peer's Random Number after the association + has been established. + +In RFC2960, section 5.2.4, we're eventually hitting Action B: + + B) In this case, both sides may be attempting to start an + association at about the same time but the peer endpoint + started its INIT after responding to the local endpoint's + INIT. Thus it may have picked a new Verification Tag not + being aware of the previous Tag it had sent this endpoint. + The endpoint should stay in or enter the ESTABLISHED + state but it MUST update its peer's Verification Tag from + the State Cookie, stop any init or cookie timers that may + running and send a COOKIE ACK. + +In other words, the handling of the Random parameter is the +same as behavior for the Verification Tag as described in +Action B of section 5.2.4. + +Looking at the code, we exactly hit the sctp_sf_do_dupcook_b() +case which triggers an SCTP_CMD_UPDATE_ASSOC command to the +side effect interpreter, and in fact it properly copies over +peer_{random, hmacs, chunks} parameters from the newly created +association to update the existing one. + +Also, the old asoc_shared_key is being released and based on +the new params, sctp_auth_asoc_init_active_key() updated. +However, the issue observed in this case is that the previous +asoc->peer.auth_capable was 0, and has *not* been updated, so +that instead of creating a new secret, we're doing an early +return from the function sctp_auth_asoc_init_active_key() +leaving asoc->asoc_shared_key as NULL. However, we now have to +authenticate chunks from the updated chunk list (e.g. COOKIE-ACK). + +That in fact causes the server side when responding with ... + + <------------------ AUTH; COOKIE-ACK ----------------- + +... 
to trigger a NULL pointer dereference, since in +sctp_packet_transmit(), it discovers that an AUTH chunk is +being queued for xmit, and thus it calls sctp_auth_calculate_hmac(). + +Since the asoc->active_key_id is still inherited from the +endpoint, and the same as encoded into the chunk, it uses +asoc->asoc_shared_key, which is still NULL, as an asoc_key +and dereferences it in ... + + crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len) + +... causing an oops. All this happens because sctp_make_cookie_ack() +called with the *new* association has the peer.auth_capable=1 +and therefore marks the chunk with auth=1 after checking +sctp_auth_send_cid(), but it is *actually* sent later on over +the then *updated* association's transport that didn't initialize +its shared key due to peer.auth_capable=0. Since control chunks +in that case are not sent by the temporary association which +are scheduled for deletion, they are issued for xmit via +SCTP_CMD_REPLY in the interpreter with the context of the +*updated* association. peer.auth_capable was 0 in the updated +association (which went from COOKIE_WAIT into ESTABLISHED state), +since all previous processing that performed sctp_process_init() +was being done on temporary associations, that we eventually +throw away each time. + +The correct fix is to update to the new peer.auth_capable +value as well in the collision case via sctp_assoc_update(), +so that in case the collision migrated from 0 -> 1, +sctp_auth_asoc_init_active_key() can properly recalculate +the secret. This therefore fixes the observed server panic. + +Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing") +Reported-by: Jason Gunthorpe +Signed-off-by: Daniel Borkmann +Tested-by: Jason Gunthorpe +Cc: Vlad Yasevich +Acked-by: Vlad Yasevich +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/associola.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -1188,6 +1188,7 @@ void sctp_assoc_update(struct sctp_assoc + asoc->c = new->c; + asoc->peer.rwnd = new->peer.rwnd; + asoc->peer.sack_needed = new->peer.sack_needed; ++ asoc->peer.auth_capable = new->peer.auth_capable; + asoc->peer.i = new->peer.i; + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, GFP_ATOMIC); diff --git a/queue-3.4/net-sendmsg-fix-null-pointer-dereference.patch b/queue-3.4/net-sendmsg-fix-null-pointer-dereference.patch new file mode 100644 index 00000000000..54a0306ecff --- /dev/null +++ b/queue-3.4/net-sendmsg-fix-null-pointer-dereference.patch @@ -0,0 +1,127 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Andrey Ryabinin +Date: Sat, 26 Jul 2014 21:26:58 +0400 +Subject: net: sendmsg: fix NULL pointer dereference + +From: Andrey Ryabinin + +[ Upstream commit 40eea803c6b2cfaab092f053248cbeab3f368412 ] + +Sasha's report: + > While fuzzing with trinity inside a KVM tools guest running the latest -next + > kernel with the KASAN patchset, I've stumbled on the following spew: + > + > [ 4448.949424] ================================================================== + > [ 4448.951737] AddressSanitizer: user-memory-access on address 0 + > [ 4448.952988] Read of size 2 by thread T19638: + > [ 4448.954510] CPU: 28 PID: 19638 Comm: trinity-c76 Not tainted 3.16.0-rc4-next-20140711-sasha-00046-g07d3099-dirty #813 + > [ 4448.956823] ffff88046d86ca40 0000000000000000 ffff880082f37e78 ffff880082f37a40 + > [ 4448.958233] ffffffffb6e47068 ffff880082f37a68 ffff880082f37a58 ffffffffb242708d + > [ 4448.959552] 0000000000000000 ffff880082f37a88 ffffffffb24255b1 0000000000000000 + > [ 4448.961266] Call Trace: + > [ 4448.963158] dump_stack (lib/dump_stack.c:52) + > [ 4448.964244] kasan_report_user_access (mm/kasan/report.c:184) + > [ 4448.965507] __asan_load2 (mm/kasan/kasan.c:352) + > [ 4448.966482] ? netlink_sendmsg (net/netlink/af_netlink.c:2339) + > [ 4448.967541] netlink_sendmsg (net/netlink/af_netlink.c:2339) + > [ 4448.968537] ? get_parent_ip (kernel/sched/core.c:2555) + > [ 4448.970103] sock_sendmsg (net/socket.c:654) + > [ 4448.971584] ? might_fault (mm/memory.c:3741) + > [ 4448.972526] ? might_fault (./arch/x86/include/asm/current.h:14 mm/memory.c:3740) + > [ 4448.973596] ? verify_iovec (net/core/iovec.c:64) + > [ 4448.974522] ___sys_sendmsg (net/socket.c:2096) + > [ 4448.975797] ? put_lock_stats.isra.13 (./arch/x86/include/asm/preempt.h:98 kernel/locking/lockdep.c:254) + > [ 4448.977030] ? lock_release_holdtime (kernel/locking/lockdep.c:273) + > [ 4448.978197] ? lock_release_non_nested (kernel/locking/lockdep.c:3434 (discriminator 1)) + > [ 4448.979346] ? check_chain_key (kernel/locking/lockdep.c:2188) + > [ 4448.980535] __sys_sendmmsg (net/socket.c:2181) + > [ 4448.981592] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600) + > [ 4448.982773] ? trace_hardirqs_on (kernel/locking/lockdep.c:2607) + > [ 4448.984458] ? syscall_trace_enter (arch/x86/kernel/ptrace.c:1500 (discriminator 2)) + > [ 4448.985621] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600) + > [ 4448.986754] SyS_sendmmsg (net/socket.c:2201) + > [ 4448.987708] tracesys (arch/x86/kernel/entry_64.S:542) + > [ 4448.988929] ================================================================== + +This reports means that we've come to netlink_sendmsg() with msg->msg_name == NULL and msg->msg_namelen > 0. 
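
Concretely, a call shaped like the following sketch reaches the protocol handler in that state (hypothetical userspace snippet, not the original trinity reproducer):

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* msg_name is NULL but msg_namelen is non-zero. Before this fix,
 * verify_iovec() preserved the non-zero length, so netlink_sendmsg()
 * could read a sockaddr from address 0 if that page was mapped;
 * after it, msg_namelen is forced to 0 and the address is ignored. */
static ssize_t send_with_null_name(int fd, const void *buf, size_t len)
{
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg;

        memset(&msg, 0, sizeof(msg));
        msg.msg_name = NULL;  /* no destination address... */
        msg.msg_namelen = 16; /* ...yet a non-zero length */
        msg.msg_iov = &iov;
        msg.msg_iovlen = 1;

        return sendmsg(fd, &msg, 0);
}
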
+ +After this report there was no usual "Unable to handle kernel NULL pointer dereference" +and this gave me a clue that address 0 is mapped and contains valid socket address structure in it. + +This bug was introduced in f3d3342602f8bcbf37d7c46641cb9bca7618eb1c +(net: rework recvmsg handler msg_name and msg_namelen logic). +Commit message states that: + "Set msg->msg_name = NULL if user specified a NULL in msg_name but had a + non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't + affect sendto as it would bail out earlier while trying to copy-in the + address." +But in fact this affects sendto when address 0 is mapped and contains +socket address structure in it. In such case copy-in address will succeed, +verify_iovec() function will successfully exit with msg->msg_namelen > 0 +and msg->msg_name == NULL. + +This patch fixes it by setting msg_namelen to 0 if msg_name == NULL. + +Cc: Hannes Frederic Sowa +Cc: Eric Dumazet +Cc: +Reported-by: Sasha Levin +Signed-off-by: Andrey Ryabinin +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/compat.c | 9 +++++---- + net/core/iovec.c | 6 +++--- + 2 files changed, 8 insertions(+), 7 deletions(-) + +--- a/net/compat.c ++++ b/net/compat.c +@@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *k + { + int tot_len; + +- if (kern_msg->msg_namelen) { ++ if (kern_msg->msg_name && kern_msg->msg_namelen) { + if (mode == VERIFY_READ) { + int err = move_addr_to_kernel(kern_msg->msg_name, + kern_msg->msg_namelen, +@@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *k + if (err < 0) + return err; + } +- if (kern_msg->msg_name) +- kern_msg->msg_name = kern_address; +- } else ++ kern_msg->msg_name = kern_address; ++ } else { + kern_msg->msg_name = NULL; ++ kern_msg->msg_namelen = 0; ++ } + + tot_len = iov_from_user_compat_to_kern(kern_iov, + (struct compat_iovec __user *)kern_msg->msg_iov, +--- a/net/core/iovec.c ++++ b/net/core/iovec.c +@@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struc + { + int size, ct, err; + +- if (m->msg_namelen) { ++ if (m->msg_name && m->msg_namelen) { + if (mode == VERIFY_READ) { + void __user *namep; + namep = (void __user __force *) m->msg_name; +@@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struc + if (err < 0) + return err; + } +- if (m->msg_name) +- m->msg_name = address; ++ m->msg_name = address; + } else { + m->msg_name = NULL; ++ m->msg_namelen = 0; + } + + size = m->msg_iovlen * sizeof(struct iovec); diff --git a/queue-3.4/sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch b/queue-3.4/sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch new file mode 100644 index 00000000000..55627260de0 --- /dev/null +++ b/queue-3.4/sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch @@ -0,0 +1,71 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Eric Dumazet +Date: Tue, 5 Aug 2014 16:49:52 +0200 +Subject: sctp: fix possible seqlock seadlock in sctp_packet_transmit() + +From: Eric Dumazet + +[ Upstream commit 757efd32d5ce31f67193cc0e6a56e4dffcc42fb1 ] + +Dave reported following splat, caused by improper use of +IP_INC_STATS_BH() in process context. 
+ +BUG: using __this_cpu_add() in preemptible [00000000] code: trinity-c117/14551 +caller is __this_cpu_preempt_check+0x13/0x20 +CPU: 3 PID: 14551 Comm: trinity-c117 Not tainted 3.16.0+ #33 + ffffffff9ec898f0 0000000047ea7e23 ffff88022d32f7f0 ffffffff9e7ee207 + 0000000000000003 ffff88022d32f818 ffffffff9e397eaa ffff88023ee70b40 + ffff88022d32f970 ffff8801c026d580 ffff88022d32f828 ffffffff9e397ee3 +Call Trace: + [] dump_stack+0x4e/0x7a + [] check_preemption_disabled+0xfa/0x100 + [] __this_cpu_preempt_check+0x13/0x20 + [] sctp_packet_transmit+0x692/0x710 [sctp] + [] sctp_outq_flush+0x2a2/0xc30 [sctp] + [] ? mark_held_locks+0x7c/0xb0 + [] ? _raw_spin_unlock_irqrestore+0x5d/0x80 + [] sctp_outq_uncork+0x1a/0x20 [sctp] + [] sctp_cmd_interpreter.isra.23+0x1142/0x13f0 [sctp] + [] sctp_do_sm+0xdb/0x330 [sctp] + [] ? preempt_count_sub+0xab/0x100 + [] ? sctp_cname+0x70/0x70 [sctp] + [] sctp_primitive_ASSOCIATE+0x3a/0x50 [sctp] + [] sctp_sendmsg+0x88f/0xe30 [sctp] + [] ? lock_release_holdtime.part.28+0x9a/0x160 + [] ? put_lock_stats.isra.27+0xe/0x30 + [] inet_sendmsg+0x104/0x220 + [] ? inet_sendmsg+0x5/0x220 + [] sock_sendmsg+0x9e/0xe0 + [] ? might_fault+0xb9/0xc0 + [] ? might_fault+0x5e/0xc0 + [] SYSC_sendto+0x124/0x1c0 + [] ? syscall_trace_enter+0x250/0x330 + [] SyS_sendto+0xe/0x10 + [] tracesys+0xdd/0xe2 + +This is a followup of commits f1d8cba61c3c4b ("inet: fix possible +seqlock deadlocks") and 7f88c6b23afbd315 ("ipv6: fix possible seqlock +deadlock in ip6_finish_output2") + +Signed-off-by: Eric Dumazet +Cc: Hannes Frederic Sowa +Reported-by: Dave Jones +Acked-by: Neil Horman +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -587,7 +587,7 @@ out: + return err; + no_route: + kfree_skb(nskb); +- IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); ++ IP_INC_STATS(&init_net, IPSTATS_MIB_OUTNOROUTES); + + /* FIXME: Returning the 'err' will effect all the associations + * associated with a socket, although only one of the paths of the diff --git a/queue-3.4/series b/queue-3.4/series new file mode 100644 index 00000000000..5a1ab0ebff3 --- /dev/null +++ b/queue-3.4/series @@ -0,0 +1,10 @@ +inetpeer-get-rid-of-ip_id_count.patch +ip-make-ip-identifiers-less-predictable.patch +net-sendmsg-fix-null-pointer-dereference.patch +tcp-fix-integer-overflows-in-tcp-veno.patch +tcp-fix-integer-overflow-in-tcp-vegas.patch +net-sctp-inherit-auth_capable-on-init-collisions.patch +macvlan-initialize-vlan_features-to-turn-on-offload-support.patch +net-correctly-set-segment-mac_len-in-skb_segment.patch +iovec-make-sure-the-caller-actually-wants-anything-in-memcpy_fromiovecend.patch +sctp-fix-possible-seqlock-seadlock-in-sctp_packet_transmit.patch diff --git a/queue-3.4/tcp-fix-integer-overflow-in-tcp-vegas.patch b/queue-3.4/tcp-fix-integer-overflow-in-tcp-vegas.patch new file mode 100644 index 00000000000..fb7cc5ce9b8 --- /dev/null +++ b/queue-3.4/tcp-fix-integer-overflow-in-tcp-vegas.patch @@ -0,0 +1,40 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Christoph Paasch +Date: Tue, 29 Jul 2014 13:40:57 +0200 +Subject: tcp: Fix integer-overflow in TCP vegas + +From: Christoph Paasch + +[ Upstream commit 1f74e613ded11517db90b2bd57e9464d9e0fb161 ] + +In vegas we do a multiplication of the cwnd and the rtt. This +may overflow and thus their result is stored in a u64. 
However, we first +need to cast the cwnd so that actually 64-bit arithmetic is done. + +Then, we need to do do_div to allow this to be used on 32-bit arches. + +Cc: Stephen Hemminger +Cc: Neal Cardwell +Cc: Eric Dumazet +Cc: David Laight +Cc: Doug Leith +Fixes: 8d3a564da34e (tcp: tcp_vegas cong avoid fix) +Signed-off-by: Christoph Paasch +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_vegas.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_vegas.c ++++ b/net/ipv4/tcp_vegas.c +@@ -218,7 +218,8 @@ static void tcp_vegas_cong_avoid(struct + * This is: + * (actual rate in segments) * baseRTT + */ +- target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt; ++ target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT; ++ do_div(target_cwnd, rtt); + + /* Calculate the difference between the window we had, + * and the window we would like to have. This quantity diff --git a/queue-3.4/tcp-fix-integer-overflows-in-tcp-veno.patch b/queue-3.4/tcp-fix-integer-overflows-in-tcp-veno.patch new file mode 100644 index 00000000000..6e5972b6fec --- /dev/null +++ b/queue-3.4/tcp-fix-integer-overflows-in-tcp-veno.patch @@ -0,0 +1,36 @@ +From foo@baz Thu Aug 7 20:47:07 PDT 2014 +From: Christoph Paasch +Date: Tue, 29 Jul 2014 12:07:27 +0200 +Subject: tcp: Fix integer-overflows in TCP veno + +From: Christoph Paasch + +[ Upstream commit 45a07695bc64b3ab5d6d2215f9677e5b8c05a7d0 ] + +In veno we do a multiplication of the cwnd and the rtt. This +may overflow and thus their result is stored in a u64. However, we first +need to cast the cwnd so that actually 64-bit arithmetic is done. + +A first attempt at fixing 76f1017757aa0 ([TCP]: TCP Veno congestion +control) was made by 159131149c2 (tcp: Overflow bug in Vegas), but it +failed to add the required cast in tcp_veno_cong_avoid(). + +Fixes: 76f1017757aa0 ([TCP]: TCP Veno congestion control) +Signed-off-by: Christoph Paasch +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_veno.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_veno.c ++++ b/net/ipv4/tcp_veno.c +@@ -144,7 +144,7 @@ static void tcp_veno_cong_avoid(struct s + + rtt = veno->minrtt; + +- target_cwnd = (tp->snd_cwnd * veno->basertt); ++ target_cwnd = (u64)tp->snd_cwnd * veno->basertt; + target_cwnd <<= V_PARAM_SHIFT; + do_div(target_cwnd, rtt); + -- 2.47.3
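
For reference, the arithmetic that both TCP patches above guard against can be reproduced in a few lines of standalone C (values are illustrative; in the kernel the subsequent division is done with do_div() so the code also builds on 32-bit architectures):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t snd_cwnd = 10000;  /* cwnd in segments (illustrative) */
        uint32_t basertt  = 500000; /* base RTT in usec (illustrative) */

        /* Buggy pattern: the multiply is done in 32 bits and wraps,
         * and only the truncated product is widened to 64 bits. */
        uint64_t bad = snd_cwnd * basertt;

        /* Fixed pattern: widen one operand first, so the whole
         * multiplication is carried out in 64-bit arithmetic. */
        uint64_t good = (uint64_t)snd_cwnd * basertt;

        printf("bad  = %llu\n", (unsigned long long)bad);  /* 705032704 */
        printf("good = %llu\n", (unsigned long long)good); /* 5000000000 */
        return 0;
}
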