From 832bae72cbf15bca792175a2d7bc279dc1d89202 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Dec 2014 08:46:52 -0800 Subject: [PATCH] 3.17-stable patches added patches: bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch gre-set-inner-mac-header-in-gro-complete.patch ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch mips-bpf-fix-broken-bpf_mod.patch net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch net-mvneta-fix-race-condition-in-mvneta_tx.patch net-mvneta-fix-tx-interrupt-delay.patch net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch netlink-use-jhash-as-hashfn-for-rhashtable.patch rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch --- ...f-ifla_bond_arp_ip_target-attributes.patch | 36 ++++++ ...xlan_sock_add-and-vxlan_sock_release.patch | 72 ++++++++++++ ...set-inner-mac-header-in-gro-complete.patch | 36 ++++++ ..._link_ops-dellink-cause-kernel-panic.patch | 109 ++++++++++++++++++ ...6-gre-fix-wrong-skb-protocol-in-wccp.patch | 39 +++++++ queue-3.17/mips-bpf-fix-broken-bpf_mod.patch | 41 +++++++ ...unt-field-to-24-bits-in-qp_alloc_res.patch | 40 +++++++ ...neta-fix-race-condition-in-mvneta_tx.patch | 40 +++++++ .../net-mvneta-fix-tx-interrupt-delay.patch | 69 +++++++++++ ...-for-headroom-reserve-in-output-path.patch | 52 +++++++++ ...l-ipv6_recv_error-for-af_inet6-socks.patch | 97 ++++++++++++++++ ...k-use-jhash-as-hashfn-for-rhashtable.patch | 45 ++++++++ ...se-net-refcnt-on-error-in-do_setlink.patch | 32 +++++ queue-3.17/series | 16 +++ ...n-there-are-more-tx-than-rx-channels.patch | 34 ++++++ 
...lean-flip-in-vxlan_f_udp_zero_csum6_.patch | 40 +++++++ ...linear-area-after-linearizing-an-skb.patch | 45 ++++++++ 17 files changed, 843 insertions(+) create mode 100644 queue-3.17/bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch create mode 100644 queue-3.17/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch create mode 100644 queue-3.17/gre-set-inner-mac-header-in-gro-complete.patch create mode 100644 queue-3.17/ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch create mode 100644 queue-3.17/ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch create mode 100644 queue-3.17/mips-bpf-fix-broken-bpf_mod.patch create mode 100644 queue-3.17/net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch create mode 100644 queue-3.17/net-mvneta-fix-race-condition-in-mvneta_tx.patch create mode 100644 queue-3.17/net-mvneta-fix-tx-interrupt-delay.patch create mode 100644 queue-3.17/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch create mode 100644 queue-3.17/net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch create mode 100644 queue-3.17/netlink-use-jhash-as-hashfn-for-rhashtable.patch create mode 100644 queue-3.17/rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch create mode 100644 queue-3.17/tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch create mode 100644 queue-3.17/vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch create mode 100644 queue-3.17/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch diff --git a/queue-3.17/bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch b/queue-3.17/bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch new file mode 100644 index 00000000000..d9017d526c0 --- /dev/null +++ b/queue-3.17/bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch @@ -0,0 +1,36 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Thomas Graf +Date: Thu, 27 Nov 2014 00:22:33 +0100 +Subject: bond: 
Check length of IFLA_BOND_ARP_IP_TARGET attributes + +From: Thomas Graf + +[ Upstream commit f6c6fda4c9e17940b0a2ba206b0408babfdc930c ] + +Fixes: 7f28fa10 ("bonding: add arp_ip_target netlink support") +Reported-by: John Fastabend +Cc: Scott Feldman +Signed-off-by: Thomas Graf +Acked-by: John Fastabend +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_netlink.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -194,7 +194,12 @@ static int bond_changelink(struct net_de + + bond_option_arp_ip_targets_clear(bond); + nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { +- __be32 target = nla_get_be32(attr); ++ __be32 target; ++ ++ if (nla_len(attr) < sizeof(target)) ++ return -EINVAL; ++ ++ target = nla_get_be32(attr); + + bond_opt_initval(&newval, (__force u64)target); + err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, diff --git a/queue-3.17/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch b/queue-3.17/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch new file mode 100644 index 00000000000..f1817128f30 --- /dev/null +++ b/queue-3.17/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch @@ -0,0 +1,72 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Marcelo Leitner +Date: Thu, 11 Dec 2014 10:02:22 -0200 +Subject: Fix race condition between vxlan_sock_add and vxlan_sock_release + +From: Marcelo Leitner + +[ Upstream commit 00c83b01d58068dfeb2e1351cca6fccf2a83fa8f ] + +Currently, when trying to reuse a socket, vxlan_sock_add will grab +vn->sock_lock, locate a reusable socket, inc refcount and release +vn->sock_lock. + +But vxlan_sock_release() will first decrement refcount, and then grab +that lock. 
refcnt operations are atomic but as currently we have +deferred works which hold vs->refcnt each, this might happen, leading to +a use after free (specially after vxlan_igmp_leave): + + CPU 1 CPU 2 + +deferred work vxlan_sock_add + ... ... + spin_lock(&vn->sock_lock) + vs = vxlan_find_sock(); + vxlan_sock_release + dec vs->refcnt, reaches 0 + spin_lock(&vn->sock_lock) + vxlan_sock_hold(vs), refcnt=1 + spin_unlock(&vn->sock_lock) + hlist_del_rcu(&vs->hlist); + vxlan_notify_del_rx_port(vs) + spin_unlock(&vn->sock_lock) + +So when we look for a reusable socket, we check if it wasn't freed +already before reusing it. + +Signed-off-by: Marcelo Ricardo Leitner +Fixes: 7c47cedf43a8b3 ("vxlan: move IGMP join/leave to work queue") +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2050,9 +2050,8 @@ static int vxlan_init(struct net_device + spin_lock(&vn->sock_lock); + vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, + vxlan->dst_port); +- if (vs) { ++ if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { + /* If we have a socket with same port already, reuse it */ +- atomic_inc(&vs->refcnt); + vxlan_vs_add_dev(vs, vxlan); + } else { + /* otherwise make new socket outside of RTNL */ +@@ -2459,12 +2458,9 @@ struct vxlan_sock *vxlan_sock_add(struct + + spin_lock(&vn->sock_lock); + vs = vxlan_find_sock(net, ipv6 ? 
AF_INET6 : AF_INET, port); +- if (vs) { +- if (vs->rcv == rcv) +- atomic_inc(&vs->refcnt); +- else ++ if (vs && ((vs->rcv != rcv) || ++ !atomic_add_unless(&vs->refcnt, 1, 0))) + vs = ERR_PTR(-EBUSY); +- } + spin_unlock(&vn->sock_lock); + + if (!vs) diff --git a/queue-3.17/gre-set-inner-mac-header-in-gro-complete.patch b/queue-3.17/gre-set-inner-mac-header-in-gro-complete.patch new file mode 100644 index 00000000000..ec60b35b7dd --- /dev/null +++ b/queue-3.17/gre-set-inner-mac-header-in-gro-complete.patch @@ -0,0 +1,36 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Tom Herbert +Date: Sat, 29 Nov 2014 09:59:45 -0800 +Subject: gre: Set inner mac header in gro complete + +From: Tom Herbert + +[ Upstream commit 6fb2a756739aa507c1fd5b8126f0bfc2f070dc46 ] + +Set the inner mac header to point to the GRE payload when +doing GRO. This is needed if we proceed to send the packet +through GRE GSO which now uses the inner mac header instead +of inner network header to determine the length of encapsulation +headers. + +Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length") +Reported-by: Wolfgang Walter +Signed-off-by: Tom Herbert +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/gre_offload.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -279,6 +279,9 @@ static int gre_gro_complete(struct sk_bu + err = ptype->callbacks.gro_complete(skb, nhoff + grehlen); + + rcu_read_unlock(); ++ ++ skb_set_inner_mac_header(skb, nhoff + grehlen); ++ + return err; + } + diff --git a/queue-3.17/ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch b/queue-3.17/ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch new file mode 100644 index 00000000000..f6628570716 --- /dev/null +++ b/queue-3.17/ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch @@ -0,0 +1,109 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: lucien +Date: Sun, 23 Nov 2014 15:04:11 +0800 +Subject: ip_tunnel: the lack of vti_link_ops' dellink() cause kernel panic + +From: lucien + +[ Upstream commit 20ea60ca9952bd19d4b0d74719daba305aef5178 ] + +Now the vti_link_ops do not point the .dellink, for fb tunnel device +(ip_vti0), the net_device will be removed as the default .dellink is +unregister_netdevice_queue,but the tunnel still in the tunnel list, +then if we add a new vti tunnel, in ip_tunnel_find(): + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + link == t->parms.link && +==> type == t->dev->type && + ip_tunnel_key_match(&t->parms, flags, key)) + break; + } + +the panic will happen, cause dev of ip_tunnel *t is null: +[ 3835.072977] IP: [] ip_tunnel_find+0x9d/0xc0 [ip_tunnel] +[ 3835.073008] PGD b2c21067 PUD b7277067 PMD 0 +[ 3835.073008] Oops: 0000 [#1] SMP +..... 
+[ 3835.073008] Stack: +[ 3835.073008] ffff8800b72d77f0 ffffffffa0411924 ffff8800bb956000 ffff8800b72d78e0 +[ 3835.073008] ffff8800b72d78a0 0000000000000000 ffffffffa040d100 ffff8800b72d7858 +[ 3835.073008] ffffffffa040b2e3 0000000000000000 0000000000000000 0000000000000000 +[ 3835.073008] Call Trace: +[ 3835.073008] [] ip_tunnel_newlink+0x64/0x160 [ip_tunnel] +[ 3835.073008] [] vti_newlink+0x43/0x70 [ip_vti] +[ 3835.073008] [] rtnl_newlink+0x4fa/0x5f0 +[ 3835.073008] [] ? nla_strlcpy+0x5b/0x70 +[ 3835.073008] [] ? rtnl_link_ops_get+0x40/0x60 +[ 3835.073008] [] ? rtnl_newlink+0x13f/0x5f0 +[ 3835.073008] [] rtnetlink_rcv_msg+0xa4/0x270 +[ 3835.073008] [] ? sock_has_perm+0x75/0x90 +[ 3835.073008] [] ? rtnetlink_rcv+0x30/0x30 +[ 3835.073008] [] netlink_rcv_skb+0xa9/0xc0 +[ 3835.073008] [] rtnetlink_rcv+0x28/0x30 +.... + +modprobe ip_vti +ip link del ip_vti0 type vti +ip link add ip_vti0 type vti +rmmod ip_vti + +do that one or more times, kernel will panic. + +fix it by assigning ip_tunnel_dellink to vti_link_ops' dellink, in +which we skip the unregister of fb tunnel device. do the same on ip6_vti. + +Signed-off-by: Xin Long +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_vti.c | 1 + + net/ipv6/ip6_vti.c | 11 +++++++++++ + 2 files changed, 12 insertions(+) + +--- a/net/ipv4/ip_vti.c ++++ b/net/ipv4/ip_vti.c +@@ -528,6 +528,7 @@ static struct rtnl_link_ops vti_link_ops + .validate = vti_tunnel_validate, + .newlink = vti_newlink, + .changelink = vti_changelink, ++ .dellink = ip_tunnel_dellink, + .get_size = vti_get_size, + .fill_info = vti_fill_info, + }; +--- a/net/ipv6/ip6_vti.c ++++ b/net/ipv6/ip6_vti.c +@@ -905,6 +905,15 @@ static int vti6_newlink(struct net *src_ + return vti6_tnl_create2(dev); + } + ++static void vti6_dellink(struct net_device *dev, struct list_head *head) ++{ ++ struct net *net = dev_net(dev); ++ struct vti6_net *ip6n = net_generic(net, vti6_net_id); ++ ++ if (dev != ip6n->fb_tnl_dev) ++ unregister_netdevice_queue(dev, head); ++} ++ + static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) + { +@@ -980,6 +989,7 @@ static struct rtnl_link_ops vti6_link_op + .setup = vti6_dev_setup, + .validate = vti6_validate, + .newlink = vti6_newlink, ++ .dellink = vti6_dellink, + .changelink = vti6_changelink, + .get_size = vti6_get_size, + .fill_info = vti6_fill_info, +@@ -1020,6 +1030,7 @@ static int __net_init vti6_init_net(stru + if (!ip6n->fb_tnl_dev) + goto err_alloc_dev; + dev_net_set(ip6n->fb_tnl_dev, net); ++ ip6n->fb_tnl_dev->rtnl_link_ops = &vti6_link_ops; + + err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); + if (err < 0) diff --git a/queue-3.17/ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch b/queue-3.17/ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch new file mode 100644 index 00000000000..2facdc8cbab --- /dev/null +++ b/queue-3.17/ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch @@ -0,0 +1,39 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Yuri Chislov +Date: Mon, 24 Nov 2014 11:25:15 +0100 +Subject: ipv6: gre: fix wrong skb->protocol in WCCP + +From: Yuri Chislov + +[ Upstream commit 
be6572fdb1bfbe23b2624d477de50af50b02f5d6 ] + +When using GRE redirection in WCCP, it sets the wrong skb->protocol, +that is, ETH_P_IP instead of ETH_P_IPV6 for the encapsulated traffic. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Cc: Dmitry Kozlov +Signed-off-by: Yuri Chislov +Tested-by: Yuri Chislov +Signed-off-by: Daniel Borkmann +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -502,11 +502,11 @@ static int ip6gre_rcv(struct sk_buff *sk + + skb->protocol = gre_proto; + /* WCCP version 1 and 2 protocol decoding. +- * - Change protocol to IP ++ * - Change protocol to IPv6 + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { +- skb->protocol = htons(ETH_P_IP); ++ skb->protocol = htons(ETH_P_IPV6); + if ((*(h + offset) & 0xF0) != 0x40) + offset += 4; + } diff --git a/queue-3.17/mips-bpf-fix-broken-bpf_mod.patch b/queue-3.17/mips-bpf-fix-broken-bpf_mod.patch new file mode 100644 index 00000000000..5ad9bb75bdb --- /dev/null +++ b/queue-3.17/mips-bpf-fix-broken-bpf_mod.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Denis Kirjanov +Date: Mon, 1 Dec 2014 12:57:02 +0300 +Subject: mips: bpf: Fix broken BPF_MOD + +From: Denis Kirjanov + +[ Upstream commit 2e46477a12f6fd273e31a220b155d66e8352198c ] + +Remove optimize_div() from BPF_MOD | BPF_K case +since we don't know the dividend and fix the +emit_mod() by reading the mod operation result from HI register + +Signed-off-by: Denis Kirjanov +Reviewed-by: Markos Chandras +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/mips/net/bpf_jit.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/mips/net/bpf_jit.c ++++ b/arch/mips/net/bpf_jit.c +@@ -430,7 +430,7 @@ static inline void emit_mod(unsigned int + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; +- uasm_i_mflo(&p, dst); ++ uasm_i_mfhi(&p, dst); + } + ctx->idx += 2; /* 2 insts */ + } +@@ -1006,7 +1006,7 @@ load_ind: + break; + case BPF_ALU | BPF_MOD | BPF_K: + /* A %= k */ +- if (k == 1 || optimize_div(&k)) { ++ if (k == 1) { + ctx->flags |= SEEN_A; + emit_jit_reg_move(r_A, r_zero, ctx); + } else { diff --git a/queue-3.17/net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch b/queue-3.17/net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch new file mode 100644 index 00000000000..2167f65b511 --- /dev/null +++ b/queue-3.17/net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch @@ -0,0 +1,40 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Jack Morgenstein +Date: Tue, 25 Nov 2014 11:54:31 +0200 +Subject: net/mlx4_core: Limit count field to 24 bits in qp_alloc_res + +From: Jack Morgenstein + +[ Upstream commit 2d5c57d7fbfaa642fb7f0673df24f32b83d9066c ] + +Some VF drivers use the upper byte of "param1" (the qp count field) +in mlx4_qp_reserve_range() to pass flags which are used to optimize +the range allocation. + +Under the current code, if any of these flags are set, the 32-bit +count field yields a count greater than 2^24, which is out of range, +and this VF fails. + +As these flags represent a "best-effort" allocation hint anyway, they may +safely be ignored. Therefore, the PF driver may simply mask out the bits. + +Fixes: c82e9aa0a8 "mlx4_core: resource tracking for HCA resources used by guests" +Signed-off-by: Jack Morgenstein +Signed-off-by: Or Gerlitz +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c ++++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +@@ -1546,7 +1546,7 @@ static int qp_alloc_res(struct mlx4_dev + + switch (op) { + case RES_OP_RESERVE: +- count = get_param_l(&in_param); ++ count = get_param_l(&in_param) & 0xffffff; + align = get_param_h(&in_param); + err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); + if (err) diff --git a/queue-3.17/net-mvneta-fix-race-condition-in-mvneta_tx.patch b/queue-3.17/net-mvneta-fix-race-condition-in-mvneta_tx.patch new file mode 100644 index 00000000000..03fc1f3448a --- /dev/null +++ b/queue-3.17/net-mvneta-fix-race-condition-in-mvneta_tx.patch @@ -0,0 +1,40 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Eric Dumazet +Date: Tue, 2 Dec 2014 04:30:59 -0800 +Subject: net: mvneta: fix race condition in mvneta_tx() + +From: Eric Dumazet + +[ Upstream commit 5f478b41033606d325e420df693162e2524c2b94 ] + +mvneta_tx() dereferences skb to get skb->len too late, +as hardware might have completed the transmit and TX completion +could have freed the skb from another cpu. + +Fixes: 71f6d1b31fb1 ("net: mvneta: replace Tx timer with a real interrupt") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -1721,6 +1721,7 @@ static int mvneta_tx(struct sk_buff *skb + u16 txq_id = skb_get_queue_mapping(skb); + struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; + struct mvneta_tx_desc *tx_desc; ++ int len = skb->len; + int frags = 0; + u32 tx_cmd; + +@@ -1788,7 +1789,7 @@ out: + + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; +- stats->tx_bytes += skb->len; ++ stats->tx_bytes += len; + u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_dropped++; diff --git a/queue-3.17/net-mvneta-fix-tx-interrupt-delay.patch b/queue-3.17/net-mvneta-fix-tx-interrupt-delay.patch new file mode 100644 index 00000000000..7c85d047454 --- /dev/null +++ b/queue-3.17/net-mvneta-fix-tx-interrupt-delay.patch @@ -0,0 +1,69 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: willy tarreau +Date: Tue, 2 Dec 2014 08:13:04 +0100 +Subject: net: mvneta: fix Tx interrupt delay + +From: willy tarreau + +[ Upstream commit aebea2ba0f7495e1a1c9ea5e753d146cb2f6b845 ] + +The mvneta driver sets the amount of Tx coalesce packets to 16 by +default. Normally that does not cause any trouble since the driver +uses a much larger Tx ring size (532 packets). But some sockets +might run with very small buffers, much smaller than the equivalent +of 16 packets. This is what ping is doing for example, by setting +SNDBUF to 324 bytes rounded up to 2kB by the kernel. + +The problem is that there is no documented method to force a specific +packet to emit an interrupt (eg: the last of the ring) nor is it +possible to make the NIC emit an interrupt after a given delay. 
+ +In this case, it causes trouble, because when ping sends packets over +its raw socket, the few first packets leave the system, and the first +15 packets will be emitted without an IRQ being generated, so without +the skbs being freed. And since the socket's buffer is small, there's +no way to reach that amount of packets, and the ping ends up with +"send: no buffer available" after sending 6 packets. Running with 3 +instances of ping in parallel is enough to hide the problem, because +with 6 packets per instance, that's 18 packets total, which is enough +to grant a Tx interrupt before all are sent. + +The original driver in the LSP kernel worked around this design flaw +by using a software timer to clean up the Tx descriptors. This timer +was slow and caused terrible network performance on some Tx-bound +workloads (such as routing) but was enough to make tools like ping +work correctly. + +Instead here, we simply set the packet counts before interrupt to 1. +This ensures that each packet sent will produce an interrupt. NAPI +takes care of coalescing interrupts since the interrupt is disabled +once generated. + +No measurable performance impact nor CPU usage were observed on small +nor large packets, including when saturating the link on Tx, and this +fixes tools like ping which rely on too small a send buffer. If one +wants to increase this value for certain workloads where it is safe +to do so, "ethtool -C $dev tx-frames" will override this default +setting. + +This fix needs to be applied to stable kernels starting with 3.10. + +Tested-By: Maggie Mae Roxas +Signed-off-by: Willy Tarreau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvneta.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -216,7 +216,7 @@ + /* Various constants */ + + /* Coalescing */ +-#define MVNETA_TXDONE_COAL_PKTS 16 ++#define MVNETA_TXDONE_COAL_PKTS 1 + #define MVNETA_RX_COAL_PKTS 32 + #define MVNETA_RX_COAL_USEC 100 + diff --git a/queue-3.17/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch b/queue-3.17/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch new file mode 100644 index 00000000000..237e1fd7fb9 --- /dev/null +++ b/queue-3.17/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch @@ -0,0 +1,52 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Daniel Borkmann +Date: Wed, 3 Dec 2014 12:13:58 +0100 +Subject: net: sctp: use MAX_HEADER for headroom reserve in output path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Borkmann + +[ Upstream commit 9772b54c55266ce80c639a80aa68eeb908f8ecf5 ] + +To accommodate for enough headroom for tunnels, use MAX_HEADER instead +of LL_MAX_HEADER. Robert reported that he has hit after roughly 40hrs +of trinity an skb_under_panic() via SCTP output path (see reference). +I couldn't reproduce it from here, but not using MAX_HEADER as elsewhere +in other protocols might be one possible cause for this. + +In any case, it looks like accounting on chunks themselves seems to look +good as the skb already passed the SCTP output path and did not hit +any skb_over_panic(). Given tunneling was enabled in his .config, the +headroom would have been expanded by MAX_HEADER in this case. 
+ +Reported-by: Robert Święcki +Reference: https://lkml.org/lkml/2014/12/1/507 +Fixes: 594ccc14dfe4d ("[SCTP] Replace incorrect use of dev_alloc_skb with alloc_skb in sctp_packet_transmit().") +Signed-off-by: Daniel Borkmann +Acked-by: Vlad Yasevich +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/output.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sctp/output.c ++++ b/net/sctp/output.c +@@ -401,12 +401,12 @@ int sctp_packet_transmit(struct sctp_pac + sk = chunk->skb->sk; + + /* Allocate the new skb. */ +- nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); ++ nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); + if (!nskb) + goto nomem; + + /* Make sure the outbound skb has enough header room reserved. */ +- skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); ++ skb_reserve(nskb, packet->overhead + MAX_HEADER); + + /* Set the owning socket so that we know where to get the + * destination IP address. diff --git a/queue-3.17/net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch b/queue-3.17/net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch new file mode 100644 index 00000000000..a4fdf8e4373 --- /dev/null +++ b/queue-3.17/net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch @@ -0,0 +1,97 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Willem de Bruijn +Date: Wed, 26 Nov 2014 14:53:02 -0500 +Subject: net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks + +From: Willem de Bruijn + +[ Upstream commit f4713a3dfad045d46afcb9c2a7d0bba288920ed4 ] + +TCP timestamping introduced MSG_ERRQUEUE handling for TCP sockets. +If the socket is of family AF_INET6, call ipv6_recv_error instead +of ip_recv_error. + +This change is more complex than a single branch due to the loadable +ipv6 module. It reuses a pre-existing indirect function call from +ping. 
The ping code is safe to call, because it is part of the core +ipv6 module and always present when AF_INET6 sockets are active. + +Fixes: 4ed2d765 (net-timestamp: TCP timestamping) +Signed-off-by: Willem de Bruijn + +---- + +It may also be worthwhile to add WARN_ON_ONCE(sk->family == AF_INET6) +to ip_recv_error. +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_common.h | 2 ++ + net/ipv4/af_inet.c | 11 +++++++++++ + net/ipv4/ping.c | 12 ++---------- + net/ipv4/tcp.c | 2 +- + 4 files changed, 16 insertions(+), 11 deletions(-) + +--- a/include/net/inet_common.h ++++ b/include/net/inet_common.h +@@ -37,6 +37,8 @@ int inet_ioctl(struct socket *sock, unsi + int inet_ctl_sock_create(struct sock **sk, unsigned short family, + unsigned short type, unsigned char protocol, + struct net *net); ++int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, ++ int *addr_len); + + static inline void inet_ctl_sock_destroy(struct sock *sk) + { +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1421,6 +1421,17 @@ out: + return pp; + } + ++int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) ++{ ++ if (sk->sk_family == AF_INET) ++ return ip_recv_error(sk, msg, len, addr_len); ++#if IS_ENABLED(CONFIG_IPV6) ++ if (sk->sk_family == AF_INET6) ++ return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); ++#endif ++ return -EINVAL; ++} ++ + static int inet_gro_complete(struct sk_buff *skb, int nhoff) + { + __be16 newlen = htons(skb->len - nhoff); +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -855,16 +855,8 @@ int ping_recvmsg(struct kiocb *iocb, str + if (flags & MSG_OOB) + goto out; + +- if (flags & MSG_ERRQUEUE) { +- if (family == AF_INET) { +- return ip_recv_error(sk, msg, len, addr_len); +-#if IS_ENABLED(CONFIG_IPV6) +- } else if (family == AF_INET6) { +- return pingv6_ops.ipv6_recv_error(sk, msg, len, +- addr_len); +-#endif +- } +- } ++ if (flags & MSG_ERRQUEUE) ++ return inet_recv_error(sk, msg, 
len, addr_len); + + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1633,7 +1633,7 @@ int tcp_recvmsg(struct kiocb *iocb, stru + u32 urg_hole = 0; + + if (unlikely(flags & MSG_ERRQUEUE)) +- return ip_recv_error(sk, msg, len, addr_len); ++ return inet_recv_error(sk, msg, len, addr_len); + + if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + (sk->sk_state == TCP_ESTABLISHED)) diff --git a/queue-3.17/netlink-use-jhash-as-hashfn-for-rhashtable.patch b/queue-3.17/netlink-use-jhash-as-hashfn-for-rhashtable.patch new file mode 100644 index 00000000000..8726d07c432 --- /dev/null +++ b/queue-3.17/netlink-use-jhash-as-hashfn-for-rhashtable.patch @@ -0,0 +1,45 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Daniel Borkmann +Date: Wed, 10 Dec 2014 16:33:10 +0100 +Subject: netlink: use jhash as hashfn for rhashtable + +From: Daniel Borkmann + +[ Upstream commit 7f19fc5e0b617593dcda0d9956adc78b559ef1f5 ] + +For netlink, we shouldn't be using arch_fast_hash() as a hashing +discipline, but rather jhash() instead. + +Since netlink sockets can be opened by any user, a local attacker +would be able to easily create collisions with the DPDK-derived +arch_fast_hash(), which trades off performance for security by +using crc32 CPU instructions on x86_64. + +While it might have a legitimate use case in other places, it should +be avoided in netlink context, though. As rhashtable's API is very +flexible, we could later on still decide on other hashing disciplines, +if legitimate. + +Reference: http://thread.gmane.org/gmane.linux.kernel/1844123 +Fixes: e341694e3eb5 ("netlink: Convert netlink_lookup() to use RCU protected hash table") +Cc: Herbert Xu +Signed-off-by: Daniel Borkmann +Acked-by: Thomas Graf +Acked-by: Hannes Frederic Sowa +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -3130,7 +3130,7 @@ static int __init netlink_proto_init(voi + .head_offset = offsetof(struct netlink_sock, node), + .key_offset = offsetof(struct netlink_sock, portid), + .key_len = sizeof(u32), /* portid */ +- .hashfn = arch_fast_hash, ++ .hashfn = jhash, + .max_shift = 16, /* 64K */ + .grow_decision = rht_grow_above_75, + .shrink_decision = rht_shrink_below_30, diff --git a/queue-3.17/rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch b/queue-3.17/rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch new file mode 100644 index 00000000000..9ae0aa30fae --- /dev/null +++ b/queue-3.17/rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch @@ -0,0 +1,32 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Nicolas Dichtel +Date: Thu, 27 Nov 2014 10:16:15 +0100 +Subject: rtnetlink: release net refcnt on error in do_setlink() + +From: Nicolas Dichtel + +[ Upstream commit e0ebde0e131b529fd721b24f62872def5ec3718c ] + +rtnl_link_get_net() holds a reference on the 'struct net', we need to release +it in case of error. + +CC: Eric W. Biederman +Fixes: b51642f6d77b ("net: Enable a userns root rtnl calls that are safe for unprivilged users") +Signed-off-by: Nicolas Dichtel +Reviewed-by: "Eric W. Biederman" +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -1495,6 +1495,7 @@ static int do_setlink(const struct sk_bu + goto errout; + } + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { ++ put_net(net); + err = -EPERM; + goto errout; + } diff --git a/queue-3.17/series b/queue-3.17/series index a2820142bfd..0ea249e5e67 100644 --- a/queue-3.17/series +++ b/queue-3.17/series @@ -24,3 +24,19 @@ media-smiapp-only-some-selection-targets-are-settable.patch ahci-add-deviceids-for-sunrise-point-lp-sata-controller.patch ahci-disable-msi-on-samsung-0xa800-ssd.patch sata_fsl-fix-error-handling-of-irq_of_parse_and_map.patch +ip_tunnel-the-lack-of-vti_link_ops-dellink-cause-kernel-panic.patch +ipv6-gre-fix-wrong-skb-protocol-in-wccp.patch +vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch +fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch +tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch +net-mlx4_core-limit-count-field-to-24-bits-in-qp_alloc_res.patch +net-timestamp-make-tcp_recvmsg-call-ipv6_recv_error-for-af_inet6-socks.patch +bond-check-length-of-ifla_bond_arp_ip_target-attributes.patch +rtnetlink-release-net-refcnt-on-error-in-do_setlink.patch +gre-set-inner-mac-header-in-gro-complete.patch +mips-bpf-fix-broken-bpf_mod.patch +net-mvneta-fix-tx-interrupt-delay.patch +net-mvneta-fix-race-condition-in-mvneta_tx.patch +net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch +xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch +netlink-use-jhash-as-hashfn-for-rhashtable.patch diff --git a/queue-3.17/tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch b/queue-3.17/tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch new file mode 100644 index 00000000000..cb65e01f63d --- /dev/null +++ b/queue-3.17/tg3-fix-ring-init-when-there-are-more-tx-than-rx-channels.patch @@ 
-0,0 +1,34 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Thadeu Lima de Souza Cascardo +Date: Tue, 25 Nov 2014 14:21:11 -0200 +Subject: tg3: fix ring init when there are more TX than RX channels + +From: Thadeu Lima de Souza Cascardo + +[ Upstream commit a620a6bc1c94c22d6c312892be1e0ae171523125 ] + +If TX channels are set to 4 and RX channels are set to less than 4, +using ethtool -L, the driver will try to initialize more RX channels +than it has allocated, causing an oops. + +This fix only initializes the RX ring if it has been allocated. + +Signed-off-by: Thadeu Lima de Souza Cascardo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -8561,7 +8561,8 @@ static int tg3_init_rings(struct tg3 *tp + if (tnapi->rx_rcb) + memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); + +- if (tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { ++ if (tnapi->prodring.rx_std && ++ tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { + tg3_free_rings(tp); + return -ENOMEM; + } diff --git a/queue-3.17/vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch b/queue-3.17/vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch new file mode 100644 index 00000000000..ba19fcd5647 --- /dev/null +++ b/queue-3.17/vxlan-fix-boolean-flip-in-vxlan_f_udp_zero_csum6_.patch @@ -0,0 +1,40 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: Alexander Duyck +Date: Mon, 24 Nov 2014 20:08:38 -0800 +Subject: vxlan: Fix boolean flip in VXLAN_F_UDP_ZERO_CSUM6_[TX|RX] + +From: Alexander Duyck + +[ Upstream commit 3dc2b6a8d38cf6c7604ec25f3d50d6ec8da04435 ] + +In "vxlan: Call udp_sock_create" there was a logic error that resulted in +the default for IPv6 VXLAN tunnels going from using checksums to not using +checksums. 
Since there is currently no support in iproute2 for setting +these values it means that a kernel after the change cannot talk over an IPv6 +VXLAN tunnel to a kernel prior to the change. + +Fixes: 3ee64f3 ("vxlan: Call udp_sock_create") + +Cc: Tom Herbert +Signed-off-by: Alexander Duyck +Acked-by: Tom Herbert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2362,9 +2362,9 @@ static struct socket *vxlan_create_sock( + if (ipv6) { + udp_conf.family = AF_INET6; + udp_conf.use_udp6_tx_checksums = +- !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); ++ !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); + udp_conf.use_udp6_rx_checksums = +- !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); ++ !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); + } else { + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = INADDR_ANY; diff --git a/queue-3.17/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch b/queue-3.17/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch new file mode 100644 index 00000000000..5196ac26893 --- /dev/null +++ b/queue-3.17/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch @@ -0,0 +1,45 @@ +From foo@baz Sun Dec 14 08:37:01 PST 2014 +From: David Vrabel +Date: Tue, 9 Dec 2014 18:43:28 +0000 +Subject: xen-netfront: use correct linear area after linearizing an skb + +From: David Vrabel + +[ Upstream commit 11d3d2a16cc1f05c6ece69a4392e99efb85666a6 ] + +Commit 97a6d1bb2b658ac85ed88205ccd1ab809899884d (xen-netfront: Fix +handling packets on compound pages with skb_linearize) attempted to +fix a problem where an skb that would have required too many slots +would be dropped causing TCP connections to stall. + +However, it filled in the first slot using the original buffer and not +the new one and would use the wrong offset and grant access to the +wrong page.
+ +Netback would notice the malformed request and stop all traffic on the +VIF, reporting: + + vif vif-3-0 vif3.0: txreq.offset: 85e, size: 4002, end: 6144 + vif vif-3-0 vif3.0: fatal error; disabling device + +Reported-by: Anthony Wright +Tested-by: Anthony Wright +Signed-off-by: David Vrabel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -627,6 +627,9 @@ static int xennet_start_xmit(struct sk_b + slots, skb->len); + if (skb_linearize(skb)) + goto drop; ++ data = skb->data; ++ offset = offset_in_page(data); ++ len = skb_headlen(skb); + } + + spin_lock_irqsave(&queue->tx_lock, flags); -- 2.47.3