From: Greg Kroah-Hartman
Date: Sun, 14 Dec 2014 16:47:33 +0000 (-0800)
Subject: 3.18-stable patches
X-Git-Tag: v3.10.63~7
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=53d94d63d5dab8e4db990c922bf04c5f2e1f4c74;p=thirdparty%2Fkernel%2Fstable-queue.git

3.18-stable patches

added patches:
	fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch
	gre-set-inner-mac-header-in-gro-complete.patch
	mips-bpf-fix-broken-bpf_mod.patch
	net-fix-suspicious-rcu_dereference_check-in-net-sched-sch_fq_codel.c.patch
	net-mvneta-fix-race-condition-in-mvneta_tx.patch
	net-mvneta-fix-tx-interrupt-delay.patch
	net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch
	netlink-use-jhash-as-hashfn-for-rhashtable.patch
	openvswitch-fix-flow-mask-validation.patch
	tcp-fix-more-null-deref-after-prequeue-changes.patch
	xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch
---

diff --git a/queue-3.18/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch b/queue-3.18/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch
new file mode 100644
index 00000000000..716f7e048de
--- /dev/null
+++ b/queue-3.18/fix-race-condition-between-vxlan_sock_add-and-vxlan_sock_release.patch
@@ -0,0 +1,72 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Marcelo Leitner
+Date: Thu, 11 Dec 2014 10:02:22 -0200
+Subject: Fix race condition between vxlan_sock_add and vxlan_sock_release
+
+From: Marcelo Leitner
+
+[ Upstream commit 00c83b01d58068dfeb2e1351cca6fccf2a83fa8f ]
+
+Currently, when trying to reuse a socket, vxlan_sock_add will grab
+vn->sock_lock, locate a reusable socket, inc its refcount and release
+vn->sock_lock.
+
+But vxlan_sock_release() will first decrement the refcount, and then
+grab that lock. The refcnt operations are atomic, but as we currently
+have deferred works which each hold a vs->refcnt reference, the
+following might happen, leading to a use after free (especially after
+vxlan_igmp_leave):
+
+  CPU 1                            CPU 2
+
+deferred work                      vxlan_sock_add
+  ...                                ...
+                                     spin_lock(&vn->sock_lock)
+                                     vs = vxlan_find_sock();
+  vxlan_sock_release
+    dec vs->refcnt, reaches 0
+    spin_lock(&vn->sock_lock)
+                                     vxlan_sock_hold(vs), refcnt=1
+                                     spin_unlock(&vn->sock_lock)
+    hlist_del_rcu(&vs->hlist);
+    vxlan_notify_del_rx_port(vs)
+    spin_unlock(&vn->sock_lock)
+
+So when we look for a reusable socket, check that it wasn't already
+freed before reusing it.
+
+Signed-off-by: Marcelo Ricardo Leitner
+Fixes: 7c47cedf43a8b3 ("vxlan: move IGMP join/leave to work queue")
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/vxlan.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1995,9 +1995,8 @@ static int vxlan_init(struct net_device
+ 	spin_lock(&vn->sock_lock);
+ 	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
+ 			     vxlan->dst_port);
+-	if (vs) {
++	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
+ 		/* If we have a socket with same port already, reuse it */
+-		atomic_inc(&vs->refcnt);
+ 		vxlan_vs_add_dev(vs, vxlan);
+ 	} else {
+ 		/* otherwise make new socket outside of RTNL */
+@@ -2396,12 +2395,9 @@ struct vxlan_sock *vxlan_sock_add(struct
+
+ 	spin_lock(&vn->sock_lock);
+ 	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
+-	if (vs) {
+-		if (vs->rcv == rcv)
+-			atomic_inc(&vs->refcnt);
+-		else
++	if (vs && ((vs->rcv != rcv) ||
++		   !atomic_add_unless(&vs->refcnt, 1, 0)))
+ 		vs = ERR_PTR(-EBUSY);
+-	}
+ 	spin_unlock(&vn->sock_lock);
+
+ 	if (!vs)
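The heart of the fix above is taking the reference conditionally. Below is a
minimal userspace sketch of the same look-up-and-hold pattern, using C11
atomics instead of the kernel's atomic_t; the names (vs_model, try_hold) are
illustrative, not the driver's:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct vs_model { atomic_int refcnt; };

    /* Mirrors atomic_add_unless(&vs->refcnt, 1, 0): take a reference only
     * while the count is still nonzero, i.e. teardown has not begun. */
    static bool try_hold(struct vs_model *vs)
    {
            int old = atomic_load(&vs->refcnt);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(&vs->refcnt, &old, old + 1))
                            return true;    /* socket is safe to reuse */
            }
            return false;   /* refcnt hit 0: caller must create a new socket */
    }

A plain atomic_inc() at the same point would resurrect a socket whose refcount
had already reached zero, which is exactly the use-after-free window shown in
the CPU trace above.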
diff --git a/queue-3.18/gre-set-inner-mac-header-in-gro-complete.patch b/queue-3.18/gre-set-inner-mac-header-in-gro-complete.patch
new file mode 100644
index 00000000000..052bec0f1d6
--- /dev/null
+++ b/queue-3.18/gre-set-inner-mac-header-in-gro-complete.patch
@@ -0,0 +1,36 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Tom Herbert
+Date: Sat, 29 Nov 2014 09:59:45 -0800
+Subject: gre: Set inner mac header in gro complete
+
+From: Tom Herbert
+
+[ Upstream commit 6fb2a756739aa507c1fd5b8126f0bfc2f070dc46 ]
+
+Set the inner mac header to point to the GRE payload when
+doing GRO. This is needed if we proceed to send the packet
+through GRE GSO, which now uses the inner mac header instead
+of the inner network header to determine the length of
+encapsulation headers.
+
+Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length")
+Reported-by: Wolfgang Walter
+Signed-off-by: Tom Herbert
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/gre_offload.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv4/gre_offload.c
++++ b/net/ipv4/gre_offload.c
+@@ -247,6 +247,9 @@ static int gre_gro_complete(struct sk_bu
+ 	err = ptype->callbacks.gro_complete(skb, nhoff + grehlen);
+
+ 	rcu_read_unlock();
++
++	skb_set_inner_mac_header(skb, nhoff + grehlen);
++
+ 	return err;
+ }
+
diff --git a/queue-3.18/mips-bpf-fix-broken-bpf_mod.patch b/queue-3.18/mips-bpf-fix-broken-bpf_mod.patch
new file mode 100644
index 00000000000..cec2a030eb1
--- /dev/null
+++ b/queue-3.18/mips-bpf-fix-broken-bpf_mod.patch
@@ -0,0 +1,41 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Denis Kirjanov
+Date: Mon, 1 Dec 2014 12:57:02 +0300
+Subject: mips: bpf: Fix broken BPF_MOD
+
+From: Denis Kirjanov
+
+[ Upstream commit 2e46477a12f6fd273e31a220b155d66e8352198c ]
+
+Remove optimize_div() from the BPF_MOD | BPF_K case, since we don't
+know the dividend there, and fix emit_mod() by reading the mod
+operation result from the HI register.
+
+Signed-off-by: Denis Kirjanov
+Reviewed-by: Markos Chandras
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/mips/net/bpf_jit.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/mips/net/bpf_jit.c
++++ b/arch/mips/net/bpf_jit.c
+@@ -426,7 +426,7 @@ static inline void emit_mod(unsigned int
+ 		u32 *p = &ctx->target[ctx->idx];
+ 		uasm_i_divu(&p, dst, src);
+ 		p = &ctx->target[ctx->idx + 1];
+-		uasm_i_mflo(&p, dst);
++		uasm_i_mfhi(&p, dst);
+ 	}
+ 	ctx->idx += 2; /* 2 insts */
+ }
+@@ -971,7 +971,7 @@ load_ind:
+ 		break;
+ 	case BPF_ALU | BPF_MOD | BPF_K:
+ 		/* A %= k */
+-		if (k == 1 || optimize_div(&k)) {
++		if (k == 1) {
+ 			ctx->flags |= SEEN_A;
+ 			emit_jit_reg_move(r_A, r_zero, ctx);
+ 		} else {
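For readers unfamiliar with the MIPS divide unit: a single divu instruction
produces both results at once, leaving the quotient in the LO register and the
remainder in HI, so reading LO via mflo after a divu yields A / k rather than
A % k. A hedged userspace model of that behaviour (not JIT code; divu_model is
a made-up helper):

    #include <stdint.h>
    #include <stdio.h>

    /* divu rs, rt on MIPS sets LO = rs / rt and HI = rs % rt. */
    static void divu_model(uint32_t a, uint32_t k, uint32_t *lo, uint32_t *hi)
    {
            *lo = a / k;    /* what the old mflo fetched: the quotient */
            *hi = a % k;    /* what the fixed mfhi fetches: the remainder */
    }

    int main(void)
    {
            uint32_t lo, hi;

            divu_model(17, 5, &lo, &hi);
            printf("17 %% 5 via mflo (broken): %u, via mfhi (fixed): %u\n",
                   (unsigned)lo, (unsigned)hi);
            return 0;
    }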
diff --git a/queue-3.18/net-fix-suspicious-rcu_dereference_check-in-net-sched-sch_fq_codel.c.patch b/queue-3.18/net-fix-suspicious-rcu_dereference_check-in-net-sched-sch_fq_codel.c.patch
new file mode 100644
index 00000000000..8409917e2a5
--- /dev/null
+++ b/queue-3.18/net-fix-suspicious-rcu_dereference_check-in-net-sched-sch_fq_codel.c.patch
@@ -0,0 +1,36 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: "Valdis.Kletnieks@vt.edu"
+Date: Tue, 9 Dec 2014 16:15:50 -0500
+Subject: net: fix suspicious rcu_dereference_check in net/sched/sch_fq_codel.c
+
+From: "Valdis.Kletnieks@vt.edu"
+
+[ Upstream commit 69204cf7eb9c5a72067ce6922d4699378251d053 ]
+
+commit 46e5da40ae ("net: qdisc: use rcu prefix and silence
+sparse warnings") triggers a spurious warning:
+
+net/sched/sch_fq_codel.c:97 suspicious rcu_dereference_check() usage!
+
+The code should be using the _bh variant of rcu_dereference.
+
+Signed-off-by: Valdis Kletnieks
+Acked-by: Eric Dumazet
+Acked-by: John Fastabend
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sched/sch_fq_codel.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sched/sch_fq_codel.c
++++ b/net/sched/sch_fq_codel.c
+@@ -94,7 +94,7 @@ static unsigned int fq_codel_classify(st
+ 	    TC_H_MIN(skb->priority) <= q->flows_cnt)
+ 		return TC_H_MIN(skb->priority);
+
+-	filter = rcu_dereference(q->filter_list);
++	filter = rcu_dereference_bh(q->filter_list);
+ 	if (!filter)
+ 		return fq_codel_hash(q, skb) + 1;
+
diff --git a/queue-3.18/net-mvneta-fix-race-condition-in-mvneta_tx.patch b/queue-3.18/net-mvneta-fix-race-condition-in-mvneta_tx.patch
new file mode 100644
index 00000000000..6ec7d493417
--- /dev/null
+++ b/queue-3.18/net-mvneta-fix-race-condition-in-mvneta_tx.patch
@@ -0,0 +1,40 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Eric Dumazet
+Date: Tue, 2 Dec 2014 04:30:59 -0800
+Subject: net: mvneta: fix race condition in mvneta_tx()
+
+From: Eric Dumazet
+
+[ Upstream commit 5f478b41033606d325e420df693162e2524c2b94 ]
+
+mvneta_tx() dereferences skb to get skb->len too late,
+as hardware might have completed the transmit and TX completion
+could have freed the skb from another cpu.
+
+Fixes: 71f6d1b31fb1 ("net: mvneta: replace Tx timer with a real interrupt")
+Signed-off-by: Eric Dumazet
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/marvell/mvneta.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -1721,6 +1721,7 @@ static int mvneta_tx(struct sk_buff *skb
+ 	u16 txq_id = skb_get_queue_mapping(skb);
+ 	struct mvneta_tx_queue *txq = &pp->txqs[txq_id];
+ 	struct mvneta_tx_desc *tx_desc;
++	int len = skb->len;
+ 	int frags = 0;
+ 	u32 tx_cmd;
+
+@@ -1788,7 +1789,7 @@ out:
+
+ 		u64_stats_update_begin(&stats->syncp);
+ 		stats->tx_packets++;
+-		stats->tx_bytes += skb->len;
++		stats->tx_bytes += len;
+ 		u64_stats_update_end(&stats->syncp);
+ 	} else {
+ 		dev->stats.tx_dropped++;
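The underlying rule in the mvneta_tx() fix is general: once a buffer has been
handed to the hardware, a TX completion on another CPU may free it at any
moment, so anything needed afterwards must be read beforehand. A simplified
sketch with hypothetical stand-in types and helpers, not the driver's real API:

    /* Stand-in types and helpers; the real driver uses struct sk_buff
     * and per-CPU stats, but the ordering constraint is the same. */
    struct skb_model { unsigned int len; };

    void hw_transmit(struct skb_model *skb);   /* hypothetical hand-off */
    void stats_add_bytes(unsigned int bytes);  /* hypothetical stats helper */

    static void tx_model(struct skb_model *skb)
    {
            unsigned int len = skb->len;    /* snapshot before hand-off */

            hw_transmit(skb);               /* completion may free skb now */

            stats_add_bytes(len);           /* skb->len here would race */
    }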
diff --git a/queue-3.18/net-mvneta-fix-tx-interrupt-delay.patch b/queue-3.18/net-mvneta-fix-tx-interrupt-delay.patch
new file mode 100644
index 00000000000..8cc99a643bd
--- /dev/null
+++ b/queue-3.18/net-mvneta-fix-tx-interrupt-delay.patch
@@ -0,0 +1,69 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: willy tarreau
+Date: Tue, 2 Dec 2014 08:13:04 +0100
+Subject: net: mvneta: fix Tx interrupt delay
+
+From: willy tarreau
+
+[ Upstream commit aebea2ba0f7495e1a1c9ea5e753d146cb2f6b845 ]
+
+The mvneta driver sets the amount of Tx coalesce packets to 16 by
+default. Normally that does not cause any trouble since the driver
+uses a much larger Tx ring size (532 packets). But some sockets
+might run with very small buffers, much smaller than the equivalent
+of 16 packets. This is what ping is doing for example, by setting
+SNDBUF to 324 bytes rounded up to 2kB by the kernel.
+
+The problem is that there is no documented method to force a specific
+packet to emit an interrupt (eg: the last of the ring) nor is it
+possible to make the NIC emit an interrupt after a given delay.
+
+In this case, it causes trouble, because when ping sends packets over
+its raw socket, the first few packets leave the system, and the first
+15 packets will be emitted without an IRQ being generated, so without
+the skbs being freed. And since the socket's buffer is small, there's
+no way to reach that amount of packets, and the ping ends up with
+"send: no buffer available" after sending 6 packets. Running with 3
+instances of ping in parallel is enough to hide the problem, because
+with 6 packets per instance, that's 18 packets total, which is enough
+to grant a Tx interrupt before all are sent.
+
+The original driver in the LSP kernel worked around this design flaw
+by using a software timer to clean up the Tx descriptors. This timer
+was slow and caused terrible network performance on some Tx-bound
+workloads (such as routing) but was enough to make tools like ping
+work correctly.
+
+Instead here, we simply set the packet counts before interrupt to 1.
+This ensures that each packet sent will produce an interrupt. NAPI
+takes care of coalescing interrupts since the interrupt is disabled
+once generated.
+
+No measurable performance or CPU usage impact was observed on small
+or large packets, including when saturating the link on Tx, and this
+fixes tools like ping which rely on too small a send buffer. If one
+wants to increase this value for certain workloads where it is safe
+to do so, "ethtool -C $dev tx-frames" will override this default
+setting.
+
+This fix needs to be applied to stable kernels starting with 3.10.
+
+Tested-By: Maggie Mae Roxas
+Signed-off-by: Willy Tarreau
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/marvell/mvneta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/marvell/mvneta.c
++++ b/drivers/net/ethernet/marvell/mvneta.c
+@@ -216,7 +216,7 @@
+ /* Various constants */
+
+ /* Coalescing */
+-#define MVNETA_TXDONE_COAL_PKTS		16
++#define MVNETA_TXDONE_COAL_PKTS		1
+ #define MVNETA_RX_COAL_PKTS		32
+ #define MVNETA_RX_COAL_USEC		100
+
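The buffer arithmetic in the description can be observed from userspace. A
small sketch, with the caveat that the effective value depends on the kernel's
SNDBUF minimum and rounding (the commit message assumes roughly 2kB): a
324-byte request comes back far smaller than 16 full packets' worth, so the
old coalescing default could stall before the first TX-done interrupt fired.

    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            int val = 324;              /* what ping asks for */
            socklen_t len = sizeof(val);

            if (fd < 0)
                    return 1;
            /* The kernel rounds the request up to its per-socket minimum. */
            setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val));
            getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &val, &len);
            printf("effective SNDBUF: %d bytes\n", val);
            return 0;
    }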
diff --git a/queue-3.18/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch b/queue-3.18/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch
new file mode 100644
index 00000000000..da12e498c68
--- /dev/null
+++ b/queue-3.18/net-sctp-use-max_header-for-headroom-reserve-in-output-path.patch
@@ -0,0 +1,52 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Daniel Borkmann
+Date: Wed, 3 Dec 2014 12:13:58 +0100
+Subject: net: sctp: use MAX_HEADER for headroom reserve in output path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Borkmann
+
+[ Upstream commit 9772b54c55266ce80c639a80aa68eeb908f8ecf5 ]
+
+To accommodate enough headroom for tunnels, use MAX_HEADER instead
+of LL_MAX_HEADER. Robert reported that after roughly 40hrs of trinity
+he hit an skb_under_panic() via the SCTP output path (see reference).
+I couldn't reproduce it from here, but not using MAX_HEADER as is
+done elsewhere in other protocols might be one possible cause for
+this.
+
+In any case, the accounting on the chunks themselves looks good, as
+the skb already passed the SCTP output path and did not hit any
+skb_over_panic(). Given tunneling was enabled in his .config, the
+headroom would have been expanded by MAX_HEADER in this case.
+
+Reported-by: Robert Święcki
+Reference: https://lkml.org/lkml/2014/12/1/507
+Fixes: 594ccc14dfe4d ("[SCTP] Replace incorrect use of dev_alloc_skb with alloc_skb in sctp_packet_transmit().")
+Signed-off-by: Daniel Borkmann
+Acked-by: Vlad Yasevich
+Acked-by: Neil Horman
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/sctp/output.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/output.c
++++ b/net/sctp/output.c
+@@ -401,12 +401,12 @@ int sctp_packet_transmit(struct sctp_pac
+ 	sk = chunk->skb->sk;
+
+ 	/* Allocate the new skb. */
+-	nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC);
++	nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC);
+ 	if (!nskb)
+ 		goto nomem;
+
+ 	/* Make sure the outbound skb has enough header room reserved. */
+-	skb_reserve(nskb, packet->overhead + LL_MAX_HEADER);
++	skb_reserve(nskb, packet->overhead + MAX_HEADER);
+
+ 	/* Set the owning socket so that we know where to get the
+ 	 * destination IP address.
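The difference between the two constants is precisely the tunnel allowance.
The following is paraphrased from include/linux/netdevice.h around v3.18
(hedged: the exact config conditions and values vary across versions and
configurations):

    /* LL_MAX_HEADER covers link-layer needs only; MAX_HEADER adds slack
     * for stacked tunnel headers, which is what the SCTP output path was
     * missing when a tunnel device sat below it. */
    #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
        !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
    #define MAX_HEADER LL_MAX_HEADER
    #else
    #define MAX_HEADER (LL_MAX_HEADER + 48) /* room for e.g. GRE + outer IP */
    #endif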
diff --git a/queue-3.18/netlink-use-jhash-as-hashfn-for-rhashtable.patch b/queue-3.18/netlink-use-jhash-as-hashfn-for-rhashtable.patch
new file mode 100644
index 00000000000..4eff6586d79
--- /dev/null
+++ b/queue-3.18/netlink-use-jhash-as-hashfn-for-rhashtable.patch
@@ -0,0 +1,45 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Daniel Borkmann
+Date: Wed, 10 Dec 2014 16:33:10 +0100
+Subject: netlink: use jhash as hashfn for rhashtable
+
+From: Daniel Borkmann
+
+[ Upstream commit 7f19fc5e0b617593dcda0d9956adc78b559ef1f5 ]
+
+For netlink, we shouldn't be using arch_fast_hash() as a hashing
+discipline, but rather jhash() instead.
+
+Since netlink sockets can be opened by any user, a local attacker
+would be able to easily create collisions with the DPDK-derived
+arch_fast_hash(), which trades off security for performance by
+using crc32 CPU instructions on x86_64.
+
+While it might have legitimate use cases in other places, it should
+be avoided in the netlink context. As rhashtable's API is very
+flexible, we could later on still decide on other hashing disciplines,
+if legitimate.
+
+Reference: http://thread.gmane.org/gmane.linux.kernel/1844123
+Fixes: e341694e3eb5 ("netlink: Convert netlink_lookup() to use RCU protected hash table")
+Cc: Herbert Xu
+Signed-off-by: Daniel Borkmann
+Acked-by: Thomas Graf
+Acked-by: Hannes Frederic Sowa
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/netlink/af_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -3130,7 +3130,7 @@ static int __init netlink_proto_init(voi
+ 		.head_offset = offsetof(struct netlink_sock, node),
+ 		.key_offset = offsetof(struct netlink_sock, portid),
+ 		.key_len = sizeof(u32), /* portid */
+-		.hashfn = arch_fast_hash,
++		.hashfn = jhash,
+ 		.max_shift = 16, /* 64K */
+ 		.grow_decision = rht_grow_above_75,
+ 		.shrink_decision = rht_shrink_below_30,
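The property at stake is seedability: jhash mixes a random initval into every
hash, so userspace cannot precompute portids that land in the same bucket,
while an unseeded crc32-based function can be attacked offline. An
illustrative userspace sketch; mix() is a generic stand-in for jhash, not its
real implementation:

    #include <stdint.h>

    /* Hypothetical stand-in for jhash(key, len, seed); any decent mixing
     * function works for the sketch. What matters is the seed parameter,
     * which jhash has and arch_fast_hash lacked. */
    static uint32_t mix(uint32_t key, uint32_t seed)
    {
            uint32_t h = key ^ seed;

            h ^= h >> 16;  h *= 0x7feb352d;
            h ^= h >> 15;  h *= 0x846ca68b;
            h ^= h >> 16;
            return h;
    }

    /* nbuckets must be a power of two, as in rhashtable. With a random
     * seed an attacker cannot know which portids share a bucket; with an
     * unseeded crc32 the collisions can be computed offline. */
    static uint32_t bucket_of(uint32_t portid, uint32_t seed, uint32_t nbuckets)
    {
            return mix(portid, seed) & (nbuckets - 1);
    }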
diff --git a/queue-3.18/openvswitch-fix-flow-mask-validation.patch b/queue-3.18/openvswitch-fix-flow-mask-validation.patch
new file mode 100644
index 00000000000..1a1199b8665
--- /dev/null
+++ b/queue-3.18/openvswitch-fix-flow-mask-validation.patch
@@ -0,0 +1,41 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Pravin B Shelar
+Date: Sun, 30 Nov 2014 23:04:17 -0800
+Subject: openvswitch: Fix flow mask validation.
+
+From: Pravin B Shelar
+
+[ Upstream commit f2a01517f2a1040a0b156f171a7cefd748f2fd03 ]
+
+The following patch fixes a typo in the flow validation which
+prevented installation of ARP and IPv6 flows.
+
+Fixes: 19e7a3df72 ("openvswitch: Fix NDP flow mask validation")
+Signed-off-by: Pravin B Shelar
+Reviewed-by: Thomas Graf
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/openvswitch/flow_netlink.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -145,7 +145,7 @@ static bool match_validate(const struct
+ 	if (match->key->eth.type == htons(ETH_P_ARP)
+ 			|| match->key->eth.type == htons(ETH_P_RARP)) {
+ 		key_expected |= 1 << OVS_KEY_ATTR_ARP;
+-		if (match->mask && (match->mask->key.tp.src == htons(0xff)))
++		if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ 			mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+ 	}
+
+@@ -220,7 +220,7 @@ static bool match_validate(const struct
+ 			    htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ 			    match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ 				key_expected |= 1 << OVS_KEY_ATTR_ND;
+-				if (match->mask && (match->mask->key.tp.src == htons(0xffff)))
++				if (match->mask && (match->mask->key.tp.src == htons(0xff)))
+ 					mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+ 			}
+ 	}
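Put differently, the validator should only widen mask_allowed when the mask
demands an exact match on the discriminating field; the typo compared the
wrong field in the ARP case and the wrong constant in the ND case. A hedged
sketch of that intent with simplified fields, not the real OVS structures:

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified mask fields, not the real struct sw_flow_mask layout. */
    struct mask_model {
            uint16_t eth_type;      /* EtherType mask */
            uint16_t tp_src;        /* field the ND check inspects */
    };

    static bool arp_mask_ok(const struct mask_model *m)
    {
            /* the broken code tested tp_src against 0x00ff here */
            return m->eth_type == 0xffff;   /* exact EtherType match */
    }

    static bool nd_mask_ok(const struct mask_model *m)
    {
            /* the broken code demanded 0xffff; the fix expects htons(0xff) */
            return m->tp_src == 0x00ff;
    }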
diff --git a/queue-3.18/tcp-fix-more-null-deref-after-prequeue-changes.patch b/queue-3.18/tcp-fix-more-null-deref-after-prequeue-changes.patch
new file mode 100644
index 00000000000..cd3c8759435
--- /dev/null
+++ b/queue-3.18/tcp-fix-more-null-deref-after-prequeue-changes.patch
@@ -0,0 +1,129 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: Eric Dumazet
+Date: Tue, 9 Dec 2014 09:56:08 -0800
+Subject: tcp: fix more NULL deref after prequeue changes
+
+From: Eric Dumazet
+
+[ Upstream commit 0f85feae6b710ced3abad5b2b47d31dfcb956b62 ]
+
+When I cooked commit c3658e8d0f1 ("tcp: fix possible NULL dereference in
+tcp_vX_send_reset()") I missed other spots where we could deref a NULL
+skb_dst(skb).
+
+Again, if a socket is provided, we do not need skb_dst() to get a
+pointer to the network namespace: sock_net(sk) is good enough.
+
+Reported-by: Dann Frazier
+Bisected-by: Dann Frazier
+Tested-by: Dann Frazier
+Signed-off-by: Eric Dumazet
+Fixes: ca777eff51f7 ("tcp: remove dst refcount false sharing for prequeue mode")
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/ipv4/tcp_ipv4.c |  4 ++--
+ net/ipv6/tcp_ipv6.c | 28 ++++++++++++++--------------
+ 2 files changed, 16 insertions(+), 16 deletions(-)
+
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -623,6 +623,7 @@ static void tcp_v4_send_reset(struct soc
+ 	arg.iov[0].iov_base = (unsigned char *)&rep;
+ 	arg.iov[0].iov_len = sizeof(rep.th);
+
++	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
+ #ifdef CONFIG_TCP_MD5SIG
+ 	hash_location = tcp_parse_md5sig_option(th);
+ 	if (!sk && hash_location) {
+@@ -633,7 +634,7 @@ static void tcp_v4_send_reset(struct soc
+ 		 * Incoming packet is checked with md5 hash with finding key,
+ 		 * no RST generated if md5 hash doesn't match.
+ 		 */
+-		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
++		sk1 = __inet_lookup_listener(net,
+ 					     &tcp_hashinfo, ip_hdr(skb)->saddr,
+ 					     th->source, ip_hdr(skb)->daddr,
+ 					     ntohs(th->source), inet_iif(skb));
+@@ -681,7 +682,6 @@ static void tcp_v4_send_reset(struct soc
+ 	if (sk)
+ 		arg.bound_dev_if = sk->sk_bound_dev_if;
+
+-	net = dev_net(skb_dst(skb)->dev);
+ 	arg.tos = ip_hdr(skb)->tos;
+ 	ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -787,16 +787,16 @@ static const struct tcp_request_sock_ops
+ 	.queue_hash_add = inet6_csk_reqsk_queue_hash_add,
+ };
+
+-static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
+-				 u32 tsval, u32 tsecr, int oif,
+-				 struct tcp_md5sig_key *key, int rst, u8 tclass,
+-				 u32 label)
++static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
++				 u32 ack, u32 win, u32 tsval, u32 tsecr,
++				 int oif, struct tcp_md5sig_key *key, int rst,
++				 u8 tclass, u32 label)
+ {
+ 	const struct tcphdr *th = tcp_hdr(skb);
+ 	struct tcphdr *t1;
+ 	struct sk_buff *buff;
+ 	struct flowi6 fl6;
+-	struct net *net = dev_net(skb_dst(skb)->dev);
++	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
+ 	struct sock *ctl_sk = net->ipv6.tcp_sk;
+ 	unsigned int tot_len = sizeof(struct tcphdr);
+ 	struct dst_entry *dst;
+@@ -946,7 +946,7 @@ static void tcp_v6_send_reset(struct soc
+ 			  (th->doff << 2);
+
+ 	oif = sk ? sk->sk_bound_dev_if : 0;
+-	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
++	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
+
+ #ifdef CONFIG_TCP_MD5SIG
+ release_sk1:
+@@ -957,13 +957,13 @@ release_sk1:
+ #endif
+ }
+
+-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+-			    u32 win, u32 tsval, u32 tsecr, int oif,
++static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq,
++			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
+ 			    struct tcp_md5sig_key *key, u8 tclass,
+ 			    u32 label)
+ {
+-	tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass,
+-			     label);
++	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
++			     tclass, label);
+ }
+
+ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
+@@ -971,7 +971,7 @@ static void tcp_v6_timewait_ack(struct s
+ 	struct inet_timewait_sock *tw = inet_twsk(sk);
+ 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+
+-	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
++	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+ 			tcp_time_stamp + tcptw->tw_ts_offset,
+ 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+@@ -986,10 +986,10 @@ static void tcp_v6_reqsk_send_ack(struct
+ 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ 	 */
+-	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
++	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
+ 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+-			tcp_rsk(req)->rcv_nxt,
+-			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
++			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
++			tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+ 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
+ 			0, 0);
+ }
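All of the hunks above reduce to a single rule: prefer the socket's namespace
and only fall back to the dst, which after the prequeue changes may
legitimately be NULL. A compact sketch with stand-in types, not the kernel's
struct definitions:

    /* Stand-in types; only the pointer relationships matter here. */
    struct net;
    struct net_device { struct net *nd_net; };
    struct dst_entry  { struct net_device *dev; };
    struct sock_model { struct net *net; };
    struct skb_model  { struct dst_entry *dst; };   /* may be NULL */

    static struct net *pick_net(struct sock_model *sk, struct skb_model *skb)
    {
            /* sock_net(sk) is always valid for a real socket; the dst is
             * only safe to touch when no socket is available at all. */
            return sk ? sk->net : skb->dst->dev->nd_net;
    }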
diff --git a/queue-3.18/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch b/queue-3.18/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch
new file mode 100644
index 00000000000..451accf0f9c
--- /dev/null
+++ b/queue-3.18/xen-netfront-use-correct-linear-area-after-linearizing-an-skb.patch
@@ -0,0 +1,45 @@
+From foo@baz Sun Dec 14 08:37:47 PST 2014
+From: David Vrabel
+Date: Tue, 9 Dec 2014 18:43:28 +0000
+Subject: xen-netfront: use correct linear area after linearizing an skb
+
+From: David Vrabel
+
+[ Upstream commit 11d3d2a16cc1f05c6ece69a4392e99efb85666a6 ]
+
+Commit 97a6d1bb2b658ac85ed88205ccd1ab809899884d ("xen-netfront: Fix
+handling packets on compound pages with skb_linearize") attempted to
+fix a problem where an skb that would have required too many slots
+would be dropped, causing TCP connections to stall.
+
+However, it filled in the first slot using the original buffer rather
+than the new one, and so used the wrong offset and granted access to
+the wrong page.
+
+Netback would notice the malformed request and stop all traffic on the
+VIF, reporting:
+
+    vif vif-3-0 vif3.0: txreq.offset: 85e, size: 4002, end: 6144
+    vif vif-3-0 vif3.0: fatal error; disabling device
+
+Reported-by: Anthony Wright
+Tested-by: Anthony Wright
+Signed-off-by: David Vrabel
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/xen-netfront.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -627,6 +627,9 @@ static int xennet_start_xmit(struct sk_b
+ 				   slots, skb->len);
+ 		if (skb_linearize(skb))
+ 			goto drop;
++		data = skb->data;
++		offset = offset_in_page(data);
++		len = skb_headlen(skb);
+ 	}
+
+ 	spin_lock_irqsave(&queue->tx_lock, flags);
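In miniature, the rule the patch enforces: skb_linearize() may substitute a
new data buffer, so every value derived from the old skb->data is stale
afterwards. A hedged model with stand-in types and helpers, not the real
netfront code:

    #include <errno.h>

    struct skb_model { char *data; unsigned int headlen; };

    /* Hypothetical stand-ins for the kernel helpers the real code uses. */
    int skb_linearize_model(struct skb_model *skb);  /* may replace data */
    unsigned long offset_in_page_model(const void *p);

    static int xmit_model(struct skb_model *skb, int too_many_slots)
    {
            char *data = skb->data;
            unsigned long offset = offset_in_page_model(data);
            unsigned int len = skb->headlen;

            if (too_many_slots) {
                    if (skb_linearize_model(skb))
                            return -ENOMEM; /* drop the packet */
                    /* The fix: recompute everything tied to the old buffer. */
                    data = skb->data;
                    offset = offset_in_page_model(data);
                    len = skb->headlen;
            }
            /* ... build grant references from data/offset/len ... */
            (void)data; (void)offset; (void)len;
            return 0;
    }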