From d53b8acbe5426497e66b88ca55b8525f906c6763 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Feb 2014 09:17:02 -0800 Subject: [PATCH] 3.12-stable patches added patches: bnx2x-fix-dma-unmapping-of-tso-split-bds.patch bpf-do-not-use-reciprocal-divide.patch fib_frontend-fix-possible-null-pointer-dereference.patch ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch net-gre-use-icmp_hdr-to-get-inner-ip-header.patch net-rds-fix-per-cpu-helper-usage.patch net-usbnet-fix-sg-initialisation.patch net-via-rhine-fix-tx_timeout-handling.patch net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch xen-netfront-fix-resource-leak-in-netfront.patch --- ...x-fix-dma-unmapping-of-tso-split-bds.patch | 73 ++++++ .../bpf-do-not-use-reciprocal-divide.patch | 220 ++++++++++++++++++ ...ix-possible-null-pointer-dereference.patch | 35 +++ ...-memory-leak-in-ieee802154_add_iface.patch | 34 +++ ...ump_icsk-timewait-socket-state-logic.patch | 63 +++++ ...t-in-case-dst_link_failure-is-called.patch | 36 +++ ...on-fib_rules-in-multicast-forwarding.patch | 73 ++++++ ...-if-tproxy-used-with-tcp-early-demux.patch | 96 ++++++++ ...-use-icmp_hdr-to-get-inner-ip-header.patch | 40 ++++ .../net-rds-fix-per-cpu-helper-usage.patch | 52 +++++ .../net-usbnet-fix-sg-initialisation.patch | 40 ++++ ...et-via-rhine-fix-tx_timeout-handling.patch | 34 +++ ...marking-and-checksum-checks-with-ovs.patch | 59 +++++ ...ons-use-unsigned-divide-instructions.patch | 74 ++++++ queue-3.12/series | 16 ++ ...entries-with-the-same-destination-ip.patch | 128 
++++++++++ ...tfront-fix-resource-leak-in-netfront.patch | 204 ++++++++++++++++ 17 files changed, 1277 insertions(+) create mode 100644 queue-3.12/bnx2x-fix-dma-unmapping-of-tso-split-bds.patch create mode 100644 queue-3.12/bpf-do-not-use-reciprocal-divide.patch create mode 100644 queue-3.12/fib_frontend-fix-possible-null-pointer-dereference.patch create mode 100644 queue-3.12/ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch create mode 100644 queue-3.12/inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch create mode 100644 queue-3.12/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch create mode 100644 queue-3.12/net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch create mode 100644 queue-3.12/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch create mode 100644 queue-3.12/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch create mode 100644 queue-3.12/net-rds-fix-per-cpu-helper-usage.patch create mode 100644 queue-3.12/net-usbnet-fix-sg-initialisation.patch create mode 100644 queue-3.12/net-via-rhine-fix-tx_timeout-handling.patch create mode 100644 queue-3.12/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch create mode 100644 queue-3.12/s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch create mode 100644 queue-3.12/tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch create mode 100644 queue-3.12/xen-netfront-fix-resource-leak-in-netfront.patch diff --git a/queue-3.12/bnx2x-fix-dma-unmapping-of-tso-split-bds.patch b/queue-3.12/bnx2x-fix-dma-unmapping-of-tso-split-bds.patch new file mode 100644 index 00000000000..ddae2811dc6 --- /dev/null +++ b/queue-3.12/bnx2x-fix-dma-unmapping-of-tso-split-bds.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Michal Schmidt +Date: Thu, 9 Jan 2014 14:36:27 +0100 +Subject: bnx2x: fix DMA unmapping of TSO split BDs + +From: Michal Schmidt + +[ Upstream commit 
95e92fd40c967c363ad66b2fd1ce4dcd68132e54 ] + +bnx2x triggers warnings with CONFIG_DMA_API_DEBUG=y: + + WARNING: CPU: 0 PID: 2253 at lib/dma-debug.c:887 check_unmap+0xf8/0x920() + bnx2x 0000:28:00.0: DMA-API: device driver frees DMA memory with + different size [device address=0x00000000da2b389e] [map size=1490 bytes] + [unmap size=66 bytes] + +The reason is that bnx2x splits a TSO BD into two BDs (headers + data) +using one DMA mapping for both, but it uses only the length of the first +BD when unmapping. + +This patch fixes the bug by unmapping the whole length of the two BDs. + +Signed-off-by: Michal Schmidt +Reviewed-by: Eric Dumazet +Acked-by: Dmitry Kravkov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c ++++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +@@ -160,6 +160,7 @@ static u16 bnx2x_free_tx_pkt(struct bnx2 + struct sk_buff *skb = tx_buf->skb; + u16 bd_idx = TX_BD(tx_buf->first_bd), new_cons; + int nbd; ++ u16 split_bd_len = 0; + + /* prefetch skb end pointer to speedup dev_kfree_skb() */ + prefetch(&skb->end); +@@ -167,10 +168,7 @@ static u16 bnx2x_free_tx_pkt(struct bnx2 + DP(NETIF_MSG_TX_DONE, "fp[%d]: pkt_idx %d buff @(%p)->skb %p\n", + txdata->txq_index, idx, tx_buf, skb); + +- /* unmap first bd */ + tx_start_bd = &txdata->tx_desc_ring[bd_idx].start_bd; +- dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd), +- BD_UNMAP_LEN(tx_start_bd), DMA_TO_DEVICE); + + nbd = le16_to_cpu(tx_start_bd->nbd) - 1; + #ifdef BNX2X_STOP_ON_ERROR +@@ -188,12 +186,19 @@ static u16 bnx2x_free_tx_pkt(struct bnx2 + --nbd; + bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + +- /* ...and the TSO split header bd since they have no mapping */ ++ /* TSO headers+data bds share a common mapping. 
See bnx2x_tx_split() */ + if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) { ++ tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd; ++ split_bd_len = BD_UNMAP_LEN(tx_data_bd); + --nbd; + bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + } + ++ /* unmap first bd */ ++ dma_unmap_single(&bp->pdev->dev, BD_UNMAP_ADDR(tx_start_bd), ++ BD_UNMAP_LEN(tx_start_bd) + split_bd_len, ++ DMA_TO_DEVICE); ++ + /* now free frags */ + while (nbd > 0) { + diff --git a/queue-3.12/bpf-do-not-use-reciprocal-divide.patch b/queue-3.12/bpf-do-not-use-reciprocal-divide.patch new file mode 100644 index 00000000000..718049a41be --- /dev/null +++ b/queue-3.12/bpf-do-not-use-reciprocal-divide.patch @@ -0,0 +1,220 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Eric Dumazet +Date: Wed, 15 Jan 2014 06:50:07 -0800 +Subject: bpf: do not use reciprocal divide + +From: Eric Dumazet + +[ Upstream commit aee636c4809fa54848ff07a899b326eb1f9987a2 ] + +At first Jakub Zawadzki noticed that some divisions by reciprocal_divide +were not correct. (off by one in some cases) +http://www.wireshark.org/~darkjames/reciprocal-buggy.c + +He could also show this with BPF: +http://www.wireshark.org/~darkjames/set-and-dump-filter-k-bug.c + +The reciprocal divide in linux kernel is not generic enough, +lets remove its use in BPF, as it is not worth the pain with +current cpus. + +Signed-off-by: Eric Dumazet +Reported-by: Jakub Zawadzki +Cc: Mircea Gherzan +Cc: Daniel Borkmann +Cc: Hannes Frederic Sowa +Cc: Matt Evans +Cc: Martin Schwidefsky +Cc: Heiko Carstens +Cc: David S. Miller +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/net/bpf_jit_32.c | 6 +++--- + arch/powerpc/net/bpf_jit_comp.c | 7 ++++--- + arch/s390/net/bpf_jit_comp.c | 17 ++++++++++++----- + arch/sparc/net/bpf_jit_comp.c | 17 ++++++++++++++--- + arch/x86/net/bpf_jit_comp.c | 14 ++++++++++---- + net/core/filter.c | 30 ++---------------------------- + 6 files changed, 45 insertions(+), 46 deletions(-) + +--- a/arch/arm/net/bpf_jit_32.c ++++ b/arch/arm/net/bpf_jit_32.c +@@ -637,10 +637,10 @@ load_ind: + emit(ARM_MUL(r_A, r_A, r_X), ctx); + break; + case BPF_S_ALU_DIV_K: +- /* current k == reciprocal_value(userspace k) */ ++ if (k == 1) ++ break; + emit_mov_i(r_scratch, k, ctx); +- /* A = top 32 bits of the product */ +- emit(ARM_UMULL(r_scratch, r_A, r_A, r_scratch), ctx); ++ emit_udiv(r_A, r_A, r_scratch, ctx); + break; + case BPF_S_ALU_DIV_X: + update_on_xread(ctx); +--- a/arch/powerpc/net/bpf_jit_comp.c ++++ b/arch/powerpc/net/bpf_jit_comp.c +@@ -209,10 +209,11 @@ static int bpf_jit_build_body(struct sk_ + } + PPC_DIVWU(r_A, r_A, r_X); + break; +- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ ++ case BPF_S_ALU_DIV_K: /* A /= K */ ++ if (K == 1) ++ break; + PPC_LI32(r_scratch1, K); +- /* Top 32 bits of 64bit result -> A */ +- PPC_MULHWU(r_A, r_A, r_scratch1); ++ PPC_DIVWU(r_A, r_A, r_scratch1); + break; + case BPF_S_ALU_AND_X: + ctx->seen |= SEEN_XREG; +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -371,11 +371,13 @@ static int bpf_jit_insn(struct bpf_jit * + /* dr %r4,%r12 */ + EMIT2(0x1d4c); + break; +- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K) */ +- /* m %r4,(%r13) */ +- EMIT4_DISP(0x5c40d000, EMIT_CONST(K)); +- /* lr %r5,%r4 */ +- EMIT2(0x1854); ++ case BPF_S_ALU_DIV_K: /* A /= K */ ++ if (K == 1) ++ break; ++ /* lhi %r4,0 */ ++ EMIT4(0xa7480000); ++ /* d %r4,(%r13) */ ++ EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); + break; + case BPF_S_ALU_MOD_X: /* A %= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; +@@ -391,6 +393,11 @@ 
static int bpf_jit_insn(struct bpf_jit * + EMIT2(0x1854); + break; + case BPF_S_ALU_MOD_K: /* A %= K */ ++ if (K == 1) { ++ /* lhi %r5,0 */ ++ EMIT4(0xa7580000); ++ break; ++ } + /* lhi %r4,0 */ + EMIT4(0xa7480000); + /* d %r4,(%r13) */ +--- a/arch/sparc/net/bpf_jit_comp.c ++++ b/arch/sparc/net/bpf_jit_comp.c +@@ -497,9 +497,20 @@ void bpf_jit_compile(struct sk_filter *f + case BPF_S_ALU_MUL_K: /* A *= K */ + emit_alu_K(MUL, K); + break; +- case BPF_S_ALU_DIV_K: /* A /= K */ +- emit_alu_K(MUL, K); +- emit_read_y(r_A); ++ case BPF_S_ALU_DIV_K: /* A /= K with K != 0*/ ++ if (K == 1) ++ break; ++ emit_write_y(G0); ++#ifdef CONFIG_SPARC32 ++ /* The Sparc v8 architecture requires ++ * three instructions between a %y ++ * register write and the first use. ++ */ ++ emit_nop(); ++ emit_nop(); ++ emit_nop(); ++#endif ++ emit_alu_K(DIV, K); + break; + case BPF_S_ALU_DIV_X: /* A /= X; */ + emit_cmpi(r_X, 0); +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -359,15 +359,21 @@ void bpf_jit_compile(struct sk_filter *f + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ ++ if (K == 1) { ++ CLEAR_A(); ++ break; ++ } + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ + EMIT2(0xf7, 0xf1); /* div %ecx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; +- case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ +- EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ +- EMIT(K, 4); +- EMIT4(0x48, 0xc1, 0xe8, 0x20); /* shr $0x20,%rax */ ++ case BPF_S_ALU_DIV_K: /* A /= K */ ++ if (K == 1) ++ break; ++ EMIT2(0x31, 0xd2); /* xor %edx,%edx */ ++ EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ ++ EMIT2(0xf7, 0xf1); /* div %ecx */ + break; + case BPF_S_ALU_AND_X: + seen |= SEEN_XREG; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -36,7 +36,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct + A /= X; 
+ continue; + case BPF_S_ALU_DIV_K: +- A = reciprocal_divide(A, K); ++ A /= K; + continue; + case BPF_S_ALU_MOD_X: + if (X == 0) +@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *fi + /* Some instructions need special checks */ + switch (code) { + case BPF_S_ALU_DIV_K: +- /* check for division by zero */ +- if (ftest->k == 0) +- return -EINVAL; +- ftest->k = reciprocal_value(ftest->k); +- break; + case BPF_S_ALU_MOD_K: + /* check for division by zero */ + if (ftest->k == 0) +@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter + to->code = decodes[code]; + to->jt = filt->jt; + to->jf = filt->jf; +- +- if (code == BPF_S_ALU_DIV_K) { +- /* +- * When loaded this rule user gave us X, which was +- * translated into R = r(X). Now we calculate the +- * RR = r(R) and report it back. If next time this +- * value is loaded and RRR = r(RR) is calculated +- * then the R == RRR will be true. +- * +- * One exception. X == 1 translates into R == 0 and +- * we can't calculate RR out of it with r(). 
+- */ +- +- if (filt->k == 0) +- to->k = 1; +- else +- to->k = reciprocal_value(filt->k); +- +- BUG_ON(reciprocal_value(to->k) != filt->k); +- } else +- to->k = filt->k; ++ to->k = filt->k; + } + + int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/queue-3.12/fib_frontend-fix-possible-null-pointer-dereference.patch b/queue-3.12/fib_frontend-fix-possible-null-pointer-dereference.patch new file mode 100644 index 00000000000..fa5a0956fa2 --- /dev/null +++ b/queue-3.12/fib_frontend-fix-possible-null-pointer-dereference.patch @@ -0,0 +1,35 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Oliver Hartkopp +Date: Thu, 23 Jan 2014 10:19:34 +0100 +Subject: fib_frontend: fix possible NULL pointer dereference + +From: Oliver Hartkopp + +[ Upstream commit a0065f266a9b5d51575535a25c15ccbeed9a9966 ] + +The two commits 0115e8e30d (net: remove delay at device dismantle) and +748e2d9396a (net: reinstate rtnl in call_netdevice_notifiers()) silently +removed a NULL pointer check for in_dev since Linux 3.7. + +This patch re-introduces this check as it causes crashing the kernel when +setting small mtu values on non-ip capable netdevices. + +Signed-off-by: Oliver Hartkopp +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_frontend.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -1049,6 +1049,8 @@ static int fib_netdev_event(struct notif + } + + in_dev = __in_dev_get_rtnl(dev); ++ if (!in_dev) ++ return NOTIFY_DONE; + + switch (event) { + case NETDEV_UP: diff --git a/queue-3.12/ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch b/queue-3.12/ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch new file mode 100644 index 00000000000..f2d565911c6 --- /dev/null +++ b/queue-3.12/ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Christian Engelmayer +Date: Sat, 11 Jan 2014 22:19:30 +0100 +Subject: ieee802154: Fix memory leak in ieee802154_add_iface() + +From: Christian Engelmayer + +[ Upstream commit 267d29a69c6af39445f36102a832b25ed483f299 ] + +Fix a memory leak in the ieee802154_add_iface() error handling path. +Detected by Coverity: CID 710490. + +Signed-off-by: Christian Engelmayer +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ieee802154/nl-phy.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ieee802154/nl-phy.c ++++ b/net/ieee802154/nl-phy.c +@@ -224,8 +224,10 @@ static int ieee802154_add_iface(struct s + + if (info->attrs[IEEE802154_ATTR_DEV_TYPE]) { + type = nla_get_u8(info->attrs[IEEE802154_ATTR_DEV_TYPE]); +- if (type >= __IEEE802154_DEV_MAX) +- return -EINVAL; ++ if (type >= __IEEE802154_DEV_MAX) { ++ rc = -EINVAL; ++ goto nla_put_failure; ++ } + } + + dev = phy->add_iface(phy, devname, type); diff --git a/queue-3.12/inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch b/queue-3.12/inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch new file mode 100644 index 00000000000..133b2adba9f --- /dev/null +++ b/queue-3.12/inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch @@ -0,0 +1,63 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Neal Cardwell +Date: Sun, 2 Feb 2014 20:40:13 -0500 +Subject: inet_diag: fix inet_diag_dump_icsk() timewait socket state logic + +From: Neal Cardwell + +[ Based upon upstream commit 70315d22d3c7383f9a508d0aab21e2eb35b2303a ] + +Fix inet_diag_dump_icsk() to reflect the fact that both TIME_WAIT and +FIN_WAIT2 connections are represented by inet_timewait_sock (not just +TIME_WAIT). Thus: + +(a) We need to iterate through the time_wait buckets if the user wants +either TIME_WAIT or FIN_WAIT2. (Before fixing this, "ss -nemoi state +fin-wait-2" would not return any sockets, even if there were some in +FIN_WAIT2.) + +(b) We need to check tw_substate to see if the user wants to dump +sockets in the particular substate (TIME_WAIT or FIN_WAIT2) that a +given connection is in. (Before fixing this, "ss -nemoi state +time-wait" would actually return sockets in state FIN_WAIT2.) 
+ +An analogous fix is in v3.13: 70315d22d3c7383f9a508d0aab21e2eb35b2303a +("inet_diag: fix inet_diag_dump_icsk() to use correct state for +timewait sockets") but that patch is quite different because 3.13 code +is very different in this area due to the unification of TCP hash +tables in 05dbc7b ("tcp/dccp: remove twchain") in v3.13-rc1. + +I tested that this applies cleanly between v3.3 and v3.12, and tested +that it works in both 3.3 and 3.12. It does not apply cleanly to 3.2 +and earlier (though it makes semantic sense), and semantically is not +the right fix for 3.13 and beyond (as mentioned above). + +Signed-off-by: Neal Cardwell +Cc: Eric Dumazet +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_diag.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/inet_diag.c ++++ b/net/ipv4/inet_diag.c +@@ -961,7 +961,7 @@ next_normal: + ++num; + } + +- if (r->idiag_states & TCPF_TIME_WAIT) { ++ if (r->idiag_states & (TCPF_TIME_WAIT | TCPF_FIN_WAIT2)) { + struct inet_timewait_sock *tw; + + inet_twsk_for_each(tw, node, +@@ -971,6 +971,8 @@ next_normal: + + if (num < s_num) + goto next_dying; ++ if (!(r->idiag_states & (1 << tw->tw_substate))) ++ goto next_dying; + if (r->sdiag_family != AF_UNSPEC && + tw->tw_family != r->sdiag_family) + goto next_dying; diff --git a/queue-3.12/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch b/queue-3.12/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch new file mode 100644 index 00000000000..c724a4af28e --- /dev/null +++ b/queue-3.12/ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch @@ -0,0 +1,36 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Duan Jiong +Date: Thu, 23 Jan 2014 14:00:25 +0800 +Subject: ip_tunnel: clear IPCB in ip_tunnel_xmit() in case dst_link_failure() is called + +From: Duan Jiong + +[ Upstream commit 
11c21a307d79ea5f6b6fc0d3dfdeda271e5e65f6 ] + +commit a622260254ee48("ip_tunnel: fix kernel panic with icmp_dest_unreach") +clear IPCB in ip_tunnel_xmit() , or else skb->cb[] may contain garbage from +GSO segmentation layer. + +But commit 0e6fbc5b6c621("ip_tunnels: extend iptunnel_xmit()") refactor codes, +and it clear IPCB behind the dst_link_failure(). + +So clear IPCB in ip_tunnel_xmit() just like commti a622260254ee48("ip_tunnel: +fix kernel panic with icmp_dest_unreach"). + +Signed-off-by: Duan Jiong +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -618,6 +618,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + ++ memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + dst_link_failure(skb); + } else + tunnel->err_count = 0; diff --git a/queue-3.12/net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch b/queue-3.12/net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch new file mode 100644 index 00000000000..48b5659b7aa --- /dev/null +++ b/queue-3.12/net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch @@ -0,0 +1,73 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Hannes Frederic Sowa +Date: Mon, 13 Jan 2014 02:45:22 +0100 +Subject: net: avoid reference counter overflows on fib_rules in multicast forwarding + +From: Hannes Frederic Sowa + +[ Upstream commit 95f4a45de1a0f172b35451fc52283290adb21f6e ] + +Bob Falken reported that after 4G packets, multicast forwarding stopped +working. This was because of a rule reference counter overflow which +freed the rule as soon as the overflow happend. + +This patch solves this by adding the FIB_LOOKUP_NOREF flag to +fib_rules_lookup calls. 
This is safe even from non-rcu locked sections +as in this case the flag only implies not taking a reference to the rule, +which we don't need at all. + +Rules only hold references to the namespace, which are guaranteed to be +available during the call of the non-rcu protected function reg_vif_xmit +because of the interface reference which itself holds a reference to +the net namespace. + +Fixes: f0ad0860d01e47 ("ipv4: ipmr: support multiple tables") +Fixes: d1db275dd3f6e4 ("ipv6: ip6mr: support multiple tables") +Reported-by: Bob Falken +Cc: Patrick McHardy +Cc: Thomas Graf +Cc: Julian Anastasov +Cc: Eric Dumazet +Signed-off-by: Hannes Frederic Sowa +Acked-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipmr.c | 7 +++++-- + net/ipv6/ip6mr.c | 7 +++++-- + 2 files changed, 10 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ipmr.c ++++ b/net/ipv4/ipmr.c +@@ -157,9 +157,12 @@ static struct mr_table *ipmr_get_table(s + static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, + struct mr_table **mrt) + { +- struct ipmr_result res; +- struct fib_lookup_arg arg = { .result = &res, }; + int err; ++ struct ipmr_result res; ++ struct fib_lookup_arg arg = { ++ .result = &res, ++ .flags = FIB_LOOKUP_NOREF, ++ }; + + err = fib_rules_lookup(net->ipv4.mr_rules_ops, + flowi4_to_flowi(flp4), 0, &arg); +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table + static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, + struct mr6_table **mrt) + { +- struct ip6mr_result res; +- struct fib_lookup_arg arg = { .result = &res, }; + int err; ++ struct ip6mr_result res; ++ struct fib_lookup_arg arg = { ++ .result = &res, ++ .flags = FIB_LOOKUP_NOREF, ++ }; + + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, + flowi6_to_flowi(flp6), 0, &arg); diff --git a/queue-3.12/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch 
b/queue-3.12/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch new file mode 100644 index 00000000000..33f1d4f4e40 --- /dev/null +++ b/queue-3.12/net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch @@ -0,0 +1,96 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Holger Eitzenberger +Date: Mon, 27 Jan 2014 10:33:18 +0100 +Subject: net: Fix memory leak if TPROXY used with TCP early demux + +From: Holger Eitzenberger + +[ Upstream commit a452ce345d63ddf92cd101e4196569f8718ad319 ] + +I see a memory leak when using a transparent HTTP proxy using TPROXY +together with TCP early demux and Kernel v3.8.13.15 (Ubuntu stable): + +unreferenced object 0xffff88008cba4a40 (size 1696): + comm "softirq", pid 0, jiffies 4294944115 (age 8907.520s) + hex dump (first 32 bytes): + 0a e0 20 6a 40 04 1b 37 92 be 32 e2 e8 b4 00 00 .. j@..7..2..... + 02 00 07 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [] kmem_cache_alloc+0xad/0xb9 + [] sk_prot_alloc+0x29/0xc5 + [] sk_clone_lock+0x14/0x283 + [] inet_csk_clone_lock+0xf/0x7b + [] netlink_broadcast+0x14/0x16 + [] tcp_create_openreq_child+0x1b/0x4c3 + [] tcp_v4_syn_recv_sock+0x38/0x25d + [] tcp_check_req+0x25c/0x3d0 + [] tcp_v4_do_rcv+0x287/0x40e + [] ip_route_input_noref+0x843/0xa55 + [] tcp_v4_rcv+0x4c9/0x725 + [] ip_local_deliver_finish+0xe9/0x154 + [] __netif_receive_skb+0x4b2/0x514 + [] process_backlog+0xee/0x1c5 + [] net_rx_action+0xa7/0x200 + [] add_interrupt_randomness+0x39/0x157 + +But there are many more, resulting in the machine going OOM after some +days. + +From looking at the TPROXY code, and with help from Florian, I see +that the memory leak is introduced in tcp_v4_early_demux(): + + void tcp_v4_early_demux(struct sk_buff *skb) + { + /* ... 
*/ + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + skb->skb_iif); + if (sk) { + skb->sk = sk; + +where the socket is assigned unconditionally to skb->sk, also bumping +the refcnt on it. This is problematic, because in our case the skb +has already a socket assigned in the TPROXY target. This then results +in the leak I see. + +The very same issue seems to be with IPv6, but haven't tested. + +Reviewed-by: Florian Westphal +Signed-off-by: Holger Eitzenberger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_input.c | 2 +- + net/ipv6/ip6_input.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/ip_input.c ++++ b/net/ipv4/ip_input.c +@@ -314,7 +314,7 @@ static int ip_rcv_finish(struct sk_buff + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt; + +- if (sysctl_ip_early_demux && !skb_dst(skb)) { ++ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + +--- a/net/ipv6/ip6_input.c ++++ b/net/ipv6/ip6_input.c +@@ -49,7 +49,7 @@ + + int ip6_rcv_finish(struct sk_buff *skb) + { +- if (sysctl_ip_early_demux && !skb_dst(skb)) { ++ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + const struct inet6_protocol *ipprot; + + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); diff --git a/queue-3.12/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch b/queue-3.12/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch new file mode 100644 index 00000000000..d85260226bc --- /dev/null +++ b/queue-3.12/net-gre-use-icmp_hdr-to-get-inner-ip-header.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Duan Jiong +Date: Tue, 28 Jan 2014 11:49:43 +0800 +Subject: net: gre: use icmp_hdr() to get inner ip header + +From: Duan 
Jiong + +[ Upstream commit c0c0c50ff7c3e331c90bab316d21f724fb9e1994 ] + +When dealing with icmp messages, the skb->data points the +ip header that triggered the sending of the icmp message. + +In gre_cisco_err(), the parse_gre_header() is called, and the +iptunnel_pull_header() is called to pull the skb at the end of +the parse_gre_header(), so the skb->data doesn't point the +inner ip header. + +Unfortunately, the ipgre_err still needs those ip addresses in +inner ip header to look up tunnel by ip_tunnel_lookup(). + +So just use icmp_hdr() to get inner ip header instead of skb->data. + +Signed-off-by: Duan Jiong +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_gre.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/ip_gre.c ++++ b/net/ipv4/ip_gre.c +@@ -178,7 +178,7 @@ static int ipgre_err(struct sk_buff *skb + else + itn = net_generic(net, ipgre_net_id); + +- iph = (const struct iphdr *)skb->data; ++ iph = (const struct iphdr *)(icmp_hdr(skb) + 1); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); + diff --git a/queue-3.12/net-rds-fix-per-cpu-helper-usage.patch b/queue-3.12/net-rds-fix-per-cpu-helper-usage.patch new file mode 100644 index 00000000000..15033a1aad9 --- /dev/null +++ b/queue-3.12/net-rds-fix-per-cpu-helper-usage.patch @@ -0,0 +1,52 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Gerald Schaefer +Date: Thu, 16 Jan 2014 16:54:48 +0100 +Subject: net: rds: fix per-cpu helper usage + +From: Gerald Schaefer + +[ Upstream commit c196403b79aa241c3fefb3ee5bb328aa7c5cc860 ] + +commit ae4b46e9d "net: rds: use this_cpu_* per-cpu helper" broke per-cpu +handling for rds. chpfirst is the result of __this_cpu_read(), so it is +an absolute pointer and not __percpu. Therefore, __this_cpu_write() +should not operate on chpfirst, but rather on cache->percpu->first, just +like __this_cpu_read() did before. 
+ +Signed-off-byd Gerald Schaefer +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/ib_recv.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +--- a/net/rds/ib_recv.c ++++ b/net/rds/ib_recv.c +@@ -421,8 +421,7 @@ static void rds_ib_recv_cache_put(struct + struct rds_ib_refill_cache *cache) + { + unsigned long flags; +- struct list_head *old; +- struct list_head __percpu *chpfirst; ++ struct list_head *old, *chpfirst; + + local_irq_save(flags); + +@@ -432,7 +431,7 @@ static void rds_ib_recv_cache_put(struct + else /* put on front */ + list_add_tail(new_item, chpfirst); + +- __this_cpu_write(chpfirst, new_item); ++ __this_cpu_write(cache->percpu->first, new_item); + __this_cpu_inc(cache->percpu->count); + + if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT) +@@ -452,7 +451,7 @@ static void rds_ib_recv_cache_put(struct + } while (old); + + +- __this_cpu_write(chpfirst, NULL); ++ __this_cpu_write(cache->percpu->first, NULL); + __this_cpu_write(cache->percpu->count, 0); + end: + local_irq_restore(flags); diff --git a/queue-3.12/net-usbnet-fix-sg-initialisation.patch b/queue-3.12/net-usbnet-fix-sg-initialisation.patch new file mode 100644 index 00000000000..4b9ce9aec1c --- /dev/null +++ b/queue-3.12/net-usbnet-fix-sg-initialisation.patch @@ -0,0 +1,40 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= +Date: Fri, 10 Jan 2014 23:10:17 +0100 +Subject: net: usbnet: fix SG initialisation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= + +[ Upstream commit fdc3452cd2c7b2bfe0f378f92123f4f9a98fa2bd ] + +Commit 60e453a940ac ("USBNET: fix handling padding packet") +added an extra SG entry in case padding is necessary, but +failed to update the initialisation of the list. This can +cause list traversal to fall off the end of the list, +resulting in an oops. 
+ +Fixes: 60e453a940ac ("USBNET: fix handling padding packet") +Reported-by: Thomas Kear +Cc: Ming Lei +Signed-off-by: Bjørn Mork +Tested-by: Ming Lei +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/usbnet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/usb/usbnet.c ++++ b/drivers/net/usb/usbnet.c +@@ -1245,7 +1245,7 @@ static int build_dma_sg(const struct sk_ + return -ENOMEM; + + urb->num_sgs = num_sgs; +- sg_init_table(urb->sg, urb->num_sgs); ++ sg_init_table(urb->sg, urb->num_sgs + 1); + + sg_set_buf(&urb->sg[s++], skb->data, skb_headlen(skb)); + total_len += skb_headlen(skb); diff --git a/queue-3.12/net-via-rhine-fix-tx_timeout-handling.patch b/queue-3.12/net-via-rhine-fix-tx_timeout-handling.patch new file mode 100644 index 00000000000..88ee3428053 --- /dev/null +++ b/queue-3.12/net-via-rhine-fix-tx_timeout-handling.patch @@ -0,0 +1,34 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Richard Weinberger +Date: Tue, 14 Jan 2014 22:46:36 +0100 +Subject: net,via-rhine: Fix tx_timeout handling + +From: Richard Weinberger + +[ Upstream commit a926592f5e4e900f3fa903298c4619a131e60963 ] + +rhine_reset_task() misses to disable the tx scheduler upon reset, +this can lead to a crash if work is still scheduled while we're resetting +the tx queue. + +Fixes: +[ 93.591707] BUG: unable to handle kernel NULL pointer dereference at 0000004c +[ 93.595514] IP: [] rhine_napipoll+0x491/0x6 + +Signed-off-by: Richard Weinberger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/via/via-rhine.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/via/via-rhine.c ++++ b/drivers/net/ethernet/via/via-rhine.c +@@ -1615,6 +1615,7 @@ static void rhine_reset_task(struct work + goto out_unlock; + + napi_disable(&rp->napi); ++ netif_tx_disable(dev); + spin_lock_bh(&rp->lock); + + /* clear all descriptors */ diff --git a/queue-3.12/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch b/queue-3.12/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch new file mode 100644 index 00000000000..7c8e6a1d3fc --- /dev/null +++ b/queue-3.12/net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch @@ -0,0 +1,59 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Or Gerlitz +Date: Thu, 23 Jan 2014 11:28:13 +0200 +Subject: net/vxlan: Share RX skb de-marking and checksum checks with ovs + +From: Or Gerlitz + +[ Upstream commit d0bc65557ad09a57b4db176e9e3ccddb26971453 ] + +Make sure the practice set by commit 0afb166 "vxlan: Add capability +of Rx checksum offload for inner packet" is applied when the skb +goes through the portion of the RX code which is shared between +vxlan netdevices and ovs vxlan port instances. + +Cc: Joseph Gasparakis +Cc: Pravin B Shelar +Signed-off-by: Or Gerlitz +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 20 +++++++++----------- + 1 file changed, 9 insertions(+), 11 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1051,6 +1051,15 @@ static int vxlan_udp_encap_recv(struct s + if (!vs) + goto drop; + ++ /* If the NIC driver gave us an encapsulated packet ++ * with the encapsulation mark, the device checksummed it ++ * for us. Otherwise force the upper layers to verify it. 
++ */ ++ if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation) ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ skb->encapsulation = 0; ++ + vs->rcv(vs, skb, vxh->vx_vni); + return 0; + +@@ -1109,17 +1118,6 @@ static void vxlan_rcv(struct vxlan_sock + + skb_reset_network_header(skb); + +- /* If the NIC driver gave us an encapsulated packet with +- * CHECKSUM_UNNECESSARY and Rx checksum feature is enabled, +- * leave the CHECKSUM_UNNECESSARY, the device checksummed it +- * for us. Otherwise force the upper layers to verify it. +- */ +- if (skb->ip_summed != CHECKSUM_UNNECESSARY || !skb->encapsulation || +- !(vxlan->dev->features & NETIF_F_RXCSUM)) +- skb->ip_summed = CHECKSUM_NONE; +- +- skb->encapsulation = 0; +- + if (oip6) + err = IP6_ECN_decapsulate(oip6, skb); + if (oip) diff --git a/queue-3.12/s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch b/queue-3.12/s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch new file mode 100644 index 00000000000..98abf02544d --- /dev/null +++ b/queue-3.12/s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch @@ -0,0 +1,74 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Heiko Carstens +Date: Fri, 17 Jan 2014 09:37:15 +0100 +Subject: s390/bpf,jit: fix 32 bit divisions, use unsigned divide instructions + +From: Heiko Carstens + +[ Upstream commit 3af57f78c38131b7a66e2b01e06fdacae01992a3 ] + +The s390 bpf jit compiler emits the signed divide instructions "dr" and "d" +for unsigned divisions. +This can cause problems: the dividend will be zero extended to a 64 bit value +and the divisor is the 32 bit signed value as specified A or X accumulator, +even though A and X are supposed to be treated as unsigned values. + +The divide instrunctions will generate an exception if the result cannot be +expressed with a 32 bit signed value. +This is the case if e.g. the dividend is 0xffffffff and the divisor either 1 +or also 0xffffffff (signed: -1). 
+ +To avoid all these issues simply use unsigned divide instructions. + +Signed-off-by: Heiko Carstens +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/net/bpf_jit_comp.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/arch/s390/net/bpf_jit_comp.c ++++ b/arch/s390/net/bpf_jit_comp.c +@@ -368,16 +368,16 @@ static int bpf_jit_insn(struct bpf_jit * + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); +- /* dr %r4,%r12 */ +- EMIT2(0x1d4c); ++ /* dlr %r4,%r12 */ ++ EMIT4(0xb997004c); + break; + case BPF_S_ALU_DIV_K: /* A /= K */ + if (K == 1) + break; + /* lhi %r4,0 */ + EMIT4(0xa7480000); +- /* d %r4,(%r13) */ +- EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); ++ /* dl %r4,(%r13) */ ++ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + break; + case BPF_S_ALU_MOD_X: /* A %= X */ + jit->seen |= SEEN_XREG | SEEN_RET0; +@@ -387,8 +387,8 @@ static int bpf_jit_insn(struct bpf_jit * + EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); + /* lhi %r4,0 */ + EMIT4(0xa7480000); +- /* dr %r4,%r12 */ +- EMIT2(0x1d4c); ++ /* dlr %r4,%r12 */ ++ EMIT4(0xb997004c); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; +@@ -400,8 +400,8 @@ static int bpf_jit_insn(struct bpf_jit * + } + /* lhi %r4,0 */ + EMIT4(0xa7480000); +- /* d %r4,(%r13) */ +- EMIT4_DISP(0x5d40d000, EMIT_CONST(K)); ++ /* dl %r4,(%r13) */ ++ EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); + /* lr %r5,%r4 */ + EMIT2(0x1854); + break; diff --git a/queue-3.12/series b/queue-3.12/series index 5717146872f..2be6bb95dd8 100644 --- a/queue-3.12/series +++ b/queue-3.12/series @@ -98,3 +98,19 @@ arm-mvebu-add-quirk-for-i2c-for-the-openblocks-ax3-4-board.patch i2c-mv64xxx-fix-bus-hang-on-a0-version-of-the-armada-xp-socs.patch i2c-mv64xxx-document-the-newly-introduced-armada-xp-a0-compatible.patch i2c-piix4-add-support-for-amd-ml-and-cz-smbus-changes.patch +bnx2x-fix-dma-unmapping-of-tso-split-bds.patch +net-usbnet-fix-sg-initialisation.patch 
+ieee802154-fix-memory-leak-in-ieee802154_add_iface.patch +net-avoid-reference-counter-overflows-on-fib_rules-in-multicast-forwarding.patch +net-via-rhine-fix-tx_timeout-handling.patch +net-rds-fix-per-cpu-helper-usage.patch +tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch +bpf-do-not-use-reciprocal-divide.patch +s390-bpf-jit-fix-32-bit-divisions-use-unsigned-divide-instructions.patch +ip_tunnel-clear-ipcb-in-ip_tunnel_xmit-in-case-dst_link_failure-is-called.patch +net-vxlan-share-rx-skb-de-marking-and-checksum-checks-with-ovs.patch +fib_frontend-fix-possible-null-pointer-dereference.patch +net-fix-memory-leak-if-tproxy-used-with-tcp-early-demux.patch +xen-netfront-fix-resource-leak-in-netfront.patch +net-gre-use-icmp_hdr-to-get-inner-ip-header.patch +inet_diag-fix-inet_diag_dump_icsk-timewait-socket-state-logic.patch diff --git a/queue-3.12/tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch b/queue-3.12/tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch new file mode 100644 index 00000000000..093af2f4bf5 --- /dev/null +++ b/queue-3.12/tcp-metrics-avoid-duplicate-entries-with-the-same-destination-ip.patch @@ -0,0 +1,128 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Christoph Paasch +Date: Thu, 16 Jan 2014 20:01:21 +0100 +Subject: tcp: metrics: Avoid duplicate entries with the same destination-IP + +From: Christoph Paasch + +[ Upstream commit 77f99ad16a07aa062c2d30fae57b1fee456f6ef6 ] + +Because the tcp-metrics is an RCU-list, it may be that two +soft-interrupts are inside __tcp_get_metrics() for the same +destination-IP at the same time. If this destination-IP is not yet part of +the tcp-metrics, both soft-interrupts will end up in tcpm_new and create +a new entry for this IP. +So, we will have two tcp-metrics with the same destination-IP in the list. + +This patch checks twice __tcp_get_metrics(). First without holding the +lock, then while holding the lock. 
The second one is there to confirm +that the entry has not been added by another soft-irq while waiting for +the spin-lock. + +Fixes: 51c5d0c4b169b (tcp: Maintain dynamic metrics in local cache.) +Signed-off-by: Christoph Paasch +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_metrics.c | 51 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 32 insertions(+), 19 deletions(-) + +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -22,6 +22,9 @@ + + int sysctl_tcp_nometrics_save __read_mostly; + ++static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, ++ struct net *net, unsigned int hash); ++ + struct tcp_fastopen_metrics { + u16 mss; + u16 syn_loss:10; /* Recurring Fast Open SYN losses */ +@@ -130,16 +133,41 @@ static void tcpm_suck_dst(struct tcp_met + } + } + ++#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) ++ ++static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) ++{ ++ if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) ++ tcpm_suck_dst(tm, dst, false); ++} ++ ++#define TCP_METRICS_RECLAIM_DEPTH 5 ++#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL ++ + static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + struct inetpeer_addr *addr, +- unsigned int hash, +- bool reclaim) ++ unsigned int hash) + { + struct tcp_metrics_block *tm; + struct net *net; ++ bool reclaim = false; + + spin_lock_bh(&tcp_metrics_lock); + net = dev_net(dst->dev); ++ ++ /* While waiting for the spin-lock the cache might have been populated ++ * with this entry and so we have to check again. 
++ */ ++ tm = __tcp_get_metrics(addr, net, hash); ++ if (tm == TCP_METRICS_RECLAIM_PTR) { ++ reclaim = true; ++ tm = NULL; ++ } ++ if (tm) { ++ tcpm_check_stamp(tm, dst); ++ goto out_unlock; ++ } ++ + if (unlikely(reclaim)) { + struct tcp_metrics_block *oldest; + +@@ -169,17 +197,6 @@ out_unlock: + return tm; + } + +-#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) +- +-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +-{ +- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) +- tcpm_suck_dst(tm, dst, false); +-} +- +-#define TCP_METRICS_RECLAIM_DEPTH 5 +-#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL +- + static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) + { + if (tm) +@@ -280,7 +297,6 @@ static struct tcp_metrics_block *tcp_get + struct inetpeer_addr addr; + unsigned int hash; + struct net *net; +- bool reclaim; + + addr.family = sk->sk_family; + switch (addr.family) { +@@ -300,13 +316,10 @@ static struct tcp_metrics_block *tcp_get + hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); + + tm = __tcp_get_metrics(&addr, net, hash); +- reclaim = false; +- if (tm == TCP_METRICS_RECLAIM_PTR) { +- reclaim = true; ++ if (tm == TCP_METRICS_RECLAIM_PTR) + tm = NULL; +- } + if (!tm && create) +- tm = tcpm_new(dst, &addr, hash, reclaim); ++ tm = tcpm_new(dst, &addr, hash); + else + tcpm_check_stamp(tm, dst); + diff --git a/queue-3.12/xen-netfront-fix-resource-leak-in-netfront.patch b/queue-3.12/xen-netfront-fix-resource-leak-in-netfront.patch new file mode 100644 index 00000000000..5f72279ac0e --- /dev/null +++ b/queue-3.12/xen-netfront-fix-resource-leak-in-netfront.patch @@ -0,0 +1,204 @@ +From foo@baz Tue Feb 4 09:14:01 PST 2014 +From: Annie Li +Date: Tue, 28 Jan 2014 11:35:42 +0800 +Subject: xen-netfront: fix resource leak in netfront + +From: Annie Li + +[ Upstream commit cefe0078eea52af17411eb1248946a94afb84ca5 ] + +This patch removes grant transfer 
releasing code from netfront, and uses
+gnttab_end_foreign_access to end grant access since
+gnttab_end_foreign_access_ref may fail when the grant entry is
+currently used for reading or writing.
+
+* clean up grant transfer code kept from old netfront(2.6.18) which grants
+pages for access/map and transfer. But grant transfer is deprecated in current
+netfront, so remove corresponding release code for transfer.
+
+* fix resource leak, release grant access (through gnttab_end_foreign_access)
+and skb for tx/rx path, use get_page to ensure page is released when grant
+access is completed successfully.
+
+Xen-blkfront/xen-tpmfront/xen-pcifront also have similar issue, but patches
+for them will be created separately.
+
+V6: Correct subject line and commit message.
+
+V5: Remove unnecessary change in xennet_end_access.
+
+V4: Revert put_page in gnttab_end_foreign_access, and keep netfront change in
+single patch.
+
+V3: Changes as suggestion from David Vrabel, ensure pages are not freed until
+grant access is ended.
+
+V2: Improve patch comments.
+
+Signed-off-by: Annie Li
+Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/xen-netfront.c | 88 +++++++++++++-------------------------------- + 1 file changed, 26 insertions(+), 62 deletions(-) + +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -117,6 +117,7 @@ struct netfront_info { + } tx_skbs[NET_TX_RING_SIZE]; + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; ++ struct page *grant_tx_page[NET_TX_RING_SIZE]; + unsigned tx_skb_freelist; + + spinlock_t rx_lock ____cacheline_aligned_in_smp; +@@ -395,6 +396,7 @@ static void xennet_tx_buf_gc(struct net_ + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; ++ np->grant_tx_page[id] = NULL; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); + dev_kfree_skb_irq(skb); + } +@@ -451,6 +453,7 @@ static void xennet_make_frags(struct sk_ + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + ++ np->grant_tx_page[id] = virt_to_page(data); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; +@@ -496,6 +499,7 @@ static void xennet_make_frags(struct sk_ + np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + ++ np->grant_tx_page[id] = page; + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = bytes; +@@ -595,6 +599,7 @@ static int xennet_start_xmit(struct sk_b + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); ++ np->grant_tx_page[id] = virt_to_page(data); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; +@@ -1119,10 +1124,11 @@ static void xennet_release_tx_bufs(struc + continue; + + skb = np->tx_skbs[i].skb; +- gnttab_end_foreign_access_ref(np->grant_tx_ref[i], +- GNTMAP_readonly); +- gnttab_release_grant_reference(&np->gref_tx_head, +- np->grant_tx_ref[i]); ++ get_page(np->grant_tx_page[i]); ++ 
gnttab_end_foreign_access(np->grant_tx_ref[i], ++ GNTMAP_readonly, ++ (unsigned long)page_address(np->grant_tx_page[i])); ++ np->grant_tx_page[i] = NULL; + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); + dev_kfree_skb_irq(skb); +@@ -1131,78 +1137,35 @@ static void xennet_release_tx_bufs(struc + + static void xennet_release_rx_bufs(struct netfront_info *np) + { +- struct mmu_update *mmu = np->rx_mmu; +- struct multicall_entry *mcl = np->rx_mcl; +- struct sk_buff_head free_list; +- struct sk_buff *skb; +- unsigned long mfn; +- int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + +- dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", +- __func__); +- return; +- +- skb_queue_head_init(&free_list); +- + spin_lock_bh(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { +- ref = np->grant_rx_ref[id]; +- if (ref == GRANT_INVALID_REF) { +- unused++; +- continue; +- } ++ struct sk_buff *skb; ++ struct page *page; + + skb = np->rx_skbs[id]; +- mfn = gnttab_end_foreign_transfer_ref(ref); +- gnttab_release_grant_reference(&np->gref_rx_head, ref); +- np->grant_rx_ref[id] = GRANT_INVALID_REF; +- +- if (0 == mfn) { +- skb_shinfo(skb)->nr_frags = 0; +- dev_kfree_skb(skb); +- noxfer++; ++ if (!skb) + continue; +- } + +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- /* Remap the page. 
*/ +- const struct page *page = +- skb_frag_page(&skb_shinfo(skb)->frags[0]); +- unsigned long pfn = page_to_pfn(page); +- void *vaddr = page_address(page); ++ ref = np->grant_rx_ref[id]; ++ if (ref == GRANT_INVALID_REF) ++ continue; + +- MULTI_update_va_mapping(mcl, (unsigned long)vaddr, +- mfn_pte(mfn, PAGE_KERNEL), +- 0); +- mcl++; +- mmu->ptr = ((u64)mfn << PAGE_SHIFT) +- | MMU_MACHPHYS_UPDATE; +- mmu->val = pfn; +- mmu++; ++ page = skb_frag_page(&skb_shinfo(skb)->frags[0]); + +- set_phys_to_machine(pfn, mfn); +- } +- __skb_queue_tail(&free_list, skb); +- xfer++; +- } +- +- dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", +- __func__, xfer, noxfer, unused); ++ /* gnttab_end_foreign_access() needs a page ref until ++ * foreign access is ended (which may be deferred). ++ */ ++ get_page(page); ++ gnttab_end_foreign_access(ref, 0, ++ (unsigned long)page_address(page)); ++ np->grant_rx_ref[id] = GRANT_INVALID_REF; + +- if (xfer) { +- if (!xen_feature(XENFEAT_auto_translated_physmap)) { +- /* Do all the remapping work and M2P updates. */ +- MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, +- NULL, DOMID_SELF); +- mcl++; +- HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); +- } ++ kfree_skb(skb); + } + +- __skb_queue_purge(&free_list); +- + spin_unlock_bh(&np->rx_lock); + } + +@@ -1349,6 +1312,7 @@ static struct net_device *xennet_create_ + for (i = 0; i < NET_RX_RING_SIZE; i++) { + np->rx_skbs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; ++ np->grant_tx_page[i] = NULL; + } + + /* A grant for every tx ring slot */ -- 2.47.2