From 11b78c1566206b038f7ea786d26beb70ea78c0a7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Apr 2015 12:12:15 +0200 Subject: [PATCH] 4.0-stable patches added patches: ip_forward-drop-frames-with-attached-skb-sk.patch net-add-skb_checksum_complete_unset.patch net-do-not-deplete-pfmemalloc-reserve.patch net-fix-crash-in-build_skb.patch net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch net-rfs-fix-crash-in-get_rps_cpus.patch ppp-call-skb_checksum_complete_unset-in-ppp_receive_frame.patch pxa168-fix-double-deallocation-of-managed-resources.patch tcp-avoid-looping-in-tcp_send_fin.patch tcp-fix-possible-deadlock-in-tcp_send_fin.patch --- ...ard-drop-frames-with-attached-skb-sk.patch | 42 +++++ .../net-add-skb_checksum_complete_unset.patch | 41 +++++ ...et-do-not-deplete-pfmemalloc-reserve.patch | 51 ++++++ queue-4.0/net-fix-crash-in-build_skb.patch | 165 ++++++++++++++++++ ...nt-setting-invalid-rss-hash-function.patch | 69 ++++++++ .../net-rfs-fix-crash-in-get_rps_cpus.patch | 103 +++++++++++ ..._complete_unset-in-ppp_receive_frame.patch | 30 ++++ ...le-deallocation-of-managed-resources.patch | 79 +++++++++ .../tcp-avoid-looping-in-tcp_send_fin.patch | 117 +++++++++++++ ...ix-possible-deadlock-in-tcp_send_fin.patch | 70 ++++++++ 10 files changed, 767 insertions(+) create mode 100644 queue-4.0/ip_forward-drop-frames-with-attached-skb-sk.patch create mode 100644 queue-4.0/net-add-skb_checksum_complete_unset.patch create mode 100644 queue-4.0/net-do-not-deplete-pfmemalloc-reserve.patch create mode 100644 queue-4.0/net-fix-crash-in-build_skb.patch create mode 100644 queue-4.0/net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch create mode 100644 queue-4.0/net-rfs-fix-crash-in-get_rps_cpus.patch create mode 100644 queue-4.0/ppp-call-skb_checksum_complete_unset-in-ppp_receive_frame.patch create mode 100644 queue-4.0/pxa168-fix-double-deallocation-of-managed-resources.patch create mode 100644 queue-4.0/tcp-avoid-looping-in-tcp_send_fin.patch create mode 100644 queue-4.0/tcp-fix-possible-deadlock-in-tcp_send_fin.patch diff --git a/queue-4.0/ip_forward-drop-frames-with-attached-skb-sk.patch b/queue-4.0/ip_forward-drop-frames-with-attached-skb-sk.patch new file mode 100644 index 00000000000..e5ecf67445c --- /dev/null +++ b/queue-4.0/ip_forward-drop-frames-with-attached-skb-sk.patch @@ -0,0 +1,42 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: =?UTF-8?q?Sebastian=20P=C3=B6hn?= +Date: Mon, 20 Apr 2015 09:19:20 +0200 +Subject: ip_forward: Drop frames with attached skb->sk + +From: =?UTF-8?q?Sebastian=20P=C3=B6hn?= + +[ Upstream commit 2ab957492d13bb819400ac29ae55911d50a82a13 ] + +Initial discussion was: +[FYI] xfrm: Don't lookup sk_policy for timewait sockets + +Forwarded frames should not have a socket attached. Especially +tw sockets will lead to panics later-on in the stack. + +This was observed with TPROXY assigning a tw socket and broken +policy routing (misconfigured). As a result frame enters +forwarding path instead of input. We cannot solve this in +TPROXY as it cannot know that policy routing is broken. + +v2: +Remove useless comment + +Signed-off-by: Sebastian Poehn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_forward.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/ipv4/ip_forward.c ++++ b/net/ipv4/ip_forward.c +@@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb) + if (skb->pkt_type != PACKET_HOST) + goto drop; + ++ if (unlikely(skb->sk)) ++ goto drop; ++ + if (skb_warn_if_lro(skb)) + goto drop; + diff --git a/queue-4.0/net-add-skb_checksum_complete_unset.patch b/queue-4.0/net-add-skb_checksum_complete_unset.patch new file mode 100644 index 00000000000..0bbaf974e02 --- /dev/null +++ b/queue-4.0/net-add-skb_checksum_complete_unset.patch @@ -0,0 +1,41 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Tom Herbert +Date: Mon, 20 Apr 2015 14:10:04 -0700 +Subject: net: add skb_checksum_complete_unset + +From: Tom Herbert + +[ Upstream commit 4e18b9adf2f910ec4d30b811a74a5b626e6c6125 ] + +This function changes ip_summed to CHECKSUM_NONE if CHECKSUM_COMPLETE +is set. This is called to discard checksum-complete when packet +is being modified and checksum is not pulled for headers in a layer. + +Signed-off-by: Tom Herbert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -3013,6 +3013,18 @@ static inline bool __skb_checksum_valida + */ + #define CHECKSUM_BREAK 76 + ++/* Unset checksum-complete ++ * ++ * Unset checksum complete can be done when packet is being modified ++ * (uncompressed for instance) and checksum-complete value is ++ * invalidated. ++ */ ++static inline void skb_checksum_complete_unset(struct sk_buff *skb) ++{ ++ if (skb->ip_summed == CHECKSUM_COMPLETE) ++ skb->ip_summed = CHECKSUM_NONE; ++} ++ + /* Validate (init) checksum based on checksum complete. + * + * Return values: diff --git a/queue-4.0/net-do-not-deplete-pfmemalloc-reserve.patch b/queue-4.0/net-do-not-deplete-pfmemalloc-reserve.patch new file mode 100644 index 00000000000..d7110e17681 --- /dev/null +++ b/queue-4.0/net-do-not-deplete-pfmemalloc-reserve.patch @@ -0,0 +1,51 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Eric Dumazet +Date: Wed, 22 Apr 2015 07:33:36 -0700 +Subject: net: do not deplete pfmemalloc reserve + +From: Eric Dumazet + +[ Upstream commit 79930f5892e134c6da1254389577fffb8bd72c66 ] + +build_skb() should look at the page pfmemalloc status. +If set, this means page allocator allocated this page in the +expectation it would help to free other pages. Networking +stack can do that only if skb->pfmemalloc is also set. + +Also, we must refrain using high order pages from the pfmemalloc +reserve, so __page_frag_refill() must also use __GFP_NOMEMALLOC for +them. Under memory pressure, using order-0 pages is probably the best +strategy. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -311,7 +311,11 @@ struct sk_buff *build_skb(void *data, un + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->truesize = SKB_TRUESIZE(size); +- skb->head_frag = frag_size != 0; ++ if (frag_size) { ++ skb->head_frag = 1; ++ if (virt_to_head_page(data)->pfmemalloc) ++ skb->pfmemalloc = 1; ++ } + atomic_set(&skb->users, 1); + skb->head = data; + skb->data = data; +@@ -348,7 +352,8 @@ static struct page *__page_frag_refill(s + gfp_t gfp = gfp_mask; + + if (order) { +- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; ++ gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | ++ __GFP_NOMEMALLOC; + page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); + nc->frag.size = PAGE_SIZE << (page ? order : 0); + } diff --git a/queue-4.0/net-fix-crash-in-build_skb.patch b/queue-4.0/net-fix-crash-in-build_skb.patch new file mode 100644 index 00000000000..98ada820584 --- /dev/null +++ b/queue-4.0/net-fix-crash-in-build_skb.patch @@ -0,0 +1,165 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Eric Dumazet +Date: Fri, 24 Apr 2015 16:05:01 -0700 +Subject: net: fix crash in build_skb() + +From: Eric Dumazet + +[ Upstream commit 2ea2f62c8bda242433809c7f4e9eae1c52c40bbe ] + +When I added pfmemalloc support in build_skb(), I forgot netlink +was using build_skb() with a vmalloc() area. + +In this patch I introduce __build_skb() for netlink use, +and build_skb() is a wrapper handling both skb->head_frag and +skb->pfmemalloc + +This means netlink no longer has to hack skb->head_frag + +[ 1567.700067] kernel BUG at arch/x86/mm/physaddr.c:26! +[ 1567.700067] invalid opcode: 0000 [#1] PREEMPT SMP KASAN +[ 1567.700067] Dumping ftrace buffer: +[ 1567.700067] (ftrace buffer empty) +[ 1567.700067] Modules linked in: +[ 1567.700067] CPU: 9 PID: 16186 Comm: trinity-c182 Not tainted 4.0.0-next-20150424-sasha-00037-g4796e21 #2167 +[ 1567.700067] task: ffff880127efb000 ti: ffff880246770000 task.ti: ffff880246770000 +[ 1567.700067] RIP: __phys_addr (arch/x86/mm/physaddr.c:26 (discriminator 3)) +[ 1567.700067] RSP: 0018:ffff8802467779d8 EFLAGS: 00010202 +[ 1567.700067] RAX: 000041000ed8e000 RBX: ffffc9008ed8e000 RCX: 000000000000002c +[ 1567.700067] RDX: 0000000000000004 RSI: 0000000000000000 RDI: ffffffffb3fd6049 +[ 1567.700067] RBP: ffff8802467779f8 R08: 0000000000000019 R09: ffff8801d0168000 +[ 1567.700067] R10: ffff8801d01680c7 R11: ffffed003a02d019 R12: ffffc9000ed8e000 +[ 1567.700067] R13: 0000000000000f40 R14: 0000000000001180 R15: ffffc9000ed8e000 +[ 1567.700067] FS: 00007f2a7da3f700(0000) GS:ffff8801d1000000(0000) knlGS:0000000000000000 +[ 1567.700067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 1567.700067] CR2: 0000000000738308 CR3: 000000022e329000 CR4: 00000000000007e0 +[ 1567.700067] Stack: +[ 1567.700067] ffffc9000ed8e000 ffff8801d0168000 ffffc9000ed8e000 ffff8801d0168000 +[ 1567.700067] ffff880246777a28 ffffffffad7c0a21 0000000000001080 ffff880246777c08 +[ 1567.700067] ffff88060d302e68 ffff880246777b58 ffff880246777b88 ffffffffad9a6821 +[ 1567.700067] Call Trace: +[ 1567.700067] build_skb (include/linux/mm.h:508 net/core/skbuff.c:316) +[ 1567.700067] netlink_sendmsg (net/netlink/af_netlink.c:1633 net/netlink/af_netlink.c:2329) +[ 1567.774369] ? sched_clock_cpu (kernel/sched/clock.c:311) +[ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) +[ 1567.774369] ? netlink_unicast (net/netlink/af_netlink.c:2273) +[ 1567.774369] sock_sendmsg (net/socket.c:614 net/socket.c:623) +[ 1567.774369] sock_write_iter (net/socket.c:823) +[ 1567.774369] ? sock_sendmsg (net/socket.c:806) +[ 1567.774369] __vfs_write (fs/read_write.c:479 fs/read_write.c:491) +[ 1567.774369] ? get_lock_stats (kernel/locking/lockdep.c:249) +[ 1567.774369] ? default_llseek (fs/read_write.c:487) +[ 1567.774369] ? vtime_account_user (kernel/sched/cputime.c:701) +[ 1567.774369] ? rw_verify_area (fs/read_write.c:406 (discriminator 4)) +[ 1567.774369] vfs_write (fs/read_write.c:539) +[ 1567.774369] SyS_write (fs/read_write.c:586 fs/read_write.c:577) +[ 1567.774369] ? SyS_read (fs/read_write.c:577) +[ 1567.774369] ? __this_cpu_preempt_check (lib/smp_processor_id.c:63) +[ 1567.774369] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2594 kernel/locking/lockdep.c:2636) +[ 1567.774369] ? trace_hardirqs_on_thunk (arch/x86/lib/thunk_64.S:42) +[ 1567.774369] system_call_fastpath (arch/x86/kernel/entry_64.S:261) + +Fixes: 79930f5892e ("net: do not deplete pfmemalloc reserve") +Signed-off-by: Eric Dumazet +Reported-by: Sasha Levin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 1 + + net/core/skbuff.c | 31 ++++++++++++++++++++++--------- + net/netlink/af_netlink.c | 6 ++---- + 3 files changed, 25 insertions(+), 13 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -769,6 +769,7 @@ bool skb_try_coalesce(struct sk_buff *to + + struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, + int node); ++struct sk_buff *__build_skb(void *data, unsigned int frag_size); + struct sk_buff *build_skb(void *data, unsigned int frag_size); + static inline struct sk_buff *alloc_skb(unsigned int size, + gfp_t priority) +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -280,13 +280,14 @@ nodata: + EXPORT_SYMBOL(__alloc_skb); + + /** +- * build_skb - build a network buffer ++ * __build_skb - build a network buffer + * @data: data buffer provided by caller +- * @frag_size: size of fragment, or 0 if head was kmalloced ++ * @frag_size: size of data, or 0 if head was kmalloced + * + * Allocate a new &sk_buff. Caller provides space holding head and + * skb_shared_info. @data must have been allocated by kmalloc() only if +- * @frag_size is 0, otherwise data should come from the page allocator. ++ * @frag_size is 0, otherwise data should come from the page allocator ++ * or vmalloc() + * The return is the new skb buffer. + * On a failure the return is %NULL, and @data is not freed. + * Notes : +@@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); + * before giving packet to stack. + * RX rings only contains data buffers, not full skbs. + */ +-struct sk_buff *build_skb(void *data, unsigned int frag_size) ++struct sk_buff *__build_skb(void *data, unsigned int frag_size) + { + struct skb_shared_info *shinfo; + struct sk_buff *skb; +@@ -311,11 +312,6 @@ struct sk_buff *build_skb(void *data, un + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->truesize = SKB_TRUESIZE(size); +- if (frag_size) { +- skb->head_frag = 1; +- if (virt_to_head_page(data)->pfmemalloc) +- skb->pfmemalloc = 1; +- } + atomic_set(&skb->users, 1); + skb->head = data; + skb->data = data; +@@ -332,6 +328,23 @@ struct sk_buff *build_skb(void *data, un + + return skb; + } ++ ++/* build_skb() is wrapper over __build_skb(), that specifically ++ * takes care of skb->head and skb->pfmemalloc ++ * This means that if @frag_size is not zero, then @data must be backed ++ * by a page fragment, not kmalloc() or vmalloc() ++ */ ++struct sk_buff *build_skb(void *data, unsigned int frag_size) ++{ ++ struct sk_buff *skb = __build_skb(data, frag_size); ++ ++ if (skb && frag_size) { ++ skb->head_frag = 1; ++ if (virt_to_head_page(data)->pfmemalloc) ++ skb->pfmemalloc = 1; ++ } ++ return skb; ++} + EXPORT_SYMBOL(build_skb); + + struct netdev_alloc_cache { +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1616,13 +1616,11 @@ static struct sk_buff *netlink_alloc_lar + if (data == NULL) + return NULL; + +- skb = build_skb(data, size); ++ skb = __build_skb(data, size); + if (skb == NULL) + vfree(data); +- else { +- skb->head_frag = 0; ++ else + skb->destructor = netlink_skb_destructor; +- } + + return skb; + } diff --git a/queue-4.0/net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch b/queue-4.0/net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch new file mode 100644 index 00000000000..8cfd96f9354 --- /dev/null +++ b/queue-4.0/net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch @@ -0,0 +1,69 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Amir Vadai +Date: Mon, 27 Apr 2015 13:40:56 +0300 +Subject: net/mlx4_en: Prevent setting invalid RSS hash function + +From: Amir Vadai + +[ Upstream commit b37069090b7c5615610a8aa6b36533d67b364d38 ] + +mlx4_en_check_rxfh_func() was checking for hardware support before +setting a known RSS hash function, but didn't do any check before +setting unknown RSS hash function. Need to make it fail on such values. +In this occasion, moved the actual setting of the new value from the +check function into mlx4_en_set_rxfh(). + +Fixes: 947cbb0 ("net/mlx4_en: Support for configurable RSS hash function") +Signed-off-by: Amir Vadai +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 29 +++++++++++++----------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +@@ -981,20 +981,21 @@ static int mlx4_en_check_rxfh_func(struc + struct mlx4_en_priv *priv = netdev_priv(dev); + + /* check if requested function is supported by the device */ +- if ((hfunc == ETH_RSS_HASH_TOP && +- !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) || +- (hfunc == ETH_RSS_HASH_XOR && +- !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))) +- return -EINVAL; ++ if (hfunc == ETH_RSS_HASH_TOP) { ++ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) ++ return -EINVAL; ++ if (!(dev->features & NETIF_F_RXHASH)) ++ en_warn(priv, "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); ++ return 0; ++ } else if (hfunc == ETH_RSS_HASH_XOR) { ++ if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR)) ++ return -EINVAL; ++ if (dev->features & NETIF_F_RXHASH) ++ en_warn(priv, "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); ++ return 0; ++ } + +- priv->rss_hash_fn = hfunc; +- if (hfunc == ETH_RSS_HASH_TOP && !(dev->features & NETIF_F_RXHASH)) +- en_warn(priv, +- "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); +- if (hfunc == ETH_RSS_HASH_XOR && (dev->features & NETIF_F_RXHASH)) +- en_warn(priv, +- "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); +- return 0; ++ return -EINVAL; + } + + static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, +@@ -1068,6 +1069,8 @@ static int mlx4_en_set_rxfh(struct net_d + priv->prof->rss_rings = rss_rings; + if (key) + memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE); ++ if (hfunc != ETH_RSS_HASH_NO_CHANGE) ++ priv->rss_hash_fn = hfunc; + + if (port_up) { + err = mlx4_en_start_port(dev); diff --git a/queue-4.0/net-rfs-fix-crash-in-get_rps_cpus.patch b/queue-4.0/net-rfs-fix-crash-in-get_rps_cpus.patch new file mode 100644 index 00000000000..ab4ef5f6b16 --- /dev/null +++ b/queue-4.0/net-rfs-fix-crash-in-get_rps_cpus.patch @@ -0,0 +1,103 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Eric Dumazet +Date: Sat, 25 Apr 2015 09:35:24 -0700 +Subject: net: rfs: fix crash in get_rps_cpus() + +From: Eric Dumazet + +[ Upstream commit a31196b07f8034eba6a3487a1ad1bb5ec5cd58a5 ] + +Commit 567e4b79731c ("net: rfs: add hash collision detection") had one +mistake : + +RPS_NO_CPU is no longer the marker for invalid cpu in set_rps_cpu() +and get_rps_cpu(), as @next_cpu was the result of an AND with +rps_cpu_mask + +This bug showed up on a host with 72 cpus : +next_cpu was 0x7f, and the code was trying to access percpu data of an +non existent cpu. + +In a follow up patch, we might get rid of compares against nr_cpu_ids, +if we init the tables with 0. This is silly to test for a very unlikely +condition that exists only shortly after table initialization, as +we got rid of rps_reset_sock_flow() and similar functions that were +writing this RPS_NO_CPU magic value at flow dismantle : When table is +old enough, it never contains this value anymore. + +Fixes: 567e4b79731c ("net: rfs: add hash collision detection") +Signed-off-by: Eric Dumazet +Cc: Tom Herbert +Cc: Ben Hutchings +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + Documentation/networking/scaling.txt | 2 +- + net/core/dev.c | 12 ++++++------ + 2 files changed, 7 insertions(+), 7 deletions(-) + +--- a/Documentation/networking/scaling.txt ++++ b/Documentation/networking/scaling.txt +@@ -282,7 +282,7 @@ following is true: + + - The current CPU's queue head counter >= the recorded tail counter + value in rps_dev_flow[i] +-- The current CPU is unset (equal to RPS_NO_CPU) ++- The current CPU is unset (>= nr_cpu_ids) + - The current CPU is offline + + After this check, the packet is sent to the (possibly updated) current +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3041,7 +3041,7 @@ static struct rps_dev_flow * + set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) + { +- if (next_cpu != RPS_NO_CPU) { ++ if (next_cpu < nr_cpu_ids) { + #ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; +@@ -3146,7 +3146,7 @@ static int get_rps_cpu(struct net_device + * If the desired CPU (where last recvmsg was done) is + * different from current CPU (one in the rx-queue flow + * table entry), switch if one of the following holds: +- * - Current CPU is unset (equal to RPS_NO_CPU). ++ * - Current CPU is unset (>= nr_cpu_ids). + * - Current CPU is offline. + * - The current CPU's queue tail has advanced beyond the + * last packet that was enqueued using this table entry. +@@ -3154,14 +3154,14 @@ static int get_rps_cpu(struct net_device + * have been dequeued, thus preserving in order delivery. + */ + if (unlikely(tcpu != next_cpu) && +- (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ++ (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || + ((int)(per_cpu(softnet_data, tcpu).input_queue_head - + rflow->last_qtail)) >= 0)) { + tcpu = next_cpu; + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + } + +- if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { ++ if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { + *rflowp = rflow; + cpu = tcpu; + goto done; +@@ -3202,14 +3202,14 @@ bool rps_may_expire_flow(struct net_devi + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; +- int cpu; ++ unsigned int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); +- if (rflow->filter == filter_id && cpu != RPS_NO_CPU && ++ if (rflow->filter == filter_id && cpu < nr_cpu_ids && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) diff --git a/queue-4.0/ppp-call-skb_checksum_complete_unset-in-ppp_receive_frame.patch b/queue-4.0/ppp-call-skb_checksum_complete_unset-in-ppp_receive_frame.patch new file mode 100644 index 00000000000..c869ed9b8d1 --- /dev/null +++ b/queue-4.0/ppp-call-skb_checksum_complete_unset-in-ppp_receive_frame.patch @@ -0,0 +1,30 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Tom Herbert +Date: Mon, 20 Apr 2015 14:10:05 -0700 +Subject: ppp: call skb_checksum_complete_unset in ppp_receive_frame + +From: Tom Herbert + +[ Upstream commit 3dfb05340ec6676e6fc71a9ae87bbbe66d3c2998 ] + +Call checksum_complete_unset in PPP receive to discard checksum-complete +value. PPP does not pull checksum for headers and also modifies packet +as in VJ compression. + +Signed-off-by: Tom Herbert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -1716,6 +1716,7 @@ ppp_receive_frame(struct ppp *ppp, struc + { + /* note: a 0-length skb is used as an error indication */ + if (skb->len > 0) { ++ skb_checksum_complete_unset(skb); + #ifdef CONFIG_PPP_MULTILINK + /* XXX do channel-level decompression here */ + if (PPP_PROTO(skb) == PPP_MP) diff --git a/queue-4.0/pxa168-fix-double-deallocation-of-managed-resources.patch b/queue-4.0/pxa168-fix-double-deallocation-of-managed-resources.patch new file mode 100644 index 00000000000..8b385b8ceee --- /dev/null +++ b/queue-4.0/pxa168-fix-double-deallocation-of-managed-resources.patch @@ -0,0 +1,79 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Alexey Khoroshilov +Date: Sat, 25 Apr 2015 04:07:03 +0300 +Subject: pxa168: fix double deallocation of managed resources + +From: Alexey Khoroshilov + +[ Upstream commit 0e03fd3e335d272bee88fe733d5fd13f5c5b7140 ] + +Commit 43d3ddf87a57 ("net: pxa168_eth: add device tree support") starts +to use managed resources by adding devm_clk_get() and +devm_ioremap_resource(), but it leaves explicit iounmap() and clock_put() +in pxa168_eth_remove() and in failure handling code of pxa168_eth_probe(). +As a result double free can happen. + +The patch removes explicit resource deallocation. Also it converts +clk_disable() to clk_disable_unprepare() to make it symmetrical with +clk_prepare_enable(). + +Found by Linux Driver Verification project (linuxtesting.org). + +Signed-off-by: Alexey Khoroshilov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/pxa168_eth.c | 16 +++++----------- + 1 file changed, 5 insertions(+), 11 deletions(-) + +--- a/drivers/net/ethernet/marvell/pxa168_eth.c ++++ b/drivers/net/ethernet/marvell/pxa168_eth.c +@@ -1508,7 +1508,8 @@ static int pxa168_eth_probe(struct platf + np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!np) { + dev_err(&pdev->dev, "missing phy-handle\n"); +- return -EINVAL; ++ err = -EINVAL; ++ goto err_netdev; + } + of_property_read_u32(np, "reg", &pep->phy_addr); + pep->phy_intf = of_get_phy_mode(pdev->dev.of_node); +@@ -1526,7 +1527,7 @@ static int pxa168_eth_probe(struct platf + pep->smi_bus = mdiobus_alloc(); + if (pep->smi_bus == NULL) { + err = -ENOMEM; +- goto err_base; ++ goto err_netdev; + } + pep->smi_bus->priv = pep; + pep->smi_bus->name = "pxa168_eth smi"; +@@ -1551,13 +1552,10 @@ err_mdiobus: + mdiobus_unregister(pep->smi_bus); + err_free_mdio: + mdiobus_free(pep->smi_bus); +-err_base: +- iounmap(pep->base); + err_netdev: + free_netdev(dev); + err_clk: +- clk_disable(clk); +- clk_put(clk); ++ clk_disable_unprepare(clk); + return err; + } + +@@ -1574,13 +1572,9 @@ static int pxa168_eth_remove(struct plat + if (pep->phy) + phy_disconnect(pep->phy); + if (pep->clk) { +- clk_disable(pep->clk); +- clk_put(pep->clk); +- pep->clk = NULL; ++ clk_disable_unprepare(pep->clk); + } + +- iounmap(pep->base); +- pep->base = NULL; + mdiobus_unregister(pep->smi_bus); + mdiobus_free(pep->smi_bus); + unregister_netdev(dev); diff --git a/queue-4.0/tcp-avoid-looping-in-tcp_send_fin.patch b/queue-4.0/tcp-avoid-looping-in-tcp_send_fin.patch new file mode 100644 index 00000000000..4d444f66345 --- /dev/null +++ b/queue-4.0/tcp-avoid-looping-in-tcp_send_fin.patch @@ -0,0 +1,117 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Eric Dumazet +Date: Thu, 23 Apr 2015 10:42:39 -0700 +Subject: tcp: avoid looping in tcp_send_fin() + +From: Eric Dumazet + +[ Upstream commit 845704a535e9b3c76448f52af1b70e4422ea03fd ] + +Presence of an unbound loop in tcp_send_fin() had always been hard +to explain when analyzing crash dumps involving gigantic dying processes +with millions of sockets. + +Lets try a different strategy : + +In case of memory pressure, try to add the FIN flag to last packet +in write queue, even if packet was already sent. TCP stack will +be able to deliver this FIN after a timeout event. Note that this +FIN being delivered by a retransmit, it also carries a Push flag +given our current implementation. + +By checking sk_under_memory_pressure(), we anticipate that cooking +many FIN packets might deplete tcp memory. + +In the case we could not allocate a packet, even with __GFP_WAIT +allocation, then not sending a FIN seems quite reasonable if it allows +to get rid of this socket, free memory, and not block the process from +eventually doing other useful work. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 50 +++++++++++++++++++++++++++++--------------------- + 1 file changed, 29 insertions(+), 21 deletions(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2753,7 +2753,8 @@ begin_fwd: + + /* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. +- * Otherwise tcp_send_fin() could loop forever. ++ * Otherwise tcp_send_fin() could be tempted to either delay FIN ++ * or even be forced to close flow without any FIN. + */ + static void sk_forced_wmem_schedule(struct sock *sk, int size) + { +@@ -2766,33 +2767,40 @@ static void sk_forced_wmem_schedule(stru + sk_memory_allocated_add(sk, amt, &status); + } + +-/* Send a fin. The caller locks the socket for us. This cannot be +- * allowed to fail queueing a FIN frame under any circumstances. ++/* Send a FIN. The caller locks the socket for us. ++ * We should try to send a FIN packet really hard, but eventually give up. + */ + void tcp_send_fin(struct sock *sk) + { ++ struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); +- struct sk_buff *skb = tcp_write_queue_tail(sk); +- int mss_now; + +- /* Optimization, tack on the FIN if we have a queue of +- * unsent frames. But be careful about outgoing SACKS +- * and IP options. ++ /* Optimization, tack on the FIN if we have one skb in write queue and ++ * this skb was not yet sent, or we are under memory pressure. ++ * Note: in the latter case, FIN packet will be sent after a timeout, ++ * as TCP stack thinks it has already been transmitted. + */ +- mss_now = tcp_current_mss(sk); +- +- if (tcp_send_head(sk) != NULL) { +- TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; +- TCP_SKB_CB(skb)->end_seq++; ++ if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { ++coalesce: ++ TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; ++ TCP_SKB_CB(tskb)->end_seq++; + tp->write_seq++; ++ if (!tcp_send_head(sk)) { ++ /* This means tskb was already sent. ++ * Pretend we included the FIN on previous transmit. ++ * We need to set tp->snd_nxt to the value it would have ++ * if FIN had been sent. This is because retransmit path ++ * does not change tp->snd_nxt. ++ */ ++ tp->snd_nxt++; ++ return; ++ } + } else { +- /* Socket is locked, keep trying until memory is available. */ +- for (;;) { +- skb = alloc_skb_fclone(MAX_TCP_HEADER, +- sk->sk_allocation); +- if (skb) +- break; +- yield(); ++ skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); ++ if (unlikely(!skb)) { ++ if (tskb) ++ goto coalesce; ++ return; + } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); +@@ -2801,7 +2809,7 @@ void tcp_send_fin(struct sock *sk) + TCPHDR_ACK | TCPHDR_FIN); + tcp_queue_skb(sk, skb); + } +- __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); ++ __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); + } + + /* We get here when a process closes a file descriptor (either due to diff --git a/queue-4.0/tcp-fix-possible-deadlock-in-tcp_send_fin.patch b/queue-4.0/tcp-fix-possible-deadlock-in-tcp_send_fin.patch new file mode 100644 index 00000000000..33f847fcbda --- /dev/null +++ b/queue-4.0/tcp-fix-possible-deadlock-in-tcp_send_fin.patch @@ -0,0 +1,70 @@ +From foo@baz Wed Apr 29 11:59:49 CEST 2015 +From: Eric Dumazet +Date: Tue, 21 Apr 2015 18:32:24 -0700 +Subject: tcp: fix possible deadlock in tcp_send_fin() + +From: Eric Dumazet + +[ Upstream commit d83769a580f1132ac26439f50068a29b02be535e ] + +Using sk_stream_alloc_skb() in tcp_send_fin() is dangerous in +case a huge process is killed by OOM, and tcp_mem[2] is hit. + +To be able to free memory we need to make progress, so this +patch allows FIN packets to not care about tcp_mem[2], if +skb allocation succeeded. + +In a follow-up patch, we might abort tcp_send_fin() infinite loop +in case TIF_MEMDIE is set on this thread, as memory allocator +did its best getting extra memory already. + +This patch reverts d22e15371811 ("tcp: fix tcp fin memory accounting") + +Fixes: d22e15371811 ("tcp: fix tcp fin memory accounting") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2751,6 +2751,21 @@ begin_fwd: + } + } + ++/* We allow to exceed memory limits for FIN packets to expedite ++ * connection tear down and (memory) recovery. ++ * Otherwise tcp_send_fin() could loop forever. ++ */ ++static void sk_forced_wmem_schedule(struct sock *sk, int size) ++{ ++ int amt, status; ++ ++ if (size <= sk->sk_forward_alloc) ++ return; ++ amt = sk_mem_pages(size); ++ sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; ++ sk_memory_allocated_add(sk, amt, &status); ++} ++ + /* Send a fin. The caller locks the socket for us. This cannot be + * allowed to fail queueing a FIN frame under any circumstances. + */ +@@ -2773,11 +2788,14 @@ void tcp_send_fin(struct sock *sk) + } else { + /* Socket is locked, keep trying until memory is available. */ + for (;;) { +- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); ++ skb = alloc_skb_fclone(MAX_TCP_HEADER, ++ sk->sk_allocation); + if (skb) + break; + yield(); + } ++ skb_reserve(skb, MAX_TCP_HEADER); ++ sk_forced_wmem_schedule(sk, skb->truesize); + /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ + tcp_init_nondata_skb(skb, tp->write_seq, + TCPHDR_ACK | TCPHDR_FIN); -- 2.47.2