From: Greg Kroah-Hartman Date: Thu, 26 Apr 2012 20:14:23 +0000 (-0700) Subject: 3.3-stable patches X-Git-Tag: v3.0.30~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2fd8de3275fcc39aab5d4cc14dee4f736bfc9ec2;p=thirdparty%2Fkernel%2Fstable-queue.git 3.3-stable patches added patches: 8139cp-set-intr-mask-after-its-handler-is-registered.patch atl1-fix-kernel-panic-in-case-of-dma-errors.patch bonding-properly-unset-current_arp_slave-on-slave-link-up.patch bridge-do-not-send-queries-on-multicast-group-leaves.patch dummy-add-ndo_uninit.patch ipv6-fix-array-index-in-ip6_mc_add_src.patch ksz884x-don-t-copy-too-much-in-netdev_set_mac_address.patch net-allow-pskb_expand_head-to-get-maximum-tailroom.patch net-ax25-reorder-ax25_exit-to-remove-races.patch net-ethernet-ks8851_mll-fix-rx-frame-buffer-overflow.patch net-fix-a-race-in-sock_queue_err_skb.patch net-fix-proc-net-dev-regression.patch netlink-fix-races-after-skb-queueing.patch netns-do-not-leak-net_generic-data-on-failed-init.patch net_sched-gred-fix-oops-in-gred_dump-in-wred-mode.patch net-smsc911x-fix-skb-handling-in-receive-path.patch net-usb-smsc75xx-fix-mtu.patch phonet-check-input-from-user-before-allocating.patch phy-icplus-fix-auto-power-saving-in-ip101a_config_init.patch ppp-don-t-stop-and-restart-queue-on-every-tx-packet.patch sctp-allow-struct-sctp_event_subscribe-to-grow-without-breaking-binaries.patch tcp-allow-splice-to-build-full-tso-packets.patch tcp-avoid-order-1-allocations-on-wifi-and-tx-path.patch tcp-fix-tcp_grow_window-for-large-incoming-frames.patch tcp-fix-tcp_maxseg-for-established-ipv6-passive-sockets.patch tcp-fix-tcp_rcv_rtt_update-use-of-an-unscaled-rtt-sample.patch tcp-restore-correct-limit.patch wimax-i2400m-prevent-a-possible-kernel-bug-due-to-missing-fw_name-string.patch --- diff --git a/queue-3.3/8139cp-set-intr-mask-after-its-handler-is-registered.patch b/queue-3.3/8139cp-set-intr-mask-after-its-handler-is-registered.patch new file mode 100644 index 00000000000..25a94798727 --- 
/dev/null +++ b/queue-3.3/8139cp-set-intr-mask-after-its-handler-is-registered.patch @@ -0,0 +1,64 @@ +From 129d0087682b1513a94b6bbae155dee823fcdf03 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Wed, 11 Apr 2012 22:10:54 +0000 +Subject: [PATCH 16/28] 8139cp: set intr mask after its handler is registered + + +From: Jason Wang + +[ Upstream commit a8c9cb106fe79c28d6b7f1397652cadd228715ff ] + +We set intr mask before its handler is registered, this does not work well when +8139cp is sharing irq line with other devices. As the irq could be enabled by +the device before 8139cp's hander is registered which may lead unhandled +irq. Fix this by introducing an helper cp_irq_enable() and call it after +request_irq(). + +Signed-off-by: Jason Wang +Reviewed-by: Flavio Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139cp.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/realtek/8139cp.c ++++ b/drivers/net/ethernet/realtek/8139cp.c +@@ -958,6 +958,11 @@ static inline void cp_start_hw (struct c + cpw8(Cmd, RxOn | TxOn); + } + ++static void cp_enable_irq(struct cp_private *cp) ++{ ++ cpw16_f(IntrMask, cp_intr_mask); ++} ++ + static void cp_init_hw (struct cp_private *cp) + { + struct net_device *dev = cp->dev; +@@ -997,8 +1002,6 @@ static void cp_init_hw (struct cp_privat + + cpw16(MultiIntr, 0); + +- cpw16_f(IntrMask, cp_intr_mask); +- + cpw8_f(Cfg9346, Cfg9346_Lock); + } + +@@ -1130,6 +1133,8 @@ static int cp_open (struct net_device *d + if (rc) + goto err_out_hw; + ++ cp_enable_irq(cp); ++ + netif_carrier_off(dev); + mii_check_media(&cp->mii_if, netif_msg_link(cp), true); + netif_start_queue(dev); +@@ -2031,6 +2036,7 @@ static int cp_resume (struct pci_dev *pd + /* FIXME: sh*t may happen if the Rx ring buffer is depleted */ + cp_init_rings_index (cp); + cp_init_hw (cp); ++ cp_enable_irq(cp); + netif_start_queue (dev); + + spin_lock_irqsave (&cp->lock, flags); 
diff --git a/queue-3.3/atl1-fix-kernel-panic-in-case-of-dma-errors.patch b/queue-3.3/atl1-fix-kernel-panic-in-case-of-dma-errors.patch new file mode 100644 index 00000000000..dc6831f6bfb --- /dev/null +++ b/queue-3.3/atl1-fix-kernel-panic-in-case-of-dma-errors.patch @@ -0,0 +1,100 @@ +From f7e3da78e59a1468938cfe069deed827501cec88 Mon Sep 17 00:00:00 2001 +From: Tony Zelenoff +Date: Wed, 11 Apr 2012 06:15:03 +0000 +Subject: [PATCH 15/28] atl1: fix kernel panic in case of DMA errors + + +From: Tony Zelenoff + +[ Upstream commit 03662e41c7cff64a776bfb1b3816de4be43de881 ] + +Problem: +There was two separate work_struct structures which share one +handler. Unfortunately getting atl1_adapter structure from +work_struct in case of DMA error was done from incorrect +offset which cause kernel panics. + +Solution: +The useless work_struct for DMA error removed and +handler name changed to more generic one. + +Signed-off-by: Tony Zelenoff +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/atheros/atlx/atl1.c | 12 +++++------- + drivers/net/ethernet/atheros/atlx/atl1.h | 3 +-- + drivers/net/ethernet/atheros/atlx/atlx.c | 2 +- + 3 files changed, 7 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/atheros/atlx/atl1.c ++++ b/drivers/net/ethernet/atheros/atlx/atl1.c +@@ -2473,7 +2473,7 @@ static irqreturn_t atl1_intr(int irq, vo + "pcie phy link down %x\n", status); + if (netif_running(adapter->netdev)) { /* reset MAC */ + iowrite32(0, adapter->hw.hw_addr + REG_IMR); +- schedule_work(&adapter->pcie_dma_to_rst_task); ++ schedule_work(&adapter->reset_dev_task); + return IRQ_HANDLED; + } + } +@@ -2485,7 +2485,7 @@ static irqreturn_t atl1_intr(int irq, vo + "pcie DMA r/w error (status = 0x%x)\n", + status); + iowrite32(0, adapter->hw.hw_addr + REG_IMR); +- schedule_work(&adapter->pcie_dma_to_rst_task); ++ schedule_work(&adapter->reset_dev_task); + return IRQ_HANDLED; + } + +@@ -2630,10 +2630,10 @@ static void atl1_down(struct 
atl1_adapte + atl1_clean_rx_ring(adapter); + } + +-static void atl1_tx_timeout_task(struct work_struct *work) ++static void atl1_reset_dev_task(struct work_struct *work) + { + struct atl1_adapter *adapter = +- container_of(work, struct atl1_adapter, tx_timeout_task); ++ container_of(work, struct atl1_adapter, reset_dev_task); + struct net_device *netdev = adapter->netdev; + + netif_device_detach(netdev); +@@ -3032,12 +3032,10 @@ static int __devinit atl1_probe(struct p + (unsigned long)adapter); + adapter->phy_timer_pending = false; + +- INIT_WORK(&adapter->tx_timeout_task, atl1_tx_timeout_task); ++ INIT_WORK(&adapter->reset_dev_task, atl1_reset_dev_task); + + INIT_WORK(&adapter->link_chg_task, atlx_link_chg_task); + +- INIT_WORK(&adapter->pcie_dma_to_rst_task, atl1_tx_timeout_task); +- + err = register_netdev(netdev); + if (err) + goto err_common; +--- a/drivers/net/ethernet/atheros/atlx/atl1.h ++++ b/drivers/net/ethernet/atheros/atlx/atl1.h +@@ -758,9 +758,8 @@ struct atl1_adapter { + u16 link_speed; + u16 link_duplex; + spinlock_t lock; +- struct work_struct tx_timeout_task; ++ struct work_struct reset_dev_task; + struct work_struct link_chg_task; +- struct work_struct pcie_dma_to_rst_task; + + struct timer_list phy_config_timer; + bool phy_timer_pending; +--- a/drivers/net/ethernet/atheros/atlx/atlx.c ++++ b/drivers/net/ethernet/atheros/atlx/atlx.c +@@ -193,7 +193,7 @@ static void atlx_tx_timeout(struct net_d + { + struct atlx_adapter *adapter = netdev_priv(netdev); + /* Do the reset outside of interrupt context */ +- schedule_work(&adapter->tx_timeout_task); ++ schedule_work(&adapter->reset_dev_task); + } + + /* diff --git a/queue-3.3/bonding-properly-unset-current_arp_slave-on-slave-link-up.patch b/queue-3.3/bonding-properly-unset-current_arp_slave-on-slave-link-up.patch new file mode 100644 index 00000000000..d096b4557f3 --- /dev/null +++ b/queue-3.3/bonding-properly-unset-current_arp_slave-on-slave-link-up.patch @@ -0,0 +1,42 @@ +From 
519967d82324cbdd3f099b18931039693ce58d42 Mon Sep 17 00:00:00 2001 +From: Veaceslav Falico +Date: Thu, 5 Apr 2012 03:47:43 +0000 +Subject: [PATCH 09/28] bonding: properly unset current_arp_slave on slave link up + + +From: Veaceslav Falico + +[ Upstream commit 5a4309746cd74734daa964acb02690c22b3c8911 ] + +When a slave comes up, we're unsetting the current_arp_slave without +removing active flags from it, which can lead to situations where we have +more than one slave with active flags in active-backup mode. + +To avoid this situation we must remove the active flags from a slave before +removing it as a current_arp_slave. + +Signed-off-by: Veaceslav Falico +Signed-off-by: Jay Vosburgh +Signed-off-by: Andy Gospodarek +Signed-off-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_main.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -2982,7 +2982,11 @@ static void bond_ab_arp_commit(struct bo + trans_start + delta_in_ticks)) || + bond->curr_active_slave != slave) { + slave->link = BOND_LINK_UP; +- bond->current_arp_slave = NULL; ++ if (bond->current_arp_slave) { ++ bond_set_slave_inactive_flags( ++ bond->current_arp_slave); ++ bond->current_arp_slave = NULL; ++ } + + pr_info("%s: link status definitely up for interface %s.\n", + bond->dev->name, slave->dev->name); diff --git a/queue-3.3/bridge-do-not-send-queries-on-multicast-group-leaves.patch b/queue-3.3/bridge-do-not-send-queries-on-multicast-group-leaves.patch new file mode 100644 index 00000000000..9f980d02f54 --- /dev/null +++ b/queue-3.3/bridge-do-not-send-queries-on-multicast-group-leaves.patch @@ -0,0 +1,194 @@ +From e42a283e25ae0b6f230195c4a5f27946e80f6be6 Mon Sep 17 00:00:00 2001 +From: Herbert Xu +Date: Wed, 4 Apr 2012 01:01:20 +0000 +Subject: [PATCH 06/28] bridge: Do not send queries on multicast group leaves + + +From: Herbert Xu + +[ 
Upstream commit 996304bbea3d2a094b7ba54c3bd65d3fffeac57b ] + +As it stands the bridge IGMP snooping system will respond to +group leave messages with queries for remaining membership. +This is both unnecessary and undesirable. First of all any +multicast routers present should be doing this rather than us. +What's more the queries that we send may end up upsetting other +multicast snooping swithces in the system that are buggy. + +In fact, we can simply remove the code that send these queries +because the existing membership expiry mechanism doesn't rely +on them anyway. + +So this patch simply removes all code associated with group +queries in response to group leave messages. + +Signed-off-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 81 ---------------------------------------------- + net/bridge/br_private.h | 4 -- + 2 files changed, 85 deletions(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -241,7 +241,6 @@ static void br_multicast_group_expired(u + hlist_del_rcu(&mp->hlist[mdb->ver]); + mdb->size--; + +- del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + + out: +@@ -271,7 +270,6 @@ static void br_multicast_del_pg(struct n + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); +- del_timer(&p->query_timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + + if (!mp->ports && !mp->mglist && +@@ -507,74 +505,6 @@ static struct sk_buff *br_multicast_allo + return NULL; + } + +-static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) +-{ +- struct net_bridge *br = mp->br; +- struct sk_buff *skb; +- +- skb = br_multicast_alloc_query(br, &mp->addr); +- if (!skb) +- goto timer; +- +- netif_rx(skb); +- +-timer: +- if (++mp->queries_sent < br->multicast_last_member_count) +- mod_timer(&mp->query_timer, +- jiffies + br->multicast_last_member_interval); +-} +- +-static void 
br_multicast_group_query_expired(unsigned long data) +-{ +- struct net_bridge_mdb_entry *mp = (void *)data; +- struct net_bridge *br = mp->br; +- +- spin_lock(&br->multicast_lock); +- if (!netif_running(br->dev) || !mp->mglist || +- mp->queries_sent >= br->multicast_last_member_count) +- goto out; +- +- br_multicast_send_group_query(mp); +- +-out: +- spin_unlock(&br->multicast_lock); +-} +- +-static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg) +-{ +- struct net_bridge_port *port = pg->port; +- struct net_bridge *br = port->br; +- struct sk_buff *skb; +- +- skb = br_multicast_alloc_query(br, &pg->addr); +- if (!skb) +- goto timer; +- +- br_deliver(port, skb); +- +-timer: +- if (++pg->queries_sent < br->multicast_last_member_count) +- mod_timer(&pg->query_timer, +- jiffies + br->multicast_last_member_interval); +-} +- +-static void br_multicast_port_group_query_expired(unsigned long data) +-{ +- struct net_bridge_port_group *pg = (void *)data; +- struct net_bridge_port *port = pg->port; +- struct net_bridge *br = port->br; +- +- spin_lock(&br->multicast_lock); +- if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || +- pg->queries_sent >= br->multicast_last_member_count) +- goto out; +- +- br_multicast_send_port_group_query(pg); +- +-out: +- spin_unlock(&br->multicast_lock); +-} +- + static struct net_bridge_mdb_entry *br_multicast_get_group( + struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group, int hash) +@@ -690,8 +620,6 @@ rehash: + mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, + (unsigned long)mp); +- setup_timer(&mp->query_timer, br_multicast_group_query_expired, +- (unsigned long)mp); + + hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); + mdb->size++; +@@ -746,8 +674,6 @@ static int br_multicast_add_group(struct + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); +- 
setup_timer(&p->query_timer, br_multicast_port_group_query_expired, +- (unsigned long)p); + + rcu_assign_pointer(*pp, p); + +@@ -1291,9 +1217,6 @@ static void br_multicast_leave_group(str + time_after(mp->timer.expires, time) : + try_to_del_timer_sync(&mp->timer) >= 0)) { + mod_timer(&mp->timer, time); +- +- mp->queries_sent = 0; +- mod_timer(&mp->query_timer, now); + } + + goto out; +@@ -1310,9 +1233,6 @@ static void br_multicast_leave_group(str + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); +- +- p->queries_sent = 0; +- mod_timer(&p->query_timer, now); + } + + break; +@@ -1681,7 +1601,6 @@ void br_multicast_stop(struct net_bridge + hlist_for_each_entry_safe(mp, p, n, &mdb->mhash[i], + hlist[ver]) { + del_timer(&mp->timer); +- del_timer(&mp->query_timer); + call_rcu_bh(&mp->rcu, br_multicast_free_group); + } + } +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -82,9 +82,7 @@ struct net_bridge_port_group { + struct hlist_node mglist; + struct rcu_head rcu; + struct timer_list timer; +- struct timer_list query_timer; + struct br_ip addr; +- u32 queries_sent; + }; + + struct net_bridge_mdb_entry +@@ -94,10 +92,8 @@ struct net_bridge_mdb_entry + struct net_bridge_port_group __rcu *ports; + struct rcu_head rcu; + struct timer_list timer; +- struct timer_list query_timer; + struct br_ip addr; + bool mglist; +- u32 queries_sent; + }; + + struct net_bridge_mdb_htable diff --git a/queue-3.3/dummy-add-ndo_uninit.patch b/queue-3.3/dummy-add-ndo_uninit.patch new file mode 100644 index 00000000000..b3f08461451 --- /dev/null +++ b/queue-3.3/dummy-add-ndo_uninit.patch @@ -0,0 +1,53 @@ +From 29138760ccf1a184189fdf3ff8659d5a18b16c64 Mon Sep 17 00:00:00 2001 +From: Hiroaki SHIMODA +Date: Sun, 15 Apr 2012 13:26:01 +0000 +Subject: [PATCH 21/28] dummy: Add ndo_uninit(). 
+ + +From: Hiroaki SHIMODA + +[ Upstream commit 890fdf2a0cb88202d1427589db2cf29c1bdd3c1d ] + +In register_netdevice(), when ndo_init() is successful and later +some error occurred, ndo_uninit() will be called. +So dummy deivce is desirable to implement ndo_uninit() method +to free percpu stats for this case. +And, ndo_uninit() is also called along with dev->destructor() when +device is unregistered, so in order to prevent dev->dstats from +being freed twice, dev->destructor is modified to free_netdev(). + +Signed-off-by: Hiroaki SHIMODA +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/dummy.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/dummy.c ++++ b/drivers/net/dummy.c +@@ -106,14 +106,14 @@ static int dummy_dev_init(struct net_dev + return 0; + } + +-static void dummy_dev_free(struct net_device *dev) ++static void dummy_dev_uninit(struct net_device *dev) + { + free_percpu(dev->dstats); +- free_netdev(dev); + } + + static const struct net_device_ops dummy_netdev_ops = { + .ndo_init = dummy_dev_init, ++ .ndo_uninit = dummy_dev_uninit, + .ndo_start_xmit = dummy_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_rx_mode = set_multicast_list, +@@ -127,7 +127,7 @@ static void dummy_setup(struct net_devic + + /* Initialize the device structure. */ + dev->netdev_ops = &dummy_netdev_ops; +- dev->destructor = dummy_dev_free; ++ dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. 
*/ + dev->tx_queue_len = 0; diff --git a/queue-3.3/ipv6-fix-array-index-in-ip6_mc_add_src.patch b/queue-3.3/ipv6-fix-array-index-in-ip6_mc_add_src.patch new file mode 100644 index 00000000000..aabbdd98a23 --- /dev/null +++ b/queue-3.3/ipv6-fix-array-index-in-ip6_mc_add_src.patch @@ -0,0 +1,37 @@ +From 30b5ee3a227f26e894607f6037f3591cdc7d4a94 Mon Sep 17 00:00:00 2001 +From: "RongQing.Li" +Date: Wed, 4 Apr 2012 16:47:04 +0000 +Subject: [PATCH 07/28] ipv6: fix array index in ip6_mc_add_src() + + +From: "RongQing.Li" + +[ Upstream commit 78d50217baf36093ab320f95bae0d6452daec85c ] + +Convert array index from the loop bound to the loop index. + +And remove the void type conversion to ip6_mc_del1_src() return +code, seem it is unnecessary, since ip6_mc_del1_src() does not +use __must_check similar attribute, no compiler will report the +warning when it is removed. + +v2: enrich the commit header + +Signed-off-by: RongQing.Li +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/mcast.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -2044,7 +2044,7 @@ static int ip6_mc_add_src(struct inet6_d + if (!delta) + pmc->mca_sfcount[sfmode]--; + for (j=0; j<i; j++) +- (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]); ++ ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); + } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { + struct ip6_sf_list *psf; + diff --git a/queue-3.3/ksz884x-don-t-copy-too-much-in-netdev_set_mac_address.patch b/queue-3.3/ksz884x-don-t-copy-too-much-in-netdev_set_mac_address.patch new file mode 100644 index 00000000000..4d8cd471b56 --- /dev/null +++ b/queue-3.3/ksz884x-don-t-copy-too-much-in-netdev_set_mac_address.patch @@ -0,0 +1,33 @@ +From 9a369ce1662b82f8983dd977c9b3a9fa4232d0e1 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 19 Apr 2012 10:00:19 +0300 +Subject: [PATCH 24/28] ksz884x: don't copy too much in netdev_set_mac_address() + + +From: Dan Carpenter + +[ Upstream commit 716af4abd6e6370226f567af50bfaca274515980 ] + +MAX_ADDR_LEN is 32. ETH_ALEN is 6. 
mac->sa_data is a 14 byte array, so +the memcpy() is doing a read past the end of the array. I asked about +this on netdev and Ben Hutchings told me it's supposed to be copying +ETH_ALEN bytes (thanks Ben). + +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/micrel/ksz884x.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/micrel/ksz884x.c ++++ b/drivers/net/ethernet/micrel/ksz884x.c +@@ -5675,7 +5675,7 @@ static int netdev_set_mac_address(struct + memcpy(hw->override_addr, mac->sa_data, ETH_ALEN); + } + +- memcpy(dev->dev_addr, mac->sa_data, MAX_ADDR_LEN); ++ memcpy(dev->dev_addr, mac->sa_data, ETH_ALEN); + + interrupt = hw_block_intr(hw); + diff --git a/queue-3.3/net-allow-pskb_expand_head-to-get-maximum-tailroom.patch b/queue-3.3/net-allow-pskb_expand_head-to-get-maximum-tailroom.patch new file mode 100644 index 00000000000..656a681bfe4 --- /dev/null +++ b/queue-3.3/net-allow-pskb_expand_head-to-get-maximum-tailroom.patch @@ -0,0 +1,45 @@ +From c29370964ee80c47c09586893b99cc4bfe13295e Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 10 Apr 2012 20:08:39 +0000 +Subject: [PATCH 27/28] net: allow pskb_expand_head() to get maximum tailroom + + +From: Eric Dumazet + +[ Upstream commit 87151b8689d890dfb495081f7be9b9e257f7a2df ] + +Marc Merlin reported many order-1 allocations failures in TX path on its +wireless setup, that dont make any sense with MTU=1500 network, and non +SG capable hardware. + +Turns out part of the problem comes from pskb_expand_head() not using +ksize() to get exact head size given by kmalloc(). Doing the same thing +than __alloc_skb() allows more tailroom in skb and can prevent future +reallocations. + +As a bonus, struct skb_shared_info becomes cache line aligned. + +Reported-by: Marc MERLIN +Tested-by: Marc MERLIN +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -952,9 +952,11 @@ int pskb_expand_head(struct sk_buff *skb + goto adjust_others; + } + +- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); ++ data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), ++ gfp_mask); + if (!data) + goto nodata; ++ size = SKB_WITH_OVERHEAD(ksize(data)); + + /* Copy only real data... and, alas, header. This should be + * optimized for the cases when header is void. diff --git a/queue-3.3/net-ax25-reorder-ax25_exit-to-remove-races.patch b/queue-3.3/net-ax25-reorder-ax25_exit-to-remove-races.patch new file mode 100644 index 00000000000..cfba1995219 --- /dev/null +++ b/queue-3.3/net-ax25-reorder-ax25_exit-to-remove-races.patch @@ -0,0 +1,56 @@ +From a6254aa2d9a21f2d45e2b7e508b1f47e17e4fd5d Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Wed, 18 Apr 2012 16:11:23 +0000 +Subject: [PATCH 25/28] net ax25: Reorder ax25_exit to remove races. + + +From: "Eric W. Biederman" + +[ Upstream commit 3adadc08cc1e2cbcc15a640d639297ef5fcb17f5 ] + +While reviewing the sysctl code in ax25 I spotted races in ax25_exit +where it is possible to receive notifications and packets after already +freeing up some of the data structures needed to process those +notifications and updates. + +Call unregister_netdevice_notifier early so that the rest of the cleanup +code does not need to deal with network devices. This takes advantage +of my recent enhancement to unregister_netdevice_notifier to send +unregister notifications of all network devices that are current +registered. + +Move the unregistration for packet types, socket types and protocol +types before we cleanup any of the ax25 data structures to remove the +possibilities of other races. + +Signed-off-by: Eric W. Biederman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/af_ax25.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -2012,16 +2012,17 @@ static void __exit ax25_exit(void) + proc_net_remove(&init_net, "ax25_route"); + proc_net_remove(&init_net, "ax25"); + proc_net_remove(&init_net, "ax25_calls"); +- ax25_rt_free(); +- ax25_uid_free(); +- ax25_dev_free(); + +- ax25_unregister_sysctl(); + unregister_netdevice_notifier(&ax25_dev_notifier); ++ ax25_unregister_sysctl(); + + dev_remove_pack(&ax25_packet_type); + + sock_unregister(PF_AX25); + proto_unregister(&ax25_proto); ++ ++ ax25_rt_free(); ++ ax25_uid_free(); ++ ax25_dev_free(); + } + module_exit(ax25_exit); diff --git a/queue-3.3/net-ethernet-ks8851_mll-fix-rx-frame-buffer-overflow.patch b/queue-3.3/net-ethernet-ks8851_mll-fix-rx-frame-buffer-overflow.patch new file mode 100644 index 00000000000..22c4505e383 --- /dev/null +++ b/queue-3.3/net-ethernet-ks8851_mll-fix-rx-frame-buffer-overflow.patch @@ -0,0 +1,43 @@ +From 9d0bc6304eddb1895286ea19a18ad29f82d05be1 Mon Sep 17 00:00:00 2001 +From: Davide Ciminaghi +Date: Fri, 13 Apr 2012 04:48:25 +0000 +Subject: [PATCH 18/28] net/ethernet: ks8851_mll fix rx frame buffer overflow + + +From: Davide Ciminaghi + +[ Upstream commit 8a9a0ea6032186e3030419262678d652b88bf6a8 ] + +At the beginning of ks_rcv(), a for loop retrieves the +header information relevant to all the frames stored +in the mac's internal buffers. The number of pending +frames is stored as an 8 bits field in KS_RXFCTR. +If interrupts are disabled long enough to allow for more than +32 frames to accumulate in the MAC's internal buffers, a buffer +overflow occurs. +This patch fixes the problem by making the +driver's frame_head_info buffer big enough. 
+Well actually, since the chip appears to have 12K of +internal rx buffers and the shortest ethernet frame should +be 64 bytes long, maybe the limit could be set to +12*1024/64 = 192 frames, but 255 should be safer. + +Signed-off-by: Davide Ciminaghi +Signed-off-by: Raffaele Recalcati +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/micrel/ks8851_mll.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/micrel/ks8851_mll.c ++++ b/drivers/net/ethernet/micrel/ks8851_mll.c +@@ -40,7 +40,7 @@ + #define DRV_NAME "ks8851_mll" + + static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; +-#define MAX_RECV_FRAMES 32 ++#define MAX_RECV_FRAMES 255 + #define MAX_BUF_SIZE 2048 + #define TX_BUF_SIZE 2000 + #define RX_BUF_SIZE 2000 diff --git a/queue-3.3/net-fix-a-race-in-sock_queue_err_skb.patch b/queue-3.3/net-fix-a-race-in-sock_queue_err_skb.patch new file mode 100644 index 00000000000..05d540a2702 --- /dev/null +++ b/queue-3.3/net-fix-a-race-in-sock_queue_err_skb.patch @@ -0,0 +1,41 @@ +From ab3e1b7cfc86ddacef4c30f85be80b87454a8170 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Fri, 6 Apr 2012 10:49:10 +0200 +Subject: [PATCH 12/28] net: fix a race in sock_queue_err_skb() + + +From: Eric Dumazet + +[ Upstream commit 110c43304db6f06490961529536c362d9ac5732f ] + +As soon as an skb is queued into socket error queue, another thread +can consume it, so we are not allowed to reference skb anymore, or risk +use after free. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -3160,6 +3160,8 @@ static void sock_rmem_free(struct sk_buf + */ + int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) + { ++ int len = skb->len; ++ + if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= + (unsigned)sk->sk_rcvbuf) + return -ENOMEM; +@@ -3174,7 +3176,7 @@ int sock_queue_err_skb(struct sock *sk, + + skb_queue_tail(&sk->sk_error_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) +- sk->sk_data_ready(sk, skb->len); ++ sk->sk_data_ready(sk, len); + return 0; + } + EXPORT_SYMBOL(sock_queue_err_skb); diff --git a/queue-3.3/net-fix-proc-net-dev-regression.patch b/queue-3.3/net-fix-proc-net-dev-regression.patch new file mode 100644 index 00000000000..a3e0d6dfcbb --- /dev/null +++ b/queue-3.3/net-fix-proc-net-dev-regression.patch @@ -0,0 +1,179 @@ +From 2ca0b5e02eda86848d63581510f77772d10623fb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 2 Apr 2012 22:33:02 +0000 +Subject: [PATCH 01/28] net: fix /proc/net/dev regression + + +From: Eric Dumazet + +[ Upstream commit 2def16ae6b0c77571200f18ba4be049b03d75579 ] + +Commit f04565ddf52 (dev: use name hash for dev_seq_ops) added a second +regression, as some devices are missing from /proc/net/dev if many +devices are defined. + +When seq_file buffer is filled, the last ->next/show() method is +canceled (pos value is reverted to value prior ->next() call) + +Problem is after above commit, we dont restart the lookup at right +position in ->start() method. + +Fix this by removing the internal 'pos' pointer added in commit, since +we need to use the 'loff_t *pos' provided by seq_file layer. + +This also reverts commit 5cac98dd0 (net: Fix corruption +in /proc/*/net/dev_mcast), since its not needed anymore. 
+ +Reported-by: Ben Greear +Signed-off-by: Eric Dumazet +Cc: Mihai Maruseac +Tested-by: Ben Greear +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 - + net/core/dev.c | 58 ++++++++++------------------------------------ + net/core/dev_addr_lists.c | 3 +- + 3 files changed, 15 insertions(+), 48 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2582,8 +2582,6 @@ extern void net_disable_timestamp(void) + extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); + extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); + extern void dev_seq_stop(struct seq_file *seq, void *v); +-extern int dev_seq_open_ops(struct inode *inode, struct file *file, +- const struct seq_operations *ops); + #endif + + extern int netdev_class_create_file(struct class_attribute *class_attr); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4037,54 +4037,41 @@ static int dev_ifconf(struct net *net, c + + #ifdef CONFIG_PROC_FS + +-#define BUCKET_SPACE (32 - NETDEV_HASHBITS) +- +-struct dev_iter_state { +- struct seq_net_private p; +- unsigned int pos; /* bucket << BUCKET_SPACE + offset */ +-}; ++#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1) + + #define get_bucket(x) ((x) >> BUCKET_SPACE) + #define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) + #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) ++static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos) + { +- struct dev_iter_state *state = seq->private; + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; +- unsigned int count, bucket, offset; ++ unsigned int count = 0, offset = get_offset(*pos); + +- bucket = get_bucket(state->pos); +- offset = get_offset(state->pos); +- h = &net->dev_name_head[bucket]; +- count = 0; ++ h = 
&net->dev_name_head[get_bucket(*pos)]; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { +- if (count++ == offset) { +- state->pos = set_bucket_offset(bucket, count); ++ if (++count == offset) + return dev; +- } + } + + return NULL; + } + +-static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) ++static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos) + { +- struct dev_iter_state *state = seq->private; + struct net_device *dev; + unsigned int bucket; + +- bucket = get_bucket(state->pos); + do { +- dev = dev_from_same_bucket(seq); ++ dev = dev_from_same_bucket(seq, pos); + if (dev) + return dev; + +- bucket++; +- state->pos = set_bucket_offset(bucket, 0); ++ bucket = get_bucket(*pos) + 1; ++ *pos = set_bucket_offset(bucket, 1); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +@@ -4097,33 +4084,20 @@ static inline struct net_device *dev_fro + void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) + { +- struct dev_iter_state *state = seq->private; +- + rcu_read_lock(); + if (!*pos) + return SEQ_START_TOKEN; + +- /* check for end of the hash */ +- if (state->pos == 0 && *pos > 1) ++ if (get_bucket(*pos) >= NETDEV_HASHENTRIES) + return NULL; + +- return dev_from_new_bucket(seq); ++ return dev_from_bucket(seq, pos); + } + + void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { +- struct net_device *dev; +- + ++*pos; +- +- if (v == SEQ_START_TOKEN) +- return dev_from_new_bucket(seq); +- +- dev = dev_from_same_bucket(seq); +- if (dev) +- return dev; +- +- return dev_from_new_bucket(seq); ++ return dev_from_bucket(seq, pos); + } + + void dev_seq_stop(struct seq_file *seq, void *v) +@@ -4222,13 +4196,7 @@ static const struct seq_operations dev_s + static int dev_seq_open(struct inode *inode, struct file *file) + { + return seq_open_net(inode, file, &dev_seq_ops, +- sizeof(struct dev_iter_state)); +-} +- +-int dev_seq_open_ops(struct inode *inode, struct file *file, +- const struct 
seq_operations *ops) +-{ +- return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state)); ++ sizeof(struct seq_net_private)); + } + + static const struct file_operations dev_seq_fops = { +--- a/net/core/dev_addr_lists.c ++++ b/net/core/dev_addr_lists.c +@@ -696,7 +696,8 @@ static const struct seq_operations dev_m + + static int dev_mc_seq_open(struct inode *inode, struct file *file) + { +- return dev_seq_open_ops(inode, file, &dev_mc_seq_ops); ++ return seq_open_net(inode, file, &dev_mc_seq_ops, ++ sizeof(struct seq_net_private)); + } + + static const struct file_operations dev_mc_seq_fops = { diff --git a/queue-3.3/net-smsc911x-fix-skb-handling-in-receive-path.patch b/queue-3.3/net-smsc911x-fix-skb-handling-in-receive-path.patch new file mode 100644 index 00000000000..d2475ac3c5d --- /dev/null +++ b/queue-3.3/net-smsc911x-fix-skb-handling-in-receive-path.patch @@ -0,0 +1,76 @@ +From 2af963d33f200090d60201150ee3f5843e743509 Mon Sep 17 00:00:00 2001 +From: Will Deacon +Date: Thu, 12 Apr 2012 05:54:09 +0000 +Subject: [PATCH 17/28] net: smsc911x: fix skb handling in receive path + + +From: Will Deacon + +[ Upstream commit 3c5e979bd037888dd7d722da22da4b43659af485 ] + +The SMSC911x driver resets the ->head, ->data and ->tail pointers in the +skb on the reset path in order to avoid buffer overflow due to packet +padding performed by the hardware. + +This patch fixes the receive path so that the skb pointers are fixed up +after the data has been read from the device, The error path is also +fixed to use number of words consistently and prevent erroneous FIFO +fastforwarding when skipping over bad data. + +Signed-off-by: Will Deacon +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/smsc/smsc911x.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/smsc/smsc911x.c ++++ b/drivers/net/ethernet/smsc/smsc911x.c +@@ -1166,10 +1166,8 @@ smsc911x_rx_counterrors(struct net_devic + + /* Quickly dumps bad packets */ + static void +-smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktbytes) ++smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktwords) + { +- unsigned int pktwords = (pktbytes + NET_IP_ALIGN + 3) >> 2; +- + if (likely(pktwords >= 4)) { + unsigned int timeout = 500; + unsigned int val; +@@ -1233,7 +1231,7 @@ static int smsc911x_poll(struct napi_str + continue; + } + +- skb = netdev_alloc_skb(dev, pktlength + NET_IP_ALIGN); ++ skb = netdev_alloc_skb(dev, pktwords << 2); + if (unlikely(!skb)) { + SMSC_WARN(pdata, rx_err, + "Unable to allocate skb for rx packet"); +@@ -1243,14 +1241,12 @@ static int smsc911x_poll(struct napi_str + break; + } + +- skb->data = skb->head; +- skb_reset_tail_pointer(skb); ++ pdata->ops->rx_readfifo(pdata, ++ (unsigned int *)skb->data, pktwords); + + /* Align IP on 16B boundary */ + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, pktlength - 4); +- pdata->ops->rx_readfifo(pdata, +- (unsigned int *)skb->head, pktwords); + skb->protocol = eth_type_trans(skb, dev); + skb_checksum_none_assert(skb); + netif_receive_skb(skb); +@@ -1565,7 +1561,7 @@ static int smsc911x_open(struct net_devi + smsc911x_reg_write(pdata, FIFO_INT, temp); + + /* set RX Data offset to 2 bytes for alignment */ +- smsc911x_reg_write(pdata, RX_CFG, (2 << 8)); ++ smsc911x_reg_write(pdata, RX_CFG, (NET_IP_ALIGN << 8)); + + /* enable NAPI polling before enabling RX interrupts */ + napi_enable(&pdata->napi); diff --git a/queue-3.3/net-usb-smsc75xx-fix-mtu.patch b/queue-3.3/net-usb-smsc75xx-fix-mtu.patch new file mode 100644 index 00000000000..c94d82f6d61 --- /dev/null +++ 
b/queue-3.3/net-usb-smsc75xx-fix-mtu.patch @@ -0,0 +1,37 @@ +From 634bde961ef469fe4cd2da3bad8e129fd602af8f Mon Sep 17 00:00:00 2001 +From: Stephane Fillod +Date: Sun, 15 Apr 2012 11:38:29 +0000 +Subject: [PATCH 20/28] net: usb: smsc75xx: fix mtu + + +From: Stephane Fillod + +[ Upstream commit a99ff7d0123b19ecad3b589480b6542716ab6b52 ] + +Make smsc75xx recalculate the hard_mtu after adjusting the +hard_header_len. + +Without this, usbnet adjusts the MTU down to 1492 bytes, and the host is +unable to receive standard 1500-byte frames from the device. + +Inspired by same fix on cdc_eem 78fb72f7936c01d5b426c03a691eca082b03f2b9. + +Tested on ARM/Omap3 with EVB-LAN7500-LC. + +Signed-off-by: Stephane Fillod +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/smsc75xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/smsc75xx.c ++++ b/drivers/net/usb/smsc75xx.c +@@ -1051,6 +1051,7 @@ static int smsc75xx_bind(struct usbnet * + dev->net->ethtool_ops = &smsc75xx_ethtool_ops; + dev->net->flags |= IFF_MULTICAST; + dev->net->hard_header_len += SMSC75XX_TX_OVERHEAD; ++ dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + return 0; + } + diff --git a/queue-3.3/net_sched-gred-fix-oops-in-gred_dump-in-wred-mode.patch b/queue-3.3/net_sched-gred-fix-oops-in-gred_dump-in-wred-mode.patch new file mode 100644 index 00000000000..8b74a01d6ba --- /dev/null +++ b/queue-3.3/net_sched-gred-fix-oops-in-gred_dump-in-wred-mode.patch @@ -0,0 +1,51 @@ +From d65ba13406233f881bc212640b54bbb87c52141b Mon Sep 17 00:00:00 2001 +From: David Ward +Date: Sun, 15 Apr 2012 12:31:45 +0000 +Subject: [PATCH 19/28] net_sched: gred: Fix oops in gred_dump() in WRED mode + + +From: David Ward + +[ Upstream commit 244b65dbfede788f2fa3fe2463c44d0809e97c6b ] + +A parameter set exists for WRED mode, called wred_set, to hold the same +values for qavg and qidlestart across all VQs. The WRED mode values had +been previously held in the VQ for the default DP. 
After these values +were moved to wred_set, the VQ for the default DP was no longer created +automatically (so that it could be omitted on purpose, to have packets +in the default DP enqueued directly to the device without using RED). + +However, gred_dump() was overlooked during that change; in WRED mode it +still reads qavg/qidlestart from the VQ for the default DP, which might +not even exist. As a result, this command sequence will cause an oops: + +tc qdisc add dev $DEV handle $HANDLE parent $PARENT gred setup \ + DPs 3 default 2 grio +tc qdisc change dev $DEV handle $HANDLE gred DP 0 prio 8 $RED_OPTIONS +tc qdisc change dev $DEV handle $HANDLE gred DP 1 prio 8 $RED_OPTIONS + +This fixes gred_dump() in WRED mode to use the values held in wred_set. + +Signed-off-by: David Ward +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_gred.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +--- a/net/sched/sch_gred.c ++++ b/net/sched/sch_gred.c +@@ -565,11 +565,8 @@ static int gred_dump(struct Qdisc *sch, + opt.packets = q->packetsin; + opt.bytesin = q->bytesin; + +- if (gred_wred_mode(table)) { +- q->vars.qidlestart = +- table->tab[table->def]->vars.qidlestart; +- q->vars.qavg = table->tab[table->def]->vars.qavg; +- } ++ if (gred_wred_mode(table)) ++ gred_load_wred_set(table, q); + + opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); + diff --git a/queue-3.3/netlink-fix-races-after-skb-queueing.patch b/queue-3.3/netlink-fix-races-after-skb-queueing.patch new file mode 100644 index 00000000000..4ee694984eb --- /dev/null +++ b/queue-3.3/netlink-fix-races-after-skb-queueing.patch @@ -0,0 +1,80 @@ +From 0d357e2f26d2b9c43255e19cc1a450547e8264bb Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 5 Apr 2012 22:17:46 +0000 +Subject: [PATCH 11/28] netlink: fix races after skb queueing + + +From: Eric Dumazet + +[ Upstream commit 4a7e7c2ad540e54c75489a70137bf0ec15d3a127 ] + +As soon as an skb is queued into 
socket receive_queue, another thread +can consume it, so we are not allowed to reference skb anymore, or risk +use after free. + +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 24 +++++++++++++----------- + 1 file changed, 13 insertions(+), 11 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -829,12 +829,19 @@ int netlink_attachskb(struct sock *sk, s + return 0; + } + +-int netlink_sendskb(struct sock *sk, struct sk_buff *skb) ++static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) + { + int len = skb->len; + + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk, len); ++ return len; ++} ++ ++int netlink_sendskb(struct sock *sk, struct sk_buff *skb) ++{ ++ int len = __netlink_sendskb(sk, skb); ++ + sock_put(sk); + return len; + } +@@ -957,8 +964,7 @@ static int netlink_broadcast_deliver(str + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !test_bit(0, &nlk->state)) { + skb_set_owner_r(skb, sk); +- skb_queue_tail(&sk->sk_receive_queue, skb); +- sk->sk_data_ready(sk, skb->len); ++ __netlink_sendskb(sk, skb); + return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1); + } + return -1; +@@ -1680,10 +1686,8 @@ static int netlink_dump(struct sock *sk) + + if (sk_filter(sk, skb)) + kfree_skb(skb); +- else { +- skb_queue_tail(&sk->sk_receive_queue, skb); +- sk->sk_data_ready(sk, skb->len); +- } ++ else ++ __netlink_sendskb(sk, skb); + return 0; + } + +@@ -1697,10 +1701,8 @@ static int netlink_dump(struct sock *sk) + + if (sk_filter(sk, skb)) + kfree_skb(skb); +- else { +- skb_queue_tail(&sk->sk_receive_queue, skb); +- sk->sk_data_ready(sk, skb->len); +- } ++ else ++ __netlink_sendskb(sk, skb); + + if (cb->done) + cb->done(cb); diff --git a/queue-3.3/netns-do-not-leak-net_generic-data-on-failed-init.patch b/queue-3.3/netns-do-not-leak-net_generic-data-on-failed-init.patch new file mode 100644 index 
00000000000..5ae5ed906b7 --- /dev/null +++ b/queue-3.3/netns-do-not-leak-net_generic-data-on-failed-init.patch @@ -0,0 +1,77 @@ +From a028fe3db3df8800408ee09194dab72ea68369c9 Mon Sep 17 00:00:00 2001 +From: Julian Anastasov +Date: Mon, 16 Apr 2012 04:43:15 +0000 +Subject: [PATCH 23/28] netns: do not leak net_generic data on failed init + + +From: Julian Anastasov + +[ Upstream commit b922934d017f1cc831b017913ed7d1a56c558b43 ] + +ops_init should free the net_generic data on +init failure and __register_pernet_operations should not +call ops_free when NET_NS is not enabled. + +Signed-off-by: Julian Anastasov +Reviewed-by: "Eric W. Biederman" +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net_namespace.c | 33 ++++++++++++++++++--------------- + 1 file changed, 18 insertions(+), 15 deletions(-) + +--- a/net/core/net_namespace.c ++++ b/net/core/net_namespace.c +@@ -83,21 +83,29 @@ assign: + + static int ops_init(const struct pernet_operations *ops, struct net *net) + { +- int err; ++ int err = -ENOMEM; ++ void *data = NULL; ++ + if (ops->id && ops->size) { +- void *data = kzalloc(ops->size, GFP_KERNEL); ++ data = kzalloc(ops->size, GFP_KERNEL); + if (!data) +- return -ENOMEM; ++ goto out; + + err = net_assign_generic(net, *ops->id, data); +- if (err) { +- kfree(data); +- return err; +- } ++ if (err) ++ goto cleanup; + } ++ err = 0; + if (ops->init) +- return ops->init(net); +- return 0; ++ err = ops->init(net); ++ if (!err) ++ return 0; ++ ++cleanup: ++ kfree(data); ++ ++out: ++ return err; + } + + static void ops_free(const struct pernet_operations *ops, struct net *net) +@@ -448,12 +456,7 @@ static void __unregister_pernet_operatio + static int __register_pernet_operations(struct list_head *list, + struct pernet_operations *ops) + { +- int err = 0; +- err = ops_init(ops, &init_net); +- if (err) +- ops_free(ops, &init_net); +- return err; +- ++ return ops_init(ops, &init_net); + } + + static void __unregister_pernet_operations(struct 
pernet_operations *ops) diff --git a/queue-3.3/phonet-check-input-from-user-before-allocating.patch b/queue-3.3/phonet-check-input-from-user-before-allocating.patch new file mode 100644 index 00000000000..b448b3313bf --- /dev/null +++ b/queue-3.3/phonet-check-input-from-user-before-allocating.patch @@ -0,0 +1,73 @@ +From c06596fcd60d3a5bfb03f9d3fd21fd6956fc2bd6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 Apr 2012 12:07:45 +0000 +Subject: [PATCH 08/28] phonet: Check input from user before allocating + + +From: Sasha Levin + +[ Upstream commit bcf1b70ac6eb0ed8286c66e6bf37cb747cbaa04c ] + +A phonet packet is limited to USHRT_MAX bytes, this is never checked during +tx which means that the user can specify any size he wishes, and the kernel +will attempt to allocate that size. + +In the good case, it'll lead to the following warning, but it may also cause +the kernel to kick in the OOM and kill a random task on the server. + +[ 8921.744094] WARNING: at mm/page_alloc.c:2255 __alloc_pages_slowpath+0x65/0x730() +[ 8921.749770] Pid: 5081, comm: trinity Tainted: G W 3.4.0-rc1-next-20120402-sasha #46 +[ 8921.756672] Call Trace: +[ 8921.758185] [] warn_slowpath_common+0x87/0xb0 +[ 8921.762868] [] warn_slowpath_null+0x15/0x20 +[ 8921.765399] [] __alloc_pages_slowpath+0x65/0x730 +[ 8921.769226] [] ? zone_watermark_ok+0x1a/0x20 +[ 8921.771686] [] ? get_page_from_freelist+0x625/0x660 +[ 8921.773919] [] __alloc_pages_nodemask+0x1f8/0x240 +[ 8921.776248] [] kmalloc_large_node+0x70/0xc0 +[ 8921.778294] [] __kmalloc_node_track_caller+0x34/0x1c0 +[ 8921.780847] [] ? sock_alloc_send_pskb+0xbc/0x260 +[ 8921.783179] [] __alloc_skb+0x75/0x170 +[ 8921.784971] [] sock_alloc_send_pskb+0xbc/0x260 +[ 8921.787111] [] ? release_sock+0x7e/0x90 +[ 8921.788973] [] sock_alloc_send_skb+0x10/0x20 +[ 8921.791052] [] pep_sendmsg+0x60/0x380 +[ 8921.792931] [] ? pn_socket_bind+0x156/0x180 +[ 8921.794917] [] ? 
pn_socket_autobind+0x3f/0x90 +[ 8921.797053] [] pn_socket_sendmsg+0x4f/0x70 +[ 8921.798992] [] sock_aio_write+0x187/0x1b0 +[ 8921.801395] [] ? sub_preempt_count+0xae/0xf0 +[ 8921.803501] [] ? __lock_acquire+0x42c/0x4b0 +[ 8921.805505] [] ? __sock_recv_ts_and_drops+0x140/0x140 +[ 8921.807860] [] do_sync_readv_writev+0xbc/0x110 +[ 8921.809986] [] ? might_fault+0x97/0xa0 +[ 8921.811998] [] ? security_file_permission+0x1e/0x90 +[ 8921.814595] [] do_readv_writev+0xe2/0x1e0 +[ 8921.816702] [] ? do_setitimer+0x1ac/0x200 +[ 8921.818819] [] ? get_parent_ip+0x11/0x50 +[ 8921.820863] [] ? sub_preempt_count+0xae/0xf0 +[ 8921.823318] [] vfs_writev+0x46/0x60 +[ 8921.825219] [] sys_writev+0x4f/0xb0 +[ 8921.827127] [] system_call_fastpath+0x16/0x1b +[ 8921.829384] ---[ end trace dffe390f30db9eb7 ]--- + +Signed-off-by: Sasha Levin +Acked-by: Rémi Denis-Courmont +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/phonet/pep.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/phonet/pep.c ++++ b/net/phonet/pep.c +@@ -1130,6 +1130,9 @@ static int pep_sendmsg(struct kiocb *ioc + int flags = msg->msg_flags; + int err, done; + ++ if (len > USHRT_MAX) ++ return -EMSGSIZE; ++ + if ((msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| + MSG_CMSG_COMPAT)) || + !(msg->msg_flags & MSG_EOR)) diff --git a/queue-3.3/phy-icplus-fix-auto-power-saving-in-ip101a_config_init.patch b/queue-3.3/phy-icplus-fix-auto-power-saving-in-ip101a_config_init.patch new file mode 100644 index 00000000000..1673ecd77cd --- /dev/null +++ b/queue-3.3/phy-icplus-fix-auto-power-saving-in-ip101a_config_init.patch @@ -0,0 +1,39 @@ +From a0103a7b505241c97fceec89cf9480850dd2e1b0 Mon Sep 17 00:00:00 2001 +From: Srinivas Kandagatla +Date: Mon, 2 Apr 2012 00:02:09 +0000 +Subject: [PATCH 04/28] phy:icplus:fix Auto Power Saving in ip101a_config_init. 
+ + +From: Srinivas Kandagatla + +[ Upstream commit b3300146aa8efc5d3937fd33f3cfdc580a3843bc ] + +This patch fixes Auto Power Saving configuration in ip101a_config_init +which was broken as there is no phy register write followed after +setting IP101A_APS_ON flag. + +This patch also fixes the return value of ip101a_config_init. + +Without this patch ip101a_config_init returns 2 which is not an error +according to IS_ERR and the mac driver will continue accessing 2 as +valid pointer to phy_dev resulting in memory fault. + +Signed-off-by: Srinivas Kandagatla +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/icplus.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/phy/icplus.c ++++ b/drivers/net/phy/icplus.c +@@ -162,7 +162,8 @@ static int ip101a_g_config_init(struct p + /* Enable Auto Power Saving mode */ + c = phy_read(phydev, IP10XX_SPEC_CTRL_STATUS); + c |= IP101A_G_APS_ON; +- return c; ++ ++ return phy_write(phydev, IP10XX_SPEC_CTRL_STATUS, c); + } + + static int ip175c_read_status(struct phy_device *phydev) diff --git a/queue-3.3/ppp-don-t-stop-and-restart-queue-on-every-tx-packet.patch b/queue-3.3/ppp-don-t-stop-and-restart-queue-on-every-tx-packet.patch new file mode 100644 index 00000000000..914bca7375f --- /dev/null +++ b/queue-3.3/ppp-don-t-stop-and-restart-queue-on-every-tx-packet.patch @@ -0,0 +1,60 @@ +From 10f036f94d6f5e9399db376cd383e039b742ec76 Mon Sep 17 00:00:00 2001 +From: David Woodhouse +Date: Tue, 24 Apr 2012 22:09:47 -0400 +Subject: [PATCH 02/28] ppp: Don't stop and restart queue on every TX packet + + +From: David Woodhouse + +[ This combines upstream commit + e675f0cc9a872fd152edc0c77acfed19bf28b81e and follow-on bug fix + commit 9a5d2bd99e0dfe9a31b3c160073ac445ba3d773f ] + +For every transmitted packet, ppp_start_xmit() will stop the netdev +queue and then, if appropriate, restart it. This causes the TX softirq +to run, entirely gratuitously.
+ +This is "only" a waste of CPU time in the normal case, but it's actively +harmful when the PPP device is a TEQL slave — the wakeup will cause the +offending device to receive the next TX packet from the TEQL queue, when +it *should* have gone to the next slave in the list. We end up seeing +large bursts of packets on just *one* slave device, rather than using +the full available bandwidth over all slaves. + +This patch fixes the problem by *not* unconditionally stopping the queue +in ppp_start_xmit(). It adds a return value from ppp_xmit_process() +which indicates whether the queue should be stopped or not. + +It *doesn't* remove the call to netif_wake_queue() from +ppp_xmit_process(), because other code paths (especially from +ppp_output_wakeup()) need it there and it's messy to push it out to the +other callers to do it based on the return value. So we leave it in +place — it's a no-op in the case where the queue wasn't stopped, so it's +harmless in the TX path. + +Signed-off-by: David Woodhouse +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -968,7 +968,6 @@ ppp_start_xmit(struct sk_buff *skb, stru + proto = npindex_to_proto[npi]; + put_unaligned_be16(proto, pp); + +- netif_stop_queue(dev); + skb_queue_tail(&ppp->file.xq, skb); + ppp_xmit_process(ppp); + return NETDEV_TX_OK; +@@ -1063,6 +1062,8 @@ ppp_xmit_process(struct ppp *ppp) + code that we can accept some more. 
*/ + if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) + netif_wake_queue(ppp->dev); ++ else ++ netif_stop_queue(ppp->dev); + } + ppp_xmit_unlock(ppp); + } diff --git a/queue-3.3/sctp-allow-struct-sctp_event_subscribe-to-grow-without-breaking-binaries.patch b/queue-3.3/sctp-allow-struct-sctp_event_subscribe-to-grow-without-breaking-binaries.patch new file mode 100644 index 00000000000..634ba29123b --- /dev/null +++ b/queue-3.3/sctp-allow-struct-sctp_event_subscribe-to-grow-without-breaking-binaries.patch @@ -0,0 +1,53 @@ +From b90e2d69051af17eeb42785e8b292b7da09b663f Mon Sep 17 00:00:00 2001 +From: Thomas Graf +Date: Tue, 3 Apr 2012 22:17:53 +0000 +Subject: [PATCH 05/28] sctp: Allow struct sctp_event_subscribe to grow without breaking binaries + + +From: Thomas Graf + +[ Upstream commit acdd5985364f8dc511a0762fab2e683f29d9d692 ] + +getsockopt(..., SCTP_EVENTS, ...) performs a length check and returns +an error if the user provides less bytes than the size of struct +sctp_event_subscribe. + +Struct sctp_event_subscribe needs to be extended by an u8 for every +new event or notification type that is added. + +This obviously makes getsockopt fail for binaries that are compiled +against older versions of <net/sctp/user.h> which do not contain +all event types. + +This patch changes getsockopt behaviour to no longer return an error +if not enough bytes are being provided by the user. Instead, it +returns as much of sctp_event_subscribe as fits into the provided buffer. + +This leads to the new behavior that users see what they have been aware +of at compile time. + +The setsockopt(..., SCTP_EVENTS, ...) API is already behaving like this. + +Signed-off-by: Thomas Graf +Acked-by: Vlad Yasevich +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -4133,9 +4133,10 @@ static int sctp_getsockopt_disable_fragm + static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, + int __user *optlen) + { +- if (len < sizeof(struct sctp_event_subscribe)) ++ if (len <= 0) + return -EINVAL; +- len = sizeof(struct sctp_event_subscribe); ++ if (len > sizeof(struct sctp_event_subscribe)) ++ len = sizeof(struct sctp_event_subscribe); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len)) diff --git a/queue-3.3/series b/queue-3.3/series index 65baf85f3d0..462762d956e 100644 --- a/queue-3.3/series +++ b/queue-3.3/series @@ -61,3 +61,31 @@ ocfs2-rl_count-endianness-breakage.patch ocfs2-e_leaf_clusters-endianness-breakage.patch lockd-fix-the-endianness-bug.patch usb-dwc3-ep0-increment-actual-on-bounced-ep0-case.patch +net-fix-proc-net-dev-regression.patch +ppp-don-t-stop-and-restart-queue-on-every-tx-packet.patch +tcp-allow-splice-to-build-full-tso-packets.patch +phy-icplus-fix-auto-power-saving-in-ip101a_config_init.patch +sctp-allow-struct-sctp_event_subscribe-to-grow-without-breaking-binaries.patch +bridge-do-not-send-queries-on-multicast-group-leaves.patch +ipv6-fix-array-index-in-ip6_mc_add_src.patch +phonet-check-input-from-user-before-allocating.patch +bonding-properly-unset-current_arp_slave-on-slave-link-up.patch +wimax-i2400m-prevent-a-possible-kernel-bug-due-to-missing-fw_name-string.patch +netlink-fix-races-after-skb-queueing.patch +net-fix-a-race-in-sock_queue_err_skb.patch +tcp-restore-correct-limit.patch +tcp-fix-tcp_rcv_rtt_update-use-of-an-unscaled-rtt-sample.patch +atl1-fix-kernel-panic-in-case-of-dma-errors.patch +8139cp-set-intr-mask-after-its-handler-is-registered.patch +net-smsc911x-fix-skb-handling-in-receive-path.patch 
+net-ethernet-ks8851_mll-fix-rx-frame-buffer-overflow.patch +net_sched-gred-fix-oops-in-gred_dump-in-wred-mode.patch +net-usb-smsc75xx-fix-mtu.patch +dummy-add-ndo_uninit.patch +tcp-fix-tcp_grow_window-for-large-incoming-frames.patch +netns-do-not-leak-net_generic-data-on-failed-init.patch +ksz884x-don-t-copy-too-much-in-netdev_set_mac_address.patch +net-ax25-reorder-ax25_exit-to-remove-races.patch +tcp-fix-tcp_maxseg-for-established-ipv6-passive-sockets.patch +net-allow-pskb_expand_head-to-get-maximum-tailroom.patch +tcp-avoid-order-1-allocations-on-wifi-and-tx-path.patch diff --git a/queue-3.3/tcp-allow-splice-to-build-full-tso-packets.patch b/queue-3.3/tcp-allow-splice-to-build-full-tso-packets.patch new file mode 100644 index 00000000000..803c3c0ba53 --- /dev/null +++ b/queue-3.3/tcp-allow-splice-to-build-full-tso-packets.patch @@ -0,0 +1,118 @@ +From 3aebf9d9bb951e41e6e87aa6a63c1ebe0a4e31b6 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 24 Apr 2012 22:12:06 -0400 +Subject: [PATCH 03/28] tcp: allow splice() to build full TSO packets + + +From: Eric Dumazet + +[ This combines upstream commit + 2f53384424251c06038ae612e56231b96ab610ee and the follow-on bug fix + commit 35f9c09fe9c72eb8ca2b8e89a593e1c151f28fc2 ] + +vmsplice()/splice(pipe, socket) call do_tcp_sendpages() one page at a +time, adding at most 4096 bytes to an skb. (assuming PAGE_SIZE=4096) + +The call to tcp_push() at the end of do_tcp_sendpages() forces an +immediate xmit when pipe is not already filled, and tso_fragment() try +to split these skb to MSS multiples. 
+ +4096 bytes are usually split in a skb with 2 MSS, and a remaining +sub-mss skb (assuming MTU=1500) + +This makes slow start suboptimal because many small frames are sent to +qdisc/driver layers instead of big ones (constrained by cwnd and packets +in flight of course) + +In fact, applications using sendmsg() (adding an additional memory copy) +instead of vmsplice()/splice()/sendfile() are a bit faster because of +this anomaly, especially if serving small files in environments with +large initial [c]wnd. + +Call tcp_push() only if MSG_MORE is not set in the flags parameter. + +This bit is automatically provided by splice() internals but for the +last page, or on all pages if user specified SPLICE_F_MORE splice() +flag. + +In some workloads, this can reduce number of sent logical packets by an +order of magnitude, making zero-copy TCP actually faster than +one-copy :) + +Reported-by: Tom Herbert +Cc: Nandita Dukkipati +Cc: Neal Cardwell +Cc: Tom Herbert +Cc: Yuchung Cheng +Cc: H.K. Jerry Chu +Cc: Maciej Żenczykowski +Cc: Mahesh Bandewar +Cc: Ilpo Järvinen +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + fs/splice.c | 5 ++++- + include/linux/socket.h | 2 +- + net/ipv4/tcp.c | 2 +- + net/socket.c | 6 +++--- + 4 files changed, 9 insertions(+), 6 deletions(-) + +--- a/fs/splice.c ++++ b/fs/splice.c +@@ -30,6 +30,7 @@ + #include <linux/uio.h> + #include <linux/security.h> + #include <linux/gfp.h> ++#include <linux/socket.h> + + /* + * Attempt to steal a page from a pipe buffer. This should perhaps go into +@@ -690,7 +691,9 @@ static int pipe_to_sendpage(struct pipe_ + if (!likely(file->f_op && file->f_op->sendpage)) + return -EINVAL; + +- more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; ++ more = (sd->flags & SPLICE_F_MORE) ?
MSG_MORE : 0; ++ if (sd->len < sd->total_len) ++ more |= MSG_SENDPAGE_NOTLAST; + return file->f_op->sendpage(file, buf->page, buf->offset, + sd->len, &pos, more); + } +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -265,7 +265,7 @@ struct ucred { + #define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ + #define MSG_MORE 0x8000 /* Sender will send more */ + #define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */ +- ++#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */ + #define MSG_EOF MSG_FIN + + #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exit for file +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -858,7 +858,7 @@ wait_for_memory: + } + + out: +- if (copied) ++ if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) + tcp_push(sk, flags, mss_now, tp->nonagle); + return copied; + +--- a/net/socket.c ++++ b/net/socket.c +@@ -811,9 +811,9 @@ static ssize_t sock_sendpage(struct file + + sock = file->private_data; + +- flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; +- if (more) +- flags |= MSG_MORE; ++ flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; ++ /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ ++ flags |= more; + + return kernel_sendpage(sock, page, offset, size, flags); + } diff --git a/queue-3.3/tcp-avoid-order-1-allocations-on-wifi-and-tx-path.patch b/queue-3.3/tcp-avoid-order-1-allocations-on-wifi-and-tx-path.patch new file mode 100644 index 00000000000..8184b71e3aa --- /dev/null +++ b/queue-3.3/tcp-avoid-order-1-allocations-on-wifi-and-tx-path.patch @@ -0,0 +1,127 @@ +From f016eec62af8177d9f9c6608b4d541cb7e114391 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 24 Apr 2012 23:01:22 -0400 +Subject: [PATCH 28/28] tcp: avoid order-1 allocations on wifi and tx path + + +From: Eric Dumazet + +[ This combines upstream commit + a21d45726acacc963d8baddf74607d9b74e2b723 and the follow-on bug fix + commit a21d45726acacc963d8baddf74607d9b74e2b723 ] + +Marc Merlin reported many order-1 allocations failures in TX path on its +wireless setup, that dont make any sense with MTU=1500 network, and non +SG capable hardware. + +After investigation, it turns out TCP uses sk_stream_alloc_skb() and +used as a convention skb_tailroom(skb) to know how many bytes of data +payload could be put in this skb (for non SG capable devices) + +Note : these skb used kmalloc-4096 (MTU=1500 + MAX_HEADER + +sizeof(struct skb_shared_info) being above 2048) + +Later, mac80211 layer need to add some bytes at the tail of skb +(IEEE80211_ENCRYPT_TAILROOM = 18 bytes) and since no more tailroom is +available has to call pskb_expand_head() and request order-1 +allocations. + +This patch changes sk_stream_alloc_skb() so that only +sk->sk_prot->max_header bytes of headroom are reserved, and use a new +skb field, avail_size to hold the data payload limit. 
+ +This way, order-0 allocations done by TCP stack can leave more than 2 KB +of tailroom and no more allocation is performed in mac80211 layer (or +any layer needing some tailroom) + +avail_size is unioned with mark/dropcount, since mark will be set later +in IP stack for output packets. Therefore, skb size is unchanged. + +Reported-by: Marc MERLIN +Tested-by: Marc MERLIN +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 13 +++++++++++++ + net/ipv4/tcp.c | 8 ++++---- + net/ipv4/tcp_output.c | 3 ++- + 3 files changed, 19 insertions(+), 5 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -468,6 +468,7 @@ struct sk_buff { + union { + __u32 mark; + __u32 dropcount; ++ __u32 avail_size; + }; + + __u16 vlan_tci; +@@ -1338,6 +1339,18 @@ static inline int skb_tailroom(const str + } + + /** ++ * skb_availroom - bytes at buffer end ++ * @skb: buffer to check ++ * ++ * Return the number of bytes of free space at the tail of an sk_buff ++ * allocated by sk_stream_alloc() ++ */ ++static inline int skb_availroom(const struct sk_buff *skb) ++{ ++ return skb_is_nonlinear(skb) ? 0 : skb->avail_size - skb->len; ++} ++ ++/** + * skb_reserve - adjust headroom + * @skb: buffer to alter + * @len: bytes to move +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -699,11 +699,12 @@ struct sk_buff *sk_stream_alloc_skb(stru + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); + if (skb) { + if (sk_wmem_schedule(sk, skb->truesize)) { ++ skb_reserve(skb, sk->sk_prot->max_header); + /* + * Make sure that we have exactly size bytes + * available to the caller, no more, no less. + */ +- skb_reserve(skb, skb_tailroom(skb) - size); ++ skb->avail_size = size; + return skb; + } + __kfree_skb(skb); +@@ -993,10 +994,9 @@ new_segment: + copy = seglen; + + /* Where to copy to? */ +- if (skb_tailroom(skb) > 0) { ++ if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! 
*/ +- if (copy > skb_tailroom(skb)) +- copy = skb_tailroom(skb); ++ copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, from, copy); + if (err) + goto do_fault; +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1096,6 +1096,7 @@ static void __pskb_trim_head(struct sk_b + eat = min_t(int, len, skb_headlen(skb)); + if (eat) { + __skb_pull(skb, eat); ++ skb->avail_size -= eat; + len -= eat; + if (!len) + return; +@@ -2060,7 +2061,7 @@ static void tcp_retrans_try_collapse(str + /* Punt if not enough space exists in the first SKB for + * the data in the second + */ +- if (skb->len > skb_tailroom(to)) ++ if (skb->len > skb_availroom(to)) + break; + + if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) diff --git a/queue-3.3/tcp-fix-tcp_grow_window-for-large-incoming-frames.patch b/queue-3.3/tcp-fix-tcp_grow_window-for-large-incoming-frames.patch new file mode 100644 index 00000000000..1e3dbf01bd1 --- /dev/null +++ b/queue-3.3/tcp-fix-tcp_grow_window-for-large-incoming-frames.patch @@ -0,0 +1,38 @@ +From 2c1da79e03a2026caf4293d208c46702da35252b Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Mon, 16 Apr 2012 23:28:07 +0000 +Subject: [PATCH 22/28] tcp: fix tcp_grow_window() for large incoming frames + + +From: Eric Dumazet + +[ Upstream commit 4d846f02392a710f9604892ac3329e628e60a230 ] + +tcp_grow_window() has to grow rcv_ssthresh up to window_clamp, allowing +sender to increase its window. + +tcp_grow_window() still assumes a tcp frame is under MSS, but its no +longer true with LRO/GRO. + +This patch fixes one of the performance issue we noticed with GRO on. + +Signed-off-by: Eric Dumazet +Cc: Neal Cardwell +Cc: Tom Herbert +Acked-by: Neal Cardwell +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -333,6 +333,7 @@ static void tcp_grow_window(struct sock + incr = __tcp_grow_window(sk, skb); + + if (incr) { ++ incr = max_t(int, incr, 2 * skb->len); + tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, + tp->window_clamp); + inet_csk(sk)->icsk_ack.quick |= 1; diff --git a/queue-3.3/tcp-fix-tcp_maxseg-for-established-ipv6-passive-sockets.patch b/queue-3.3/tcp-fix-tcp_maxseg-for-established-ipv6-passive-sockets.patch new file mode 100644 index 00000000000..a55c74b9dae --- /dev/null +++ b/queue-3.3/tcp-fix-tcp_maxseg-for-established-ipv6-passive-sockets.patch @@ -0,0 +1,38 @@ +From 8207f2ec3703b3582992a8eeda95bd486f8a45bf Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Sun, 22 Apr 2012 09:45:47 +0000 +Subject: [PATCH 26/28] tcp: fix TCP_MAXSEG for established IPv6 passive sockets + + +From: Neal Cardwell + +[ Upstream commit d135c522f1234f62e81be29cebdf59e9955139ad ] + +Commit f5fff5d forgot to fix TCP_MAXSEG behavior for IPv6 sockets, so IPv6 +TCP server sockets that used TCP_MAXSEG would find that the advmss of +child sockets would be incorrect. This commit mirrors the advmss logic +from tcp_v4_syn_recv_sock in tcp_v6_syn_recv_sock. Eventually this +logic should probably be shared between IPv4 and IPv6, but this at +least fixes this issue. + +Signed-off-by: Neal Cardwell +Acked-by: Eric Dumazet +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/tcp_ipv6.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -1493,6 +1493,10 @@ static struct sock * tcp_v6_syn_recv_soc + tcp_mtup_init(newsk); + tcp_sync_mss(newsk, dst_mtu(dst)); + newtp->advmss = dst_metric_advmss(dst); ++ if (tcp_sk(sk)->rx_opt.user_mss && ++ tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) ++ newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; ++ + tcp_initialize_rcv_mss(newsk); + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(newsk, diff --git a/queue-3.3/tcp-fix-tcp_rcv_rtt_update-use-of-an-unscaled-rtt-sample.patch b/queue-3.3/tcp-fix-tcp_rcv_rtt_update-use-of-an-unscaled-rtt-sample.patch new file mode 100644 index 00000000000..c6b051b3d11 --- /dev/null +++ b/queue-3.3/tcp-fix-tcp_rcv_rtt_update-use-of-an-unscaled-rtt-sample.patch @@ -0,0 +1,47 @@ +From 96030da82358d976c3be492d91f050a51b7d00f1 Mon Sep 17 00:00:00 2001 +From: Neal Cardwell +Date: Tue, 10 Apr 2012 07:59:20 +0000 +Subject: [PATCH 14/28] tcp: fix tcp_rcv_rtt_update() use of an unscaled RTT sample + + +From: Neal Cardwell + +[ Upstream commit 18a223e0b9ec8979320ba364b47c9772391d6d05 ] + +Fix a code path in tcp_rcv_rtt_update() that was comparing scaled and +unscaled RTT samples. + +The intent in the code was to only use the 'm' measurement if it was a +new minimum. However, since 'm' had not yet been shifted left 3 bits +but 'new_sample' had, this comparison would nearly always succeed, +leading us to erroneously set our receive-side RTT estimate to the 'm' +sample when that sample could be nearly 8x too high to use. + +The overall effect is to often cause the receive-side RTT estimate to +be significantly too large (up to 40% too large for brief periods in +my tests). + +Signed-off-by: Neal Cardwell +Acked-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -472,8 +472,11 @@ static void tcp_rcv_rtt_update(struct tc + if (!win_dep) { + m -= (new_sample >> 3); + new_sample += m; +- } else if (m < new_sample) +- new_sample = m << 3; ++ } else { ++ m <<= 3; ++ if (m < new_sample) ++ new_sample = m; ++ } + } else { + /* No previous measure. */ + new_sample = m << 3; diff --git a/queue-3.3/tcp-restore-correct-limit.patch b/queue-3.3/tcp-restore-correct-limit.patch new file mode 100644 index 00000000000..3c51b9aeca4 --- /dev/null +++ b/queue-3.3/tcp-restore-correct-limit.patch @@ -0,0 +1,42 @@ +From 0f738ab85369e3cbefcf3e1067acf1171c7f2308 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Tue, 10 Apr 2012 00:56:42 +0000 +Subject: [PATCH 13/28] tcp: restore correct limit + + +From: Eric Dumazet + +[ Upstream commit 5fb84b1428b271f8767e0eb3fcd7231896edfaa4 ] + +Commit c43b874d5d714f (tcp: properly initialize tcp memory limits) tried +to fix a regression added in commits 4acb4190 & 3dc43e3, +but still got it wrong. + +The result is that machines with a low amount of memory have a too small tcp_rmem[2] +value and slow tcp receives: the per-socket limit is 1/1024 of memory +instead of 1/128 as in old kernels, so the rcv window is capped to small +values. + +Fix this to match comment and previous behavior. + +Signed-off-by: Eric Dumazet +Cc: Jason Wang +Cc: Glauber Costa +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -3299,8 +3299,7 @@ void __init tcp_init(void) + + tcp_init_mem(&init_net); + /* Set per-socket limits to no more than 1/128 the pressure threshold */ +- limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10); +- limit = max(limit, 128UL); ++ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); + max_share = min(4UL*1024*1024, limit); + + sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; diff --git a/queue-3.3/wimax-i2400m-prevent-a-possible-kernel-bug-due-to-missing-fw_name-string.patch b/queue-3.3/wimax-i2400m-prevent-a-possible-kernel-bug-due-to-missing-fw_name-string.patch new file mode 100644 index 00000000000..b7b21cb7628 --- /dev/null +++ b/queue-3.3/wimax-i2400m-prevent-a-possible-kernel-bug-due-to-missing-fw_name-string.patch @@ -0,0 +1,34 @@ +From 27cc26b2cb35ed6cbc3554c24fa148c052d5f697 Mon Sep 17 00:00:00 2001 +From: Phil Sutter +Date: Mon, 26 Mar 2012 09:01:30 +0000 +Subject: [PATCH 10/28] wimax: i2400m - prevent a possible kernel bug due to missing fw_name string + + +From: Phil Sutter + +[ Upstream commit 4eee6a3a04e8bb53fbe7de0f64d0524d3fbe3f80 ] + +This happened on a machine with a custom hotplug script calling nameif, +probably due to slow firmware loading. At the time nameif uses ethtool +to gather interface information, i2400m->fw_name is zero and so a null +pointer dereference occurs from within i2400m_get_drvinfo(). + +Signed-off-by: Phil Sutter +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wimax/i2400m/netdev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/wimax/i2400m/netdev.c ++++ b/drivers/net/wimax/i2400m/netdev.c +@@ -597,7 +597,8 @@ static void i2400m_get_drvinfo(struct ne + struct i2400m *i2400m = net_dev_to_i2400m(net_dev); + + strncpy(info->driver, KBUILD_MODNAME, sizeof(info->driver) - 1); +- strncpy(info->fw_version, i2400m->fw_name, sizeof(info->fw_version) - 1); ++ strncpy(info->fw_version, ++ i2400m->fw_name ? : "", sizeof(info->fw_version) - 1); + if (net_dev->dev.parent) + strncpy(info->bus_info, dev_name(net_dev->dev.parent), + sizeof(info->bus_info) - 1);