From: Greg Kroah-Hartman
Date: Fri, 29 Apr 2011 16:20:18 +0000 (-0700)
Subject: .38 patches
X-Git-Tag: v2.6.38.5~2
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ff5462e0a03eaeba73c6cca00a851751c33dfc8d;p=thirdparty%2Fkernel%2Fstable-queue.git

.38 patches
---

diff --git a/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch b/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch
new file mode 100644
index 00000000000..d3a0c2d1cbe
--- /dev/null
+++ b/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch
@@ -0,0 +1,58 @@
+From 194b3da873fd334ef183806db751473512af29ce Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov
+Date: Thu, 14 Apr 2011 20:55:16 +0400
+Subject: agp: fix arbitrary kernel memory writes
+
+From: Vasiliy Kulikov
+
+commit 194b3da873fd334ef183806db751473512af29ce upstream.
+
+pg_start is copied from userspace on AGPIOC_BIND and AGPIOC_UNBIND ioctl
+cmds of agp_ioctl() and passed to agpioc_bind_wrap(). As said in the
+comment, (pg_start + mem->page_count) may wrap in case of AGPIOC_BIND,
+and it is not checked at all in case of AGPIOC_UNBIND. As a result, user
+with sufficient privileges (usually "video" group) may generate either
+local DoS or privilege escalation.
+
+Signed-off-by: Vasiliy Kulikov
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/char/agp/generic.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/drivers/char/agp/generic.c
++++ b/drivers/char/agp/generic.c
+@@ -1089,8 +1089,8 @@ int agp_generic_insert_memory(struct agp
+ 		return -EINVAL;
+ 	}
+ 
+-	/* AK: could wrap */
+-	if ((pg_start + mem->page_count) > num_entries)
++	if (((pg_start + mem->page_count) > num_entries) ||
++	    ((pg_start + mem->page_count) < pg_start))
+ 		return -EINVAL;
+ 
+ 	j = pg_start;
+@@ -1124,7 +1124,7 @@ int agp_generic_remove_memory(struct agp
+ {
+ 	size_t i;
+ 	struct agp_bridge_data *bridge;
+-	int mask_type;
++	int mask_type, num_entries;
+ 
+ 	bridge = mem->bridge;
+ 	if (!bridge)
+@@ -1136,6 +1136,11 @@ int agp_generic_remove_memory(struct agp
+ 	if (type != mem->type)
+ 		return -EINVAL;
+ 
++	num_entries = agp_num_entries();
++	if (((pg_start + mem->page_count) > num_entries) ||
++	    ((pg_start + mem->page_count) < pg_start))
++		return -EINVAL;
++
+ 	mask_type = bridge->driver->agp_type_to_mask_type(bridge, type);
+ 	if (mask_type != 0) {
+ 		/* The generic routines know nothing of memory types */
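
The key fix above is the unsigned wrap test: on overflow, pg_start + mem->page_count
produces a value smaller than pg_start. A minimal userspace sketch of the same check,
with range_ok() and its parameters as illustrative stand-ins rather than kernel API:

    #include <stdio.h>
    #include <stddef.h>

    /* Sketch of the agp_generic_insert_memory() bounds check; not kernel code.
     * Reject [start, start + count) if the sum wraps or exceeds the table. */
    static int range_ok(size_t start, size_t count, size_t num_entries)
    {
        if (start + count < start)      /* unsigned wrap-around */
            return 0;
        return start + count <= num_entries;
    }

    int main(void)
    {
        printf("%d\n", range_ok(10, 20, 100));          /* 1: fits */
        printf("%d\n", range_ok((size_t)-8, 16, 100));  /* 0: wraps */
        return 0;
    }

The same two-sided test appears twice in the patch because both the bind and the
unbind paths take the range from userspace.
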
diff --git a/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch b/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch
new file mode 100644
index 00000000000..195a09ff1c3
--- /dev/null
+++ b/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch
@@ -0,0 +1,59 @@
+From b522f02184b413955f3bc952e3776ce41edc6355 Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov
+Date: Thu, 14 Apr 2011 20:55:19 +0400
+Subject: agp: fix OOM and buffer overflow
+
+From: Vasiliy Kulikov
+
+commit b522f02184b413955f3bc952e3776ce41edc6355 upstream.
+
+page_count is copied from userspace. agp_allocate_memory() tries to
+check whether this number is too big, but doesn't take into account the
+wrap case. Also agp_create_user_memory() doesn't check whether
+alloc_size is calculated from num_agp_pages variable without overflow.
+This may lead to allocation of too small buffer with following buffer
+overflow.
+
+Another problem in agp code is not addressed in the patch - kernel memory
+exhaustion (AGPIOC_RESERVE and AGPIOC_ALLOCATE ioctls). It is not checked
+whether requested pid is a pid of the caller (no check in agpioc_reserve_wrap()).
+Each allocation is limited to 16KB, though, there is no per-process limit.
+This might lead to OOM situation, which is not even solved in case of the
+caller death by OOM killer - the memory is allocated for another (faked) process.
+
+Signed-off-by: Vasiliy Kulikov
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/char/agp/generic.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/agp/generic.c
++++ b/drivers/char/agp/generic.c
+@@ -115,6 +115,9 @@ static struct agp_memory *agp_create_use
+ 	struct agp_memory *new;
+ 	unsigned long alloc_size = num_agp_pages*sizeof(struct page *);
+ 
++	if (INT_MAX/sizeof(struct page *) < num_agp_pages)
++		return NULL;
++
+ 	new = kzalloc(sizeof(struct agp_memory), GFP_KERNEL);
+ 	if (new == NULL)
+ 		return NULL;
+@@ -234,11 +237,14 @@ struct agp_memory *agp_allocate_memory(s
+ 	int scratch_pages;
+ 	struct agp_memory *new;
+ 	size_t i;
++	int cur_memory;
+ 
+ 	if (!bridge)
+ 		return NULL;
+ 
+-	if ((atomic_read(&bridge->current_memory_agp) + page_count) > bridge->max_memory_agp)
++	cur_memory = atomic_read(&bridge->current_memory_agp);
++	if ((cur_memory + page_count > bridge->max_memory_agp) ||
++	    (cur_memory + page_count < page_count))
+ 		return NULL;
+ 
+ 	if (type >= AGP_USER_TYPES) {
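
The INT_MAX guard above is the standard way to reject a multiplication overflow
before it happens: test the element count against MAX / element-size first. A
sketch of the pattern (alloc_page_array() is an illustrative helper, not a kernel
function):

    #include <limits.h>
    #include <stdlib.h>

    /* Illustrative sketch: fail instead of letting n * sizeof(elem) wrap
     * to a small value and under-allocate. */
    static void *alloc_page_array(size_t n)
    {
        if (n > INT_MAX / sizeof(void *))   /* n * sizeof would overflow */
            return NULL;
        return calloc(n, sizeof(void *));
    }
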
diff --git a/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch b/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
new file mode 100644
index 00000000000..476139a25f8
--- /dev/null
+++ b/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
@@ -0,0 +1,30 @@
+From bf5192edcbc1f0a7f9c054649dbf1a0b3210d9b7 Mon Sep 17 00:00:00 2001
+From: Dave Airlie
+Date: Fri, 22 Apr 2011 07:51:33 +1000
+Subject: drm: select FRAMEBUFFER_CONSOLE_PRIMARY if we have FRAMEBUFFER_CONSOLE
+
+From: Dave Airlie
+
+commit bf5192edcbc1f0a7f9c054649dbf1a0b3210d9b7 upstream.
+
+Multi-gpu/switcheroo relies on this option to get the console on the
+correct GPU at bootup, some distros enable it but it seems some get
+it wrong.
+
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/gpu/drm/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/Kconfig
++++ b/drivers/gpu/drm/Kconfig
+@@ -24,6 +24,7 @@ config DRM_KMS_HELPER
+ 	depends on DRM
+ 	select FB
+ 	select FRAMEBUFFER_CONSOLE if !EXPERT
++	select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
+ 	help
+ 	  FB and CRTC helpers for KMS drivers.
+ 
diff --git a/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch b/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch
new file mode 100644
index 00000000000..1d0fd95f3b3
--- /dev/null
+++ b/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch
@@ -0,0 +1,43 @@
+From 3bda50e3eaf58a4b9c4ce34204e5faa15c8b1b97 Mon Sep 17 00:00:00 2001
+From: Stanislaw Gruszka
+Date: Mon, 14 Mar 2011 14:15:06 +0100
+Subject: iwl3945: do not deprecate software scan
+
+From: Stanislaw Gruszka
+
+commit 3bda50e3eaf58a4b9c4ce34204e5faa15c8b1b97 upstream.
+
+Software scanning can be used for workaround some performance problems,
+so do not deprecate it.
+
+Signed-off-by: Stanislaw Gruszka
+Acked-by: Wey-Yi Guy
+Signed-off-by: John W. Linville
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/net/wireless/iwlwifi/iwl3945-base.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
++++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
+@@ -3995,8 +3995,7 @@ static int iwl3945_pci_probe(struct pci_
+ 	 * "the hard way", rather than using device's scan.
+ 	 */
+ 	if (iwl3945_mod_params.disable_hw_scan) {
+-		dev_printk(KERN_DEBUG, &(pdev->dev),
+-			   "sw scan support is deprecated\n");
++		IWL_DEBUG_INFO(priv, "Disabling hw_scan\n");
+ 		iwl3945_hw_ops.hw_scan = NULL;
+ 	}
+ 
+@@ -4318,8 +4317,7 @@ MODULE_PARM_DESC(debug, "debug output ma
+ #endif
+ module_param_named(disable_hw_scan, iwl3945_mod_params.disable_hw_scan,
+ 		int, S_IRUGO);
+-MODULE_PARM_DESC(disable_hw_scan,
+-	"disable hardware scanning (default 0) (deprecated)");
++MODULE_PARM_DESC(disable_hw_scan, "disable hardware scanning (default 0)");
+ module_param_named(fw_restart3945, iwl3945_mod_params.restart_fw, int, S_IRUGO);
+ MODULE_PARM_DESC(fw_restart3945, "restart firmware in case of error");
+ 
diff --git a/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch b/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
new file mode 100644
index 00000000000..c5501fc15ec
--- /dev/null
+++ b/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
@@ -0,0 +1,94 @@
+From f844a709a7d8f8be61a571afc31dfaca9e779621 Mon Sep 17 00:00:00 2001
+From: Stanislaw Gruszka
+Date: Fri, 28 Jan 2011 16:47:44 +0100
+Subject: iwlwifi: do not set tx power when channel is changing
+
+From: Stanislaw Gruszka
+
+commit f844a709a7d8f8be61a571afc31dfaca9e779621 upstream.
+
+Mac80211 can request for tx power and channel change in one ->config
+call. If that happens, *_send_tx_power functions will try to setup tx
+power for old channel, what can be not correct because we already change
+the band. I.e error "Failed to get channel info for channel 140 [0]",
+can be printed frequently when operating in software scanning mode.
+
+Signed-off-by: Stanislaw Gruszka
+Acked-by: Wey-Yi Guy
+Signed-off-by: John W. Linville
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/net/wireless/iwlwifi/iwl-3945.c     |    2 +-
+ drivers/net/wireless/iwlwifi/iwl-4965.c     |    2 +-
+ drivers/net/wireless/iwlwifi/iwl-agn-rxon.c |    5 ++---
+ drivers/net/wireless/iwlwifi/iwl-core.c     |   13 ++++++++++---
+ 4 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
++++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
+@@ -1823,7 +1823,7 @@ int iwl3945_commit_rxon(struct iwl_priv
+ 
+ 	/* If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames */
+-	rc = priv->cfg->ops->lib->send_tx_power(priv);
++	rc = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (rc) {
+ 		IWL_ERR(priv, "Error setting Tx power (%d).\n", rc);
+ 		return rc;
+--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
++++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
+@@ -1571,7 +1571,7 @@ static int iwl4965_commit_rxon(struct iw
+ 
+ 	/* If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames */
+-	ret = iwl_set_tx_power(priv, priv->tx_power_user_lmt, true);
++	ret = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (ret) {
+ 		IWL_ERR(priv, "Error sending TX power (%d)\n", ret);
+ 		return ret;
+--- a/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
++++ b/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
+@@ -288,10 +288,9 @@ int iwlagn_commit_rxon(struct iwl_priv *
+ 	 * If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames.
+ 	 *
+-	 * FIXME: which RXON requires a tune? Can we optimise this out in
+-	 * some cases?
++	 * It's expected we set power here if channel is changing.
+ 	 */
+-	ret = iwl_set_tx_power(priv, priv->tx_power_user_lmt, true);
++	ret = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (ret) {
+ 		IWL_ERR(priv, "Error sending TX power (%d)\n", ret);
+ 		return ret;
+--- a/drivers/net/wireless/iwlwifi/iwl-core.c
++++ b/drivers/net/wireless/iwlwifi/iwl-core.c
+@@ -1161,6 +1161,8 @@ int iwl_set_tx_power(struct iwl_priv *pr
+ {
+ 	int ret;
+ 	s8 prev_tx_power;
++	bool defer;
++	struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_BSS];
+ 
+ 	lockdep_assert_held(&priv->mutex);
+ 
+@@ -1188,10 +1190,15 @@ int iwl_set_tx_power(struct iwl_priv *pr
+ 	if (!iwl_is_ready_rf(priv))
+ 		return -EIO;
+ 
+-	/* scan complete use tx_power_next, need to be updated */
++	/* scan complete and commit_rxon use tx_power_next value,
++	 * it always need to be updated for newest request */
+ 	priv->tx_power_next = tx_power;
+-	if (test_bit(STATUS_SCANNING, &priv->status) && !force) {
+-		IWL_DEBUG_INFO(priv, "Deferring tx power set while scanning\n");
++
++	/* do not set tx power when scanning or channel changing */
++	defer = test_bit(STATUS_SCANNING, &priv->status) ||
++		memcmp(&ctx->active, &ctx->staging, sizeof(ctx->staging));
++	if (defer && !force) {
++		IWL_DEBUG_INFO(priv, "Deferring tx power set\n");
+ 		return 0;
+ 	}
+ 
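
The iwl-core.c hunk is an instance of a more general pattern: while a composite
reconfiguration is in flight (the staged state differs from the active one), record
the newest requested value and let the commit path apply it. A rough sketch of that
shape; every name here is illustrative, not iwlwifi driver code:

    #include <stdbool.h>
    #include <string.h>

    struct rxon { int channel; int band; };

    static int pending_power;   /* newest request, applied at commit time */

    /* Illustrative sketch of the defer-while-changing pattern. */
    static int set_tx_power_sketch(const struct rxon *active,
                                   const struct rxon *staging,
                                   bool scanning, int power, bool force)
    {
        bool defer = scanning ||
                     memcmp(active, staging, sizeof(*staging)) != 0;

        pending_power = power;      /* always remember the newest request */
        if (defer && !force)
            return 0;               /* commit path programs it later */
        /* ... program the hardware with 'power' here ... */
        return 0;
    }
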
diff --git a/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch b/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
new file mode 100644
index 00000000000..8085a4c2085
--- /dev/null
+++ b/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
@@ -0,0 +1,43 @@
+From 4aac0b4815ba592052758f4b468f253d383dc9d6 Mon Sep 17 00:00:00 2001
+From: Michael Schmitz
+Date: Tue, 26 Apr 2011 14:51:53 +1200
+Subject: m68k/mm: Set all online nodes in N_NORMAL_MEMORY
+
+From: Michael Schmitz
+
+commit 4aac0b4815ba592052758f4b468f253d383dc9d6 upstream.
+
+For m68k, N_NORMAL_MEMORY represents all nodes that have present memory
+since it does not support HIGHMEM. This patch sets the bit at the time
+node_present_pages has been set by free_area_init_node.
+At the time the node is brought online, the node state would have to be
+done unconditionally since information about present memory has not yet
+been recorded.
+
+If N_NORMAL_MEMORY is not accurate, slub may encounter errors since it
+uses this nodemask to setup per-cache kmem_cache_node data structures.
+
+This patch is an alternative to the one proposed by David Rientjes
+attempting to set node state immediately when bringing the node online.
+
+Signed-off-by: Michael Schmitz
+Tested-by: Thorsten Glaser
+Signed-off-by: Geert Uytterhoeven
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/m68k/mm/motorola.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/m68k/mm/motorola.c
++++ b/arch/m68k/mm/motorola.c
+@@ -300,6 +300,8 @@ void __init paging_init(void)
+ 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
+ 		free_area_init_node(i, zones_size,
+ 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
++		if (node_present_pages(i))
++			node_set_state(i, N_NORMAL_MEMORY);
+ 	}
+ }
+ 
diff --git a/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch b/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch
new file mode 100644
index 00000000000..7aa27dfe105
--- /dev/null
+++ b/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch
@@ -0,0 +1,66 @@
+From cc03638df20acbec5d0d0d9e07234aadde9e698d Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Wed, 27 Apr 2011 15:26:56 -0700
+Subject: mm: check if PTE is already allocated during page fault
+
+From: Mel Gorman
+
+commit cc03638df20acbec5d0d0d9e07234aadde9e698d upstream.
+
+With transparent hugepage support, handle_mm_fault() has to be careful
+that a normal PMD has been established before handling a PTE fault. To
+achieve this, it used __pte_alloc() directly instead of pte_alloc_map as
+pte_alloc_map is unsafe to run against a huge PMD. pte_offset_map() is
+called once it is known the PMD is safe.
+
+pte_alloc_map() is smart enough to check if a PTE is already present
+before calling __pte_alloc but this check was lost. As a consequence,
+PTEs may be allocated unnecessarily and the page table lock taken. This
+useless PTE does get cleaned up but it's a performance hit which is
+visible in page_test from aim9.
+
+This patch simply re-adds the check normally done by pte_alloc_map to
+check if the PTE needs to be allocated before taking the page table lock.
+The effect is noticeable in page_test from aim9.
+
+ AIM9
+                 2.6.38-vanilla   2.6.38-checkptenone
+ creat-clo      446.10 ( 0.00%)     424.47 (-5.10%)
+ page_test       38.10 ( 0.00%)      42.04 ( 9.37%)
+ brk_test        52.45 ( 0.00%)      51.57 (-1.71%)
+ exec_test      382.00 ( 0.00%)     456.90 (16.39%)
+ fork_test       60.11 ( 0.00%)      67.79 (11.34%)
+ MMTests Statistics: duration
+ Total Elapsed Time (seconds)      611.90    612.22
+
+(While this affects 2.6.38, it is a performance rather than a functional
+bug and normally outside the rules -stable. While the big performance
+differences are to a microbench, the difference in fork and exec
+performance may be significant enough that -stable wants to consider the
+patch)
+
+Reported-by: Raz Ben Yehuda
+Signed-off-by: Mel Gorman
+Reviewed-by: Rik van Riel
+Reviewed-by: Andrea Arcangeli
+Reviewed-by: Minchan Kim
+Acked-by: Johannes Weiner
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3332,7 +3332,7 @@ int handle_mm_fault(struct mm_struct *mm
+ 	 * run pte_offset_map on the pmd, if an huge pmd could
+ 	 * materialize from under us from a different thread.
+ 	 */
+-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
++	if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
+ 		return VM_FAULT_OOM;
+ 	/* if an huge pmd materialized from under us just retry later */
+ 	if (unlikely(pmd_trans_huge(*pmd)))
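
The one-line change restores a cheap "is it already there?" test before the
allocation path, the same double-check idiom pte_alloc_map() uses. A userspace
sketch of the shape; ensure_slot() and struct table are illustrative, not kernel
code:

    #include <stdlib.h>

    struct table { void *slot; };

    /* Illustrative sketch: only take the expensive allocation path when the
     * slot is actually missing. The kernel version re-checks under the
     * page table lock before installing the new PTE page. */
    static int ensure_slot(struct table *t)
    {
        if (t->slot == NULL) {          /* cf. pmd_none(*pmd): check first */
            void *p = calloc(1, 64);
            if (p == NULL)
                return -1;              /* cf. VM_FAULT_OOM */
            t->slot = p;
        }
        return 0;
    }
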
diff --git a/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch b/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
new file mode 100644
index 00000000000..06ec03c17e4
--- /dev/null
+++ b/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
@@ -0,0 +1,165 @@
+From 78f11a255749d09025f54d4e2df4fbcb031530e2 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli
+Date: Wed, 27 Apr 2011 15:26:45 -0700
+Subject: mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups
+
+From: Andrea Arcangeli
+
+commit 78f11a255749d09025f54d4e2df4fbcb031530e2 upstream.
+
+The huge_memory.c THP page fault was allowed to run if vm_ops was null
+(which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
+setup a special vma->vm_ops and it would fallback to regular anonymous
+memory) but other THP logics weren't fully activated for vmas with vm_file
+not NULL (/dev/zero has a not NULL vma->vm_file).
+
+So this removes the vm_file checks so that /dev/zero also can safely use
+THP (the other albeit safer approach to fix this bug would have been to
+prevent the THP initial page fault to run if vm_file was set).
+
+After removing the vm_file checks, this also makes huge_memory.c stricter
+in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file
+check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP
+under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should
+only be allowed to exist before the first page fault, and in turn when
+vma->anon_vma is null (so preventing khugepaged registration). So I tend
+to think the previous comment saying if vm_file was set, VM_PFNMAP might
+have been set and we could still be registered in khugepaged (despite
+anon_vma was not NULL to be registered in khugepaged) was too paranoid.
+The is_linear_pfn_mapping check is also I think superfluous (as described
+by comment) but under DEBUG_VM it is safe to stay.
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682
+
+Signed-off-by: Andrea Arcangeli
+Reported-by: Caspar Zhang
+Acked-by: Mel Gorman
+Acked-by: Rik van Riel
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/huge_mm.h |    2 +-
+ include/linux/mm.h      |    3 ++-
+ mm/huge_memory.c        |   43 ++++++++++++++++++++++++-------------------
+ 3 files changed, 27 insertions(+), 21 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge
+ 					 unsigned long end,
+ 					 long adjust_next)
+ {
+-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++	if (!vma->anon_vma || vma->vm_ops)
+ 		return;
+ 	__vma_adjust_trans_huge(vma, start, end, adjust_next);
+ }
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void
+ #define VM_RandomReadHint(v)	((v)->vm_flags & VM_RAND_READ)
+ 
+ /*
+- * special vmas that are non-mergable, non-mlock()able
++ * Special vmas that are non-mergable, non-mlock()able.
++ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
+ */
+ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+ 
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1400,6 +1400,9 @@ out:
+ 	return ret;
+ }
+ 
++#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
++		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
++
+ int hugepage_madvise(struct vm_area_struct *vma,
+ 		     unsigned long *vm_flags, int advice)
+ {
+@@ -1408,11 +1411,7 @@ int hugepage_madvise(struct vm_area_stru
+ 		/*
+ 		 * Be somewhat over-protective like KSM for now!
+ 		 */
+-		if (*vm_flags & (VM_HUGEPAGE |
+-				 VM_SHARED | VM_MAYSHARE |
+-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+-				 VM_MIXEDMAP | VM_SAO))
++		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+ 			return -EINVAL;
+ 		*vm_flags &= ~VM_NOHUGEPAGE;
+ 		*vm_flags |= VM_HUGEPAGE;
+@@ -1428,11 +1427,7 @@ int hugepage_madvise(struct vm_area_stru
+ 		/*
+ 		 * Be somewhat over-protective like KSM for now!
+ 		 */
+-		if (*vm_flags & (VM_NOHUGEPAGE |
+-				 VM_SHARED | VM_MAYSHARE |
+-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+-				 VM_MIXEDMAP | VM_SAO))
++		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+ 			return -EINVAL;
+ 		*vm_flags &= ~VM_HUGEPAGE;
+ 		*vm_flags |= VM_NOHUGEPAGE;
+@@ -1566,10 +1561,14 @@ int khugepaged_enter_vma_merge(struct vm
+ 		 * page fault if needed.
+ 		 */
+ 		return 0;
+-	if (vma->vm_file || vma->vm_ops)
++	if (vma->vm_ops)
+ 		/* khugepaged not yet working on file or special mappings */
+ 		return 0;
+-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++	/*
++	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
++	 * true too, verify it here.
++	 */
++	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+ 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+ 	hend = vma->vm_end & HPAGE_PMD_MASK;
+ 	if (hstart < hend)
+@@ -1818,12 +1817,15 @@ static void collapse_huge_page(struct mm
+ 	    (vma->vm_flags & VM_NOHUGEPAGE))
+ 		goto out;
+ 
+-	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
+-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++	if (!vma->anon_vma || vma->vm_ops)
+ 		goto out;
+ 	if (is_vma_temporary_stack(vma))
+ 		goto out;
+-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++	/*
++	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
++	 * true too, verify it here.
++	 */
++	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+ 
+ 	pgd = pgd_offset(mm, address);
+ 	if (!pgd_present(*pgd))
+@@ -2056,13 +2058,16 @@ static unsigned int khugepaged_scan_mm_s
+ 			progress++;
+ 			continue;
+ 		}
+-		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
+-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++		if (!vma->anon_vma || vma->vm_ops)
+ 			goto skip;
+ 		if (is_vma_temporary_stack(vma))
+ 			goto skip;
+-
+-		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++		/*
++		 * If is_pfn_mapping() is true is_linear_pfn_mapping()
++		 * must be true too, verify it here.
++		 */
++		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
++			  vma->vm_flags & VM_NO_THP);
+ 
+ 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+ 		hend = vma->vm_end & HPAGE_PMD_MASK;
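
Beyond the /dev/zero fix, the cleanup collapses two hand-maintained flag lists
into a single VM_NO_THP mask, so the "never THP this vma" policy lives in one
place. The idiom in miniature; the flag names and values below are illustrative,
not the kernel's:

    /* Illustrative sketch of consolidating flag tests into one mask. */
    enum {
        F_SHARED   = 1 << 0,
        F_PFNMAP   = 1 << 1,
        F_HUGETLB  = 1 << 2,
        F_MIXEDMAP = 1 << 3,
    };
    #define F_NO_THP (F_SHARED | F_PFNMAP | F_HUGETLB | F_MIXEDMAP)

    static int may_set_hugepage(unsigned long flags, unsigned long extra)
    {
        return !(flags & (extra | F_NO_THP));   /* one mask, maintained once */
    }
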
diff --git a/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch b/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
new file mode 100644
index 00000000000..1a55d492d57
--- /dev/null
+++ b/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
@@ -0,0 +1,44 @@
+From 26c4c170731f00008f4317a2888a0a07ac99d90d Mon Sep 17 00:00:00 2001
+From: Jeff Layton
+Date: Wed, 27 Apr 2011 11:49:09 -0400
+Subject: nfs: don't lose MS_SYNCHRONOUS on remount of noac mount
+
+From: Jeff Layton
+
+commit 26c4c170731f00008f4317a2888a0a07ac99d90d upstream.
+
+On a remount, the VFS layer will clear the MS_SYNCHRONOUS bit on the
+assumption that the flags on the mount syscall will have it set if the
+remounted fs is supposed to keep it.
+
+In the case of "noac" though, MS_SYNCHRONOUS is implied. A remount of
+such a mount will lose the MS_SYNCHRONOUS flag since "sync" isn't part
+of the mount options.
+
+Reported-by: Max Matveev
+Signed-off-by: Jeff Layton
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/super.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -2077,6 +2077,15 @@ nfs_remount(struct super_block *sb, int
+ 	if (error < 0)
+ 		goto out;
+ 
++	/*
++	 * noac is a special case. It implies -o sync, but that's not
++	 * necessarily reflected in the mtab options. do_remount_sb
++	 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
++	 * remount options, so we have to explicitly reset it.
++	 */
++	if (data->flags & NFS_MOUNT_NOAC)
++		*flags |= MS_SYNCHRONOUS;
++
+ 	/* compare new mount options with old ones */
+ 	error = nfs_compare_remount_data(nfss, data);
+ out:
diff --git a/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch b/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
new file mode 100644
index 00000000000..e991c9e3a0b
--- /dev/null
+++ b/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
@@ -0,0 +1,39 @@
+From 47c2199b6eb5fbe38ddb844db7cdbd914d304f9c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust
+Date: Fri, 15 Apr 2011 17:34:18 -0400
+Subject: NFSv4.1: Ensure state manager thread dies on last umount
+
+From: Trond Myklebust
+
+commit 47c2199b6eb5fbe38ddb844db7cdbd914d304f9c upstream.
+
+Currently, the state manager may continue to try recovering state forever
+even after the last filesystem to reference that nfs_client has umounted.
+
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/nfs4state.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1600,7 +1600,7 @@ static void nfs4_state_manager(struct nf
+ 	int status = 0;
+ 
+ 	/* Ensure exclusive access to NFSv4 state */
+-	for(;;) {
++	do {
+ 		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
+ 			/* We're going to have to re-establish a clientid */
+ 			status = nfs4_reclaim_lease(clp);
+@@ -1684,7 +1684,7 @@ static void nfs4_state_manager(struct nf
+ 			break;
+ 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ 			break;
+-	}
++	} while (atomic_read(&clp->cl_count) > 1);
+ 	return;
+ out_error:
+ 	printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
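
The change converts an unbounded for (;;) into a do/while guarded by the client
refcount, so the worker exits once only its own reference remains. The shape of
the fix in C11; recover_one_step() is an illustrative stand-in for the recovery
work, not NFS code:

    #include <stdatomic.h>
    #include <stdbool.h>

    extern bool recover_one_step(void);     /* illustrative work item */

    /* Sketch of a worker loop bounded by a reference count. */
    static void state_manager_sketch(atomic_int *refcount)
    {
        do {
            if (!recover_one_step())
                break;                      /* nothing left to recover */
        } while (atomic_load(refcount) > 1);/* last umount -> thread exits */
    }
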
diff --git a/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch b/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch
new file mode 100644
index 00000000000..89455a91a5d
--- /dev/null
+++ b/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch
@@ -0,0 +1,46 @@
+From f755a042d82b51b54f3bdd0890e5ea56c0fb6807 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Wed, 27 Apr 2011 15:26:50 -0700
+Subject: oom: use pte pages in OOM score
+
+From: KOSAKI Motohiro
+
+commit f755a042d82b51b54f3bdd0890e5ea56c0fb6807 upstream.
+
+PTE pages eat up memory just like anything else, but we do not account
+for them in any way in the OOM scores. They are also _guaranteed_ to get
+freed up when a process is OOM killed, while RSS is not.
+
+Reported-by: Dave Hansen
+Signed-off-by: KOSAKI Motohiro
+Cc: Hugh Dickins
+Cc: KAMEZAWA Hiroyuki
+Cc: Oleg Nesterov
+Acked-by: David Rientjes
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -172,10 +172,13 @@ unsigned int oom_badness(struct task_str
+ 
+ 	/*
+ 	 * The baseline for the badness score is the proportion of RAM that each
+-	 * task's rss and swap space use.
++	 * task's rss, pagetable and swap space use.
+ 	 */
+-	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
+-		totalpages;
++	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
++	points += get_mm_counter(p->mm, MM_SWAPENTS);
++
++	points *= 1000;
++	points /= totalpages;
+ 	task_unlock(p);
+ 
+ 	/*
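
The new badness arithmetic in plain form: normalize (rss + page-table pages +
swap entries) to thousandths of available RAM. A sketch with a worked example;
the numbers are made up for illustration:

    /* Illustrative sketch of the oom_badness() baseline computation. */
    static unsigned long oom_points(unsigned long rss, unsigned long nr_ptes,
                                    unsigned long swapents,
                                    unsigned long totalpages)
    {
        unsigned long points = rss + nr_ptes;   /* pages, incl. page tables */

        points += swapents;
        points *= 1000;
        return points / totalpages;             /* 0..1000 scale */
    }

    /* e.g. rss = 25000, nr_ptes = 500, swapents = 4500, totalpages = 1000000:
     * (30000 * 1000) / 1000000 = 30, i.e. the task uses 3.0% of RAM. */
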
diff --git a/queue-2.6.38/series b/queue-2.6.38/series
index 0b2a1610741..ffb6fe8bc0f 100644
--- a/queue-2.6.38/series
+++ b/queue-2.6.38/series
@@ -37,3 +37,16 @@ slub-fix-panic-with-discontigmem.patch
 set-memory-ranges-in-n_normal_memory-when-onlined.patch
 flexcop-pci-fix-__xlate_proc_name-warning-for-flexcop-pci.patch
 virtio-console-enable-call-to-hvc_remove-on-console-port-remove.patch
+oom-use-pte-pages-in-oom-score.patch
+mm-check-if-pte-is-already-allocated-during-page-fault.patch
+mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
+m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
+vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
+nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
+nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
+um-mdd-support-for-64-bit-atomic-operations.patch
+drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
+agp-fix-arbitrary-kernel-memory-writes.patch
+agp-fix-oom-and-buffer-overflow.patch
+iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
+iwl3945-do-not-deprecate-software-scan.patch
diff --git a/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch b/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch
new file mode 100644
index 00000000000..54eaefb98a1
--- /dev/null
+++ b/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch
@@ -0,0 +1,274 @@
+From 57d8e02e3cd21bccf2b84b26b42feb79e1f0f83e Mon Sep 17 00:00:00 2001
+From: Richard Weinberger
+Date: Wed, 27 Apr 2011 15:26:51 -0700
+Subject: um: mdd support for 64 bit atomic operations
+
+From: Richard Weinberger
+
+commit 57d8e02e3cd21bccf2b84b26b42feb79e1f0f83e upstream.
+
+This adds support for 64 bit atomic operations on 32 bit UML systems. XFS
+needs them since 2.6.38.
+
+ $ make ARCH=um SUBARCH=i386
+ ...
+   LD      .tmp_vmlinux1
+ fs/built-in.o: In function `xlog_regrant_reserve_log_space':
+ xfs_log.c:(.text+0xd8584): undefined reference to `atomic64_read_386'
+ xfs_log.c:(.text+0xd85ac): undefined reference to `cmpxchg8b_emu'
+ ...
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=32812
+
+Reported-by: Martin Walch
+Tested-by: Martin Walch
+Cc: Martin Walch
+Signed-off-by: Richard Weinberger
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/um/sys-i386/Makefile          |    2 
+ arch/um/sys-i386/atomic64_cx8_32.S |  225 +++++++++++++++++++++++++++++++++++++
+ 2 files changed, 226 insertions(+), 1 deletion(-)
+
+--- a/arch/um/sys-i386/Makefile
++++ b/arch/um/sys-i386/Makefile
+@@ -4,7 +4,7 @@
+ 
+ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
+ 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
+-	sys_call_table.o tls.o
++	sys_call_table.o tls.o atomic64_cx8_32.o
+ 
+ obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+ 
+--- /dev/null
++++ b/arch/um/sys-i386/atomic64_cx8_32.S
+@@ -0,0 +1,225 @@
++/*
++ * atomic64_t for 586+
++ *
++ * Copied from arch/x86/lib/atomic64_cx8_32.S
++ *
++ * Copyright © 2010 Luca Barbieri
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ */
++
++#include
++#include
++#include
++
++.macro SAVE reg
++	pushl_cfi %\reg
++	CFI_REL_OFFSET \reg, 0
++.endm
++
++.macro RESTORE reg
++	popl_cfi %\reg
++	CFI_RESTORE \reg
++.endm
++
++.macro read64 reg
++	movl %ebx, %eax
++	movl %ecx, %edx
++/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
++	LOCK_PREFIX
++	cmpxchg8b (\reg)
++.endm
++
++ENTRY(atomic64_read_cx8)
++	CFI_STARTPROC
++
++	read64 %ecx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_read_cx8)
++
++ENTRY(atomic64_set_cx8)
++	CFI_STARTPROC
++
++1:
++/* we don't need LOCK_PREFIX since aligned 64-bit writes
++ * are atomic on 586 and newer */
++	cmpxchg8b (%esi)
++	jne 1b
++
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_set_cx8)
++
++ENTRY(atomic64_xchg_cx8)
++	CFI_STARTPROC
++
++	movl %ebx, %eax
++	movl %ecx, %edx
++1:
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_xchg_cx8)
++
++.macro addsub_return func ins insc
++ENTRY(atomic64_\func\()_return_cx8)
++	CFI_STARTPROC
++	SAVE ebp
++	SAVE ebx
++	SAVE esi
++	SAVE edi
++
++	movl %eax, %esi
++	movl %edx, %edi
++	movl %ecx, %ebp
++
++	read64 %ebp
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	\ins\()l %esi, %ebx
++	\insc\()l %edi, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%ebp)
++	jne 1b
++
++10:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE edi
++	RESTORE esi
++	RESTORE ebx
++	RESTORE ebp
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_\func\()_return_cx8)
++.endm
++
++addsub_return add add adc
++addsub_return sub sub sbb
++
++.macro incdec_return func ins insc
++ENTRY(atomic64_\func\()_return_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	\ins\()l $1, %ebx
++	\insc\()l $0, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++10:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE ebx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_\func\()_return_cx8)
++.endm
++
++incdec_return inc add adc
++incdec_return dec sub sbb
++
++ENTRY(atomic64_dec_if_positive_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	subl $1, %ebx
++	sbb $0, %ecx
++	js 2f
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++2:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE ebx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_dec_if_positive_cx8)
++
++ENTRY(atomic64_add_unless_cx8)
++	CFI_STARTPROC
++	SAVE ebp
++	SAVE ebx
++/* these just push these two parameters on the stack */
++	SAVE edi
++	SAVE esi
++
++	movl %ecx, %ebp
++	movl %eax, %esi
++	movl %edx, %edi
++
++	read64 %ebp
++1:
++	cmpl %eax, 0(%esp)
++	je 4f
++2:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	addl %esi, %ebx
++	adcl %edi, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%ebp)
++	jne 1b
++
++	movl $1, %eax
++3:
++	addl $8, %esp
++	CFI_ADJUST_CFA_OFFSET -8
++	RESTORE ebx
++	RESTORE ebp
++	ret
++4:
++	cmpl %edx, 4(%esp)
++	jne 2b
++	xorl %eax, %eax
++	jmp 3b
++	CFI_ENDPROC
++ENDPROC(atomic64_add_unless_cx8)
++
++ENTRY(atomic64_inc_not_zero_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	testl %eax, %eax
++	je 4f
++2:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	addl $1, %ebx
++	adcl $0, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++	movl $1, %eax
++3:
++	RESTORE ebx
++	ret
++4:
++	testl %edx, %edx
++	jne 2b
++	jmp 3b
++	CFI_ENDPROC
++ENDPROC(atomic64_inc_not_zero_cx8)
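
Every operation in the assembly above is the same read / modify / cmpxchg8b retry
loop. For reference, the loop's structure in portable C11; this is a sketch of the
shape, not the kernel implementation:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Illustrative sketch: 64-bit atomic add built from a compare-exchange
     * retry loop, the same structure the cx8 assembly implements. */
    static int64_t atomic64_add_return_sketch(_Atomic int64_t *v, int64_t i)
    {
        int64_t old = atomic_load(v);
        int64_t new;

        do {
            new = old + i;  /* recompute against the freshest value */
        } while (!atomic_compare_exchange_weak(v, &old, new));
        return new;
    }
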
diff --git a/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch b/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
new file mode 100644
index 00000000000..94130228c91
--- /dev/null
+++ b/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
@@ -0,0 +1,70 @@
+From 6d4831c283530a5f2c6bd8172c13efa236eb149d Mon Sep 17 00:00:00 2001
+From: Andrew Morton
+Date: Wed, 27 Apr 2011 15:26:41 -0700
+Subject: vfs: avoid large kmalloc()s for the fdtable
+
+From: Andrew Morton
+
+commit 6d4831c283530a5f2c6bd8172c13efa236eb149d upstream.
+
+Azurit reports large increases in system time after 2.6.36 when running
+Apache.  It was bisected down to a892e2d7dcdfa6c76e6 ("vfs: use kmalloc()
+to allocate fdmem if possible").
+
+That patch caused the vfs to use kmalloc() for very large allocations and
+this is causing excessive work (and presumably excessive reclaim) within
+the page allocator.
+
+Fix it by falling back to vmalloc() earlier - when the allocation attempt
+would have been considered "costly" by reclaim.
+
+Reported-by: azurIt
+Tested-by: azurIt
+Acked-by: Changli Gao
+Cc: Americo Wang
+Cc: Jiri Slaby
+Acked-by: Eric Dumazet
+Cc: Mel Gorman
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/file.c |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -9,6 +9,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -39,14 +40,17 @@ int sysctl_nr_open_max = 1024 * 1024; /*
+  */
+ static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
+ 
+-static inline void *alloc_fdmem(unsigned int size)
++static void *alloc_fdmem(unsigned int size)
+ {
+-	void *data;
+-
+-	data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+-	if (data != NULL)
+-		return data;
+-
++	/*
++	 * Very large allocations can stress page reclaim, so fall back to
++	 * vmalloc() if the allocation size will be considered "large" by the VM.
++	 */
++	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
++		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
++		if (data != NULL)
++			return data;
++	}
+ 	return vmalloc(size);
+ }
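
The threshold in this last fix is the page allocator's notion of a "costly"
allocation: anything larger than PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER (8 pages
with the usual value of 3) skips kmalloc() entirely. A userspace analogue of the
resulting policy; big_alloc() is an illustrative stand-in for vmalloc():

    #include <stdlib.h>

    #define PAGE_SIZE               4096UL
    #define PAGE_ALLOC_COSTLY_ORDER 3       /* kernel's usual value */

    extern void *big_alloc(size_t size);    /* stand-in for vmalloc() */

    /* Illustrative sketch of the size-based allocator choice. */
    static void *alloc_fdmem_sketch(size_t size)
    {
        if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
            void *data = malloc(size);      /* cheap path for small sizes */
            if (data)
                return data;
        }
        return big_alloc(size);             /* large or failed: page-granular path */
    }
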