From: Greg Kroah-Hartman
Date: Fri, 29 Apr 2011 16:20:18 +0000 (-0700)
Subject: .38 patches
X-Git-Tag: v2.6.38.5~2
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ff5462e0a03eaeba73c6cca00a851751c33dfc8d;p=thirdparty%2Fkernel%2Fstable-queue.git

.38 patches
---

diff --git a/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch b/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch
new file mode 100644
index 00000000000..d3a0c2d1cbe
--- /dev/null
+++ b/queue-2.6.38/agp-fix-arbitrary-kernel-memory-writes.patch
@@ -0,0 +1,58 @@
+From 194b3da873fd334ef183806db751473512af29ce Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov
+Date: Thu, 14 Apr 2011 20:55:16 +0400
+Subject: agp: fix arbitrary kernel memory writes
+
+From: Vasiliy Kulikov
+
+commit 194b3da873fd334ef183806db751473512af29ce upstream.
+
+pg_start is copied from userspace on AGPIOC_BIND and AGPIOC_UNBIND ioctl
+cmds of agp_ioctl() and passed to agpioc_bind_wrap(). As said in the
+comment, (pg_start + mem->page_count) may wrap in case of AGPIOC_BIND,
+and it is not checked at all in case of AGPIOC_UNBIND. As a result, user
+with sufficient privileges (usually "video" group) may generate either
+local DoS or privilege escalation.
+
+Signed-off-by: Vasiliy Kulikov
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/char/agp/generic.c |   11 ++++++++---
+ 1 file changed, 8 insertions(+), 3 deletions(-)
+
+--- a/drivers/char/agp/generic.c
++++ b/drivers/char/agp/generic.c
+@@ -1089,8 +1089,8 @@ int agp_generic_insert_memory(struct agp
+ 		return -EINVAL;
+ 	}
+ 
+-	/* AK: could wrap */
+-	if ((pg_start + mem->page_count) > num_entries)
++	if (((pg_start + mem->page_count) > num_entries) ||
++	    ((pg_start + mem->page_count) < pg_start))
+ 		return -EINVAL;
+ 
+ 	j = pg_start;
+@@ -1124,7 +1124,7 @@ int agp_generic_remove_memory(struct agp
+ {
+ 	size_t i;
+ 	struct agp_bridge_data *bridge;
+-	int mask_type;
++	int mask_type, num_entries;
+ 
+ 	bridge = mem->bridge;
+ 	if (!bridge)
+@@ -1136,6 +1136,11 @@ int agp_generic_remove_memory(struct agp
+ 	if (type != mem->type)
+ 		return -EINVAL;
+ 
++	num_entries = agp_num_entries();
++	if (((pg_start + mem->page_count) > num_entries) ||
++	    ((pg_start + mem->page_count) < pg_start))
++		return -EINVAL;
++
+ 	mask_type = bridge->driver->agp_type_to_mask_type(bridge, type);
+ 	if (mask_type != 0) {
+ 		/* The generic routines know nothing of memory types */
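
The key fix above is the unsigned wrap test: on overflow, pg_start + mem->page_count
produces a value smaller than pg_start. A minimal userspace sketch of the same check,
with range_ok() and its parameters as illustrative stand-ins rather than kernel API:

    #include <stdio.h>
    #include <stddef.h>

    /* Sketch of the agp_generic_insert_memory() bounds check; not kernel code.
     * Reject [start, start + count) if the sum wraps or exceeds the table. */
    static int range_ok(size_t start, size_t count, size_t num_entries)
    {
        if (start + count < start)      /* unsigned wrap-around */
            return 0;
        return start + count <= num_entries;
    }

    int main(void)
    {
        printf("%d\n", range_ok(10, 20, 100));          /* 1: fits */
        printf("%d\n", range_ok((size_t)-8, 16, 100));  /* 0: wraps */
        return 0;
    }

The same two-sided test appears twice in the patch because both the bind and the
unbind paths take the range from userspace.
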
diff --git a/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch b/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch
new file mode 100644
index 00000000000..195a09ff1c3
--- /dev/null
+++ b/queue-2.6.38/agp-fix-oom-and-buffer-overflow.patch
@@ -0,0 +1,59 @@
+From b522f02184b413955f3bc952e3776ce41edc6355 Mon Sep 17 00:00:00 2001
+From: Vasiliy Kulikov
+Date: Thu, 14 Apr 2011 20:55:19 +0400
+Subject: agp: fix OOM and buffer overflow
+
+From: Vasiliy Kulikov
+
+commit b522f02184b413955f3bc952e3776ce41edc6355 upstream.
+
+page_count is copied from userspace. agp_allocate_memory() tries to
+check whether this number is too big, but doesn't take into account the
+wrap case. Also agp_create_user_memory() doesn't check whether
+alloc_size is calculated from num_agp_pages variable without overflow.
+This may lead to allocation of too small buffer with following buffer
+overflow.
+
+Another problem in agp code is not addressed in the patch - kernel memory
+exhaustion (AGPIOC_RESERVE and AGPIOC_ALLOCATE ioctls). It is not checked
+whether requested pid is a pid of the caller (no check in agpioc_reserve_wrap()).
+Each allocation is limited to 16KB, though, there is no per-process limit.
+This might lead to OOM situation, which is not even solved in case of the
+caller death by OOM killer - the memory is allocated for another (faked) process.
+
+Signed-off-by: Vasiliy Kulikov
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/char/agp/generic.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/char/agp/generic.c
++++ b/drivers/char/agp/generic.c
+@@ -115,6 +115,9 @@ static struct agp_memory *agp_create_use
+ 	struct agp_memory *new;
+ 	unsigned long alloc_size = num_agp_pages*sizeof(struct page *);
+ 
++	if (INT_MAX/sizeof(struct page *) < num_agp_pages)
++		return NULL;
++
+ 	new = kzalloc(sizeof(struct agp_memory), GFP_KERNEL);
+ 	if (new == NULL)
+ 		return NULL;
+@@ -234,11 +237,14 @@ struct agp_memory *agp_allocate_memory(s
+ 	int scratch_pages;
+ 	struct agp_memory *new;
+ 	size_t i;
++	int cur_memory;
+ 
+ 	if (!bridge)
+ 		return NULL;
+ 
+-	if ((atomic_read(&bridge->current_memory_agp) + page_count) > bridge->max_memory_agp)
++	cur_memory = atomic_read(&bridge->current_memory_agp);
++	if ((cur_memory + page_count > bridge->max_memory_agp) ||
++	    (cur_memory + page_count < page_count))
+ 		return NULL;
+ 
+ 	if (type >= AGP_USER_TYPES) {
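
The INT_MAX guard above is the standard way to reject a multiplication overflow
before it happens: test the element count against MAX / element-size first. A
sketch of the pattern (alloc_page_array() is an illustrative helper, not a kernel
function):

    #include <limits.h>
    #include <stdlib.h>

    /* Illustrative sketch: fail instead of letting n * sizeof(elem) wrap
     * to a small value and under-allocate. */
    static void *alloc_page_array(size_t n)
    {
        if (n > INT_MAX / sizeof(void *))   /* n * sizeof would overflow */
            return NULL;
        return calloc(n, sizeof(void *));
    }
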
diff --git a/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch b/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
new file mode 100644
index 00000000000..476139a25f8
--- /dev/null
+++ b/queue-2.6.38/drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
@@ -0,0 +1,30 @@
+From bf5192edcbc1f0a7f9c054649dbf1a0b3210d9b7 Mon Sep 17 00:00:00 2001
+From: Dave Airlie
+Date: Fri, 22 Apr 2011 07:51:33 +1000
+Subject: drm: select FRAMEBUFFER_CONSOLE_PRIMARY if we have FRAMEBUFFER_CONSOLE
+
+From: Dave Airlie
+
+commit bf5192edcbc1f0a7f9c054649dbf1a0b3210d9b7 upstream.
+
+Multi-gpu/switcheroo relies on this option to get the console on the
+correct GPU at bootup, some distros enable it but it seems some get
+it wrong.
+
+Signed-off-by: Dave Airlie
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/gpu/drm/Kconfig |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/Kconfig
++++ b/drivers/gpu/drm/Kconfig
+@@ -24,6 +24,7 @@ config DRM_KMS_HELPER
+ 	depends on DRM
+ 	select FB
+ 	select FRAMEBUFFER_CONSOLE if !EXPERT
++	select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE
+ 	help
+ 	  FB and CRTC helpers for KMS drivers.
+ 
diff --git a/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch b/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch
new file mode 100644
index 00000000000..1d0fd95f3b3
--- /dev/null
+++ b/queue-2.6.38/iwl3945-do-not-deprecate-software-scan.patch
@@ -0,0 +1,43 @@
+From 3bda50e3eaf58a4b9c4ce34204e5faa15c8b1b97 Mon Sep 17 00:00:00 2001
+From: Stanislaw Gruszka
+Date: Mon, 14 Mar 2011 14:15:06 +0100
+Subject: iwl3945: do not deprecate software scan
+
+From: Stanislaw Gruszka
+
+commit 3bda50e3eaf58a4b9c4ce34204e5faa15c8b1b97 upstream.
+
+Software scanning can be used for workaround some performance problems,
+so do not deprecate it.
+
+Signed-off-by: Stanislaw Gruszka
+Acked-by: Wey-Yi Guy
+Signed-off-by: John W. Linville
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/net/wireless/iwlwifi/iwl3945-base.c |    6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
++++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
+@@ -3995,8 +3995,7 @@ static int iwl3945_pci_probe(struct pci_
+ 	 * "the hard way", rather than using device's scan.
+ 	 */
+ 	if (iwl3945_mod_params.disable_hw_scan) {
+-		dev_printk(KERN_DEBUG, &(pdev->dev),
+-			   "sw scan support is deprecated\n");
++		IWL_DEBUG_INFO(priv, "Disabling hw_scan\n");
+ 		iwl3945_hw_ops.hw_scan = NULL;
+ 	}
+ 
+@@ -4318,8 +4317,7 @@ MODULE_PARM_DESC(debug, "debug output ma
+ #endif
+ module_param_named(disable_hw_scan, iwl3945_mod_params.disable_hw_scan,
+ 		int, S_IRUGO);
+-MODULE_PARM_DESC(disable_hw_scan,
+-	"disable hardware scanning (default 0) (deprecated)");
++MODULE_PARM_DESC(disable_hw_scan, "disable hardware scanning (default 0)");
+ module_param_named(fw_restart3945, iwl3945_mod_params.restart_fw, int, S_IRUGO);
+ MODULE_PARM_DESC(fw_restart3945, "restart firmware in case of error");
+ 
diff --git a/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch b/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
new file mode 100644
index 00000000000..c5501fc15ec
--- /dev/null
+++ b/queue-2.6.38/iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
@@ -0,0 +1,94 @@
+From f844a709a7d8f8be61a571afc31dfaca9e779621 Mon Sep 17 00:00:00 2001
+From: Stanislaw Gruszka
+Date: Fri, 28 Jan 2011 16:47:44 +0100
+Subject: iwlwifi: do not set tx power when channel is changing
+
+From: Stanislaw Gruszka
+
+commit f844a709a7d8f8be61a571afc31dfaca9e779621 upstream.
+
+Mac80211 can request for tx power and channel change in one ->config
+call. If that happens, *_send_tx_power functions will try to setup tx
+power for old channel, what can be not correct because we already change
+the band. I.e error "Failed to get channel info for channel 140 [0]",
+can be printed frequently when operating in software scanning mode.
+
+Signed-off-by: Stanislaw Gruszka
+Acked-by: Wey-Yi Guy
+Signed-off-by: John W. Linville
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ drivers/net/wireless/iwlwifi/iwl-3945.c     |    2 +-
+ drivers/net/wireless/iwlwifi/iwl-4965.c     |    2 +-
+ drivers/net/wireless/iwlwifi/iwl-agn-rxon.c |    5 ++---
+ drivers/net/wireless/iwlwifi/iwl-core.c     |   13 ++++++++++---
+ 4 files changed, 14 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
++++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
+@@ -1823,7 +1823,7 @@ int iwl3945_commit_rxon(struct iwl_priv
+ 
+ 	/* If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames */
+-	rc = priv->cfg->ops->lib->send_tx_power(priv);
++	rc = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (rc) {
+ 		IWL_ERR(priv, "Error setting Tx power (%d).\n", rc);
+ 		return rc;
+--- a/drivers/net/wireless/iwlwifi/iwl-4965.c
++++ b/drivers/net/wireless/iwlwifi/iwl-4965.c
+@@ -1571,7 +1571,7 @@ static int iwl4965_commit_rxon(struct iw
+ 
+ 	/* If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames */
+-	ret = iwl_set_tx_power(priv, priv->tx_power_user_lmt, true);
++	ret = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (ret) {
+ 		IWL_ERR(priv, "Error sending TX power (%d)\n", ret);
+ 		return ret;
+--- a/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
++++ b/drivers/net/wireless/iwlwifi/iwl-agn-rxon.c
+@@ -288,10 +288,9 @@ int iwlagn_commit_rxon(struct iwl_priv *
+ 	 * If we issue a new RXON command which required a tune then we must
+ 	 * send a new TXPOWER command or we won't be able to Tx any frames.
+ 	 *
+-	 * FIXME: which RXON requires a tune? Can we optimise this out in
+-	 * some cases?
++	 * It's expected we set power here if channel is changing.
+ 	 */
+-	ret = iwl_set_tx_power(priv, priv->tx_power_user_lmt, true);
++	ret = iwl_set_tx_power(priv, priv->tx_power_next, true);
+ 	if (ret) {
+ 		IWL_ERR(priv, "Error sending TX power (%d)\n", ret);
+ 		return ret;
+--- a/drivers/net/wireless/iwlwifi/iwl-core.c
++++ b/drivers/net/wireless/iwlwifi/iwl-core.c
+@@ -1161,6 +1161,8 @@ int iwl_set_tx_power(struct iwl_priv *pr
+ {
+ 	int ret;
+ 	s8 prev_tx_power;
++	bool defer;
++	struct iwl_rxon_context *ctx = &priv->contexts[IWL_RXON_CTX_BSS];
+ 
+ 	lockdep_assert_held(&priv->mutex);
+ 
+@@ -1188,10 +1190,15 @@ int iwl_set_tx_power(struct iwl_priv *pr
+ 	if (!iwl_is_ready_rf(priv))
+ 		return -EIO;
+ 
+-	/* scan complete use tx_power_next, need to be updated */
++	/* scan complete and commit_rxon use tx_power_next value,
++	 * it always need to be updated for newest request */
+ 	priv->tx_power_next = tx_power;
+-	if (test_bit(STATUS_SCANNING, &priv->status) && !force) {
+-		IWL_DEBUG_INFO(priv, "Deferring tx power set while scanning\n");
++
++	/* do not set tx power when scanning or channel changing */
++	defer = test_bit(STATUS_SCANNING, &priv->status) ||
++		memcmp(&ctx->active, &ctx->staging, sizeof(ctx->staging));
++	if (defer && !force) {
++		IWL_DEBUG_INFO(priv, "Deferring tx power set\n");
+ 		return 0;
+ 	}
+ 
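
The iwl-core.c hunk is an instance of a more general pattern: while a composite
reconfiguration is in flight (the staged state differs from the active one), record
the newest requested value and let the commit path apply it. A rough sketch of that
shape; every name here is illustrative, not iwlwifi driver code:

    #include <stdbool.h>
    #include <string.h>

    struct rxon { int channel; int band; };

    static int pending_power;   /* newest request, applied at commit time */

    /* Illustrative sketch of the defer-while-changing pattern. */
    static int set_tx_power_sketch(const struct rxon *active,
                                   const struct rxon *staging,
                                   bool scanning, int power, bool force)
    {
        bool defer = scanning ||
                     memcmp(active, staging, sizeof(*staging)) != 0;

        pending_power = power;      /* always remember the newest request */
        if (defer && !force)
            return 0;               /* commit path programs it later */
        /* ... program the hardware with 'power' here ... */
        return 0;
    }
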
diff --git a/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch b/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
new file mode 100644
index 00000000000..8085a4c2085
--- /dev/null
+++ b/queue-2.6.38/m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
@@ -0,0 +1,43 @@
+From 4aac0b4815ba592052758f4b468f253d383dc9d6 Mon Sep 17 00:00:00 2001
+From: Michael Schmitz
+Date: Tue, 26 Apr 2011 14:51:53 +1200
+Subject: m68k/mm: Set all online nodes in N_NORMAL_MEMORY
+
+From: Michael Schmitz
+
+commit 4aac0b4815ba592052758f4b468f253d383dc9d6 upstream.
+
+For m68k, N_NORMAL_MEMORY represents all nodes that have present memory
+since it does not support HIGHMEM. This patch sets the bit at the time
+node_present_pages has been set by free_area_init_node.
+At the time the node is brought online, the node state would have to be
+done unconditionally since information about present memory has not yet
+been recorded.
+
+If N_NORMAL_MEMORY is not accurate, slub may encounter errors since it
+uses this nodemask to setup per-cache kmem_cache_node data structures.
+
+This patch is an alternative to the one proposed by David Rientjes
+attempting to set node state immediately when bringing the node online.
+
+Signed-off-by: Michael Schmitz
+Tested-by: Thorsten Glaser
+Signed-off-by: Geert Uytterhoeven
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/m68k/mm/motorola.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/m68k/mm/motorola.c
++++ b/arch/m68k/mm/motorola.c
+@@ -300,6 +300,8 @@ void __init paging_init(void)
+ 		zones_size[ZONE_DMA] = m68k_memory[i].size >> PAGE_SHIFT;
+ 		free_area_init_node(i, zones_size,
+ 				    m68k_memory[i].addr >> PAGE_SHIFT, NULL);
++		if (node_present_pages(i))
++			node_set_state(i, N_NORMAL_MEMORY);
+ 	}
+ }
+ 
diff --git a/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch b/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch
new file mode 100644
index 00000000000..7aa27dfe105
--- /dev/null
+++ b/queue-2.6.38/mm-check-if-pte-is-already-allocated-during-page-fault.patch
@@ -0,0 +1,66 @@
+From cc03638df20acbec5d0d0d9e07234aadde9e698d Mon Sep 17 00:00:00 2001
+From: Mel Gorman
+Date: Wed, 27 Apr 2011 15:26:56 -0700
+Subject: mm: check if PTE is already allocated during page fault
+
+From: Mel Gorman
+
+commit cc03638df20acbec5d0d0d9e07234aadde9e698d upstream.
+
+With transparent hugepage support, handle_mm_fault() has to be careful
+that a normal PMD has been established before handling a PTE fault. To
+achieve this, it used __pte_alloc() directly instead of pte_alloc_map as
+pte_alloc_map is unsafe to run against a huge PMD. pte_offset_map() is
+called once it is known the PMD is safe.
+
+pte_alloc_map() is smart enough to check if a PTE is already present
+before calling __pte_alloc but this check was lost. As a consequence,
+PTEs may be allocated unnecessarily and the page table lock taken. This
+useless PTE does get cleaned up but it's a performance hit which is
+visible in page_test from aim9.
+
+This patch simply re-adds the check normally done by pte_alloc_map to
+check if the PTE needs to be allocated before taking the page table lock.
+The effect is noticeable in page_test from aim9.
+
+ AIM9
+                 2.6.38-vanilla   2.6.38-checkptenone
+ creat-clo      446.10 ( 0.00%)     424.47 (-5.10%)
+ page_test       38.10 ( 0.00%)      42.04 ( 9.37%)
+ brk_test        52.45 ( 0.00%)      51.57 (-1.71%)
+ exec_test      382.00 ( 0.00%)     456.90 (16.39%)
+ fork_test       60.11 ( 0.00%)      67.79 (11.34%)
+ MMTests Statistics: duration
+ Total Elapsed Time (seconds)      611.90    612.22
+
+(While this affects 2.6.38, it is a performance rather than a functional
+bug and normally outside the rules -stable. While the big performance
+differences are to a microbench, the difference in fork and exec
+performance may be significant enough that -stable wants to consider the
+patch)
+
+Reported-by: Raz Ben Yehuda
+Signed-off-by: Mel Gorman
+Reviewed-by: Rik van Riel
+Reviewed-by: Andrea Arcangeli
+Reviewed-by: Minchan Kim
+Acked-by: Johannes Weiner
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/memory.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -3332,7 +3332,7 @@ int handle_mm_fault(struct mm_struct *mm
+ 	 * run pte_offset_map on the pmd, if an huge pmd could
+ 	 * materialize from under us from a different thread.
+ 	 */
+-	if (unlikely(__pte_alloc(mm, vma, pmd, address)))
++	if (unlikely(pmd_none(*pmd)) && __pte_alloc(mm, vma, pmd, address))
+ 		return VM_FAULT_OOM;
+ 	/* if an huge pmd materialized from under us just retry later */
+ 	if (unlikely(pmd_trans_huge(*pmd)))
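
The one-line change restores a cheap "is it already there?" test before the
allocation path, the same double-check idiom pte_alloc_map() uses. A userspace
sketch of the shape; ensure_slot() and struct table are illustrative, not kernel
code:

    #include <stdlib.h>

    struct table { void *slot; };

    /* Illustrative sketch: only take the expensive allocation path when the
     * slot is actually missing. The kernel version re-checks under the
     * page table lock before installing the new PTE page. */
    static int ensure_slot(struct table *t)
    {
        if (t->slot == NULL) {          /* cf. pmd_none(*pmd): check first */
            void *p = calloc(1, 64);
            if (p == NULL)
                return -1;              /* cf. VM_FAULT_OOM */
            t->slot = p;
        }
        return 0;
    }
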
diff --git a/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch b/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
new file mode 100644
index 00000000000..06ec03c17e4
--- /dev/null
+++ b/queue-2.6.38/mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
@@ -0,0 +1,165 @@
+From 78f11a255749d09025f54d4e2df4fbcb031530e2 Mon Sep 17 00:00:00 2001
+From: Andrea Arcangeli
+Date: Wed, 27 Apr 2011 15:26:45 -0700
+Subject: mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups
+
+From: Andrea Arcangeli
+
+commit 78f11a255749d09025f54d4e2df4fbcb031530e2 upstream.
+
+The huge_memory.c THP page fault was allowed to run if vm_ops was null
+(which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't
+setup a special vma->vm_ops and it would fallback to regular anonymous
+memory) but other THP logics weren't fully activated for vmas with vm_file
+not NULL (/dev/zero has a not NULL vma->vm_file).
+
+So this removes the vm_file checks so that /dev/zero also can safely use
+THP (the other albeit safer approach to fix this bug would have been to
+prevent the THP initial page fault to run if vm_file was set).
+
+After removing the vm_file checks, this also makes huge_memory.c stricter
+in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file
+check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP
+under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should
+only be allowed to exist before the first page fault, and in turn when
+vma->anon_vma is null (so preventing khugepaged registration). So I tend
+to think the previous comment saying if vm_file was set, VM_PFNMAP might
+have been set and we could still be registered in khugepaged (despite
+anon_vma was not NULL to be registered in khugepaged) was too paranoid.
+The is_linear_pfn_mapping check is also I think superfluous (as described
+by comment) but under DEBUG_VM it is safe to stay.
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682
+
+Signed-off-by: Andrea Arcangeli
+Reported-by: Caspar Zhang
+Acked-by: Mel Gorman
+Acked-by: Rik van Riel
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ include/linux/huge_mm.h |    2 +-
+ include/linux/mm.h      |    3 ++-
+ mm/huge_memory.c        |   43 ++++++++++++++++++++++++-------------------
+ 3 files changed, 27 insertions(+), 21 deletions(-)
+
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge
+ 					 unsigned long end,
+ 					 long adjust_next)
+ {
+-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++	if (!vma->anon_vma || vma->vm_ops)
+ 		return;
+ 	__vma_adjust_trans_huge(vma, start, end, adjust_next);
+ }
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void
+ #define VM_RandomReadHint(v)	((v)->vm_flags & VM_RAND_READ)
+ 
+ /*
+- * special vmas that are non-mergable, non-mlock()able
++ * Special vmas that are non-mergable, non-mlock()able.
++ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
+ */
+ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+ 
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -1400,6 +1400,9 @@ out:
+ 	return ret;
+ }
+ 
++#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
++		   VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
++
+ int hugepage_madvise(struct vm_area_struct *vma,
+ 		     unsigned long *vm_flags, int advice)
+ {
+@@ -1408,11 +1411,7 @@ int hugepage_madvise(struct vm_area_stru
+ 		/*
+ 		 * Be somewhat over-protective like KSM for now!
+ 		 */
+-		if (*vm_flags & (VM_HUGEPAGE |
+-				 VM_SHARED | VM_MAYSHARE |
+-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+-				 VM_MIXEDMAP | VM_SAO))
++		if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+ 			return -EINVAL;
+ 		*vm_flags &= ~VM_NOHUGEPAGE;
+ 		*vm_flags |= VM_HUGEPAGE;
+@@ -1428,11 +1427,7 @@ int hugepage_madvise(struct vm_area_stru
+ 		/*
+ 		 * Be somewhat over-protective like KSM for now!
+ 		 */
+-		if (*vm_flags & (VM_NOHUGEPAGE |
+-				 VM_SHARED | VM_MAYSHARE |
+-				 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+-				 VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+-				 VM_MIXEDMAP | VM_SAO))
++		if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+ 			return -EINVAL;
+ 		*vm_flags &= ~VM_HUGEPAGE;
+ 		*vm_flags |= VM_NOHUGEPAGE;
+@@ -1566,10 +1561,14 @@ int khugepaged_enter_vma_merge(struct vm
+ 		 * page fault if needed.
+ 		 */
+ 		return 0;
+-	if (vma->vm_file || vma->vm_ops)
++	if (vma->vm_ops)
+ 		/* khugepaged not yet working on file or special mappings */
+ 		return 0;
+-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++	/*
++	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
++	 * true too, verify it here.
++	 */
++	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+ 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+ 	hend = vma->vm_end & HPAGE_PMD_MASK;
+ 	if (hstart < hend)
+@@ -1818,12 +1817,15 @@ static void collapse_huge_page(struct mm
+ 	    (vma->vm_flags & VM_NOHUGEPAGE))
+ 		goto out;
+ 
+-	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
+-	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++	if (!vma->anon_vma || vma->vm_ops)
+ 		goto out;
+ 	if (is_vma_temporary_stack(vma))
+ 		goto out;
+-	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++	/*
++	 * If is_pfn_mapping() is true is_linear_pfn_mapping() must be
++	 * true too, verify it here.
++	 */
++	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+ 
+ 	pgd = pgd_offset(mm, address);
+ 	if (!pgd_present(*pgd))
+@@ -2056,13 +2058,16 @@ static unsigned int khugepaged_scan_mm_s
+ 			progress++;
+ 			continue;
+ 		}
+-		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
+-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
++		if (!vma->anon_vma || vma->vm_ops)
+ 			goto skip;
+ 		if (is_vma_temporary_stack(vma))
+ 			goto skip;
+-
+-		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
++		/*
++		 * If is_pfn_mapping() is true is_linear_pfn_mapping()
++		 * must be true too, verify it here.
++		 */
++		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
++			  vma->vm_flags & VM_NO_THP);
+ 
+ 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+ 		hend = vma->vm_end & HPAGE_PMD_MASK;
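
Beyond the /dev/zero fix, the cleanup collapses two hand-maintained flag lists
into a single VM_NO_THP mask, so the "never THP this vma" policy lives in one
place. The idiom in miniature; the flag names and values below are illustrative,
not the kernel's:

    /* Illustrative sketch of consolidating flag tests into one mask. */
    enum {
        F_SHARED   = 1 << 0,
        F_PFNMAP   = 1 << 1,
        F_HUGETLB  = 1 << 2,
        F_MIXEDMAP = 1 << 3,
    };
    #define F_NO_THP (F_SHARED | F_PFNMAP | F_HUGETLB | F_MIXEDMAP)

    static int may_set_hugepage(unsigned long flags, unsigned long extra)
    {
        return !(flags & (extra | F_NO_THP));   /* one mask, maintained once */
    }
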
diff --git a/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch b/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
new file mode 100644
index 00000000000..1a55d492d57
--- /dev/null
+++ b/queue-2.6.38/nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
@@ -0,0 +1,44 @@
+From 26c4c170731f00008f4317a2888a0a07ac99d90d Mon Sep 17 00:00:00 2001
+From: Jeff Layton
+Date: Wed, 27 Apr 2011 11:49:09 -0400
+Subject: nfs: don't lose MS_SYNCHRONOUS on remount of noac mount
+
+From: Jeff Layton
+
+commit 26c4c170731f00008f4317a2888a0a07ac99d90d upstream.
+
+On a remount, the VFS layer will clear the MS_SYNCHRONOUS bit on the
+assumption that the flags on the mount syscall will have it set if the
+remounted fs is supposed to keep it.
+
+In the case of "noac" though, MS_SYNCHRONOUS is implied. A remount of
+such a mount will lose the MS_SYNCHRONOUS flag since "sync" isn't part
+of the mount options.
+
+Reported-by: Max Matveev
+Signed-off-by: Jeff Layton
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/super.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/fs/nfs/super.c
++++ b/fs/nfs/super.c
+@@ -2077,6 +2077,15 @@ nfs_remount(struct super_block *sb, int
+ 	if (error < 0)
+ 		goto out;
+ 
++	/*
++	 * noac is a special case. It implies -o sync, but that's not
++	 * necessarily reflected in the mtab options. do_remount_sb
++	 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
++	 * remount options, so we have to explicitly reset it.
++	 */
++	if (data->flags & NFS_MOUNT_NOAC)
++		*flags |= MS_SYNCHRONOUS;
++
+ 	/* compare new mount options with old ones */
+ 	error = nfs_compare_remount_data(nfss, data);
+ out:
diff --git a/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch b/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
new file mode 100644
index 00000000000..e991c9e3a0b
--- /dev/null
+++ b/queue-2.6.38/nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
@@ -0,0 +1,39 @@
+From 47c2199b6eb5fbe38ddb844db7cdbd914d304f9c Mon Sep 17 00:00:00 2001
+From: Trond Myklebust
+Date: Fri, 15 Apr 2011 17:34:18 -0400
+Subject: NFSv4.1: Ensure state manager thread dies on last umount
+
+From: Trond Myklebust
+
+commit 47c2199b6eb5fbe38ddb844db7cdbd914d304f9c upstream.
+
+Currently, the state manager may continue to try recovering state forever
+even after the last filesystem to reference that nfs_client has umounted.
+
+Signed-off-by: Trond Myklebust
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/nfs/nfs4state.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/fs/nfs/nfs4state.c
++++ b/fs/nfs/nfs4state.c
+@@ -1600,7 +1600,7 @@ static void nfs4_state_manager(struct nf
+ 	int status = 0;
+ 
+ 	/* Ensure exclusive access to NFSv4 state */
+-	for(;;) {
++	do {
+ 		if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
+ 			/* We're going to have to re-establish a clientid */
+ 			status = nfs4_reclaim_lease(clp);
+@@ -1684,7 +1684,7 @@ static void nfs4_state_manager(struct nf
+ 			break;
+ 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ 			break;
+-	}
++	} while (atomic_read(&clp->cl_count) > 1);
+ 	return;
+ out_error:
+ 	printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
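
The change converts an unbounded for (;;) into a do/while guarded by the client
refcount, so the worker exits once only its own reference remains. The shape of
the fix in C11; recover_one_step() is an illustrative stand-in for the recovery
work, not NFS code:

    #include <stdatomic.h>
    #include <stdbool.h>

    extern bool recover_one_step(void);     /* illustrative work item */

    /* Sketch of a worker loop bounded by a reference count. */
    static void state_manager_sketch(atomic_int *refcount)
    {
        do {
            if (!recover_one_step())
                break;                      /* nothing left to recover */
        } while (atomic_load(refcount) > 1);/* last umount -> thread exits */
    }
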
diff --git a/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch b/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch
new file mode 100644
index 00000000000..89455a91a5d
--- /dev/null
+++ b/queue-2.6.38/oom-use-pte-pages-in-oom-score.patch
@@ -0,0 +1,46 @@
+From f755a042d82b51b54f3bdd0890e5ea56c0fb6807 Mon Sep 17 00:00:00 2001
+From: KOSAKI Motohiro
+Date: Wed, 27 Apr 2011 15:26:50 -0700
+Subject: oom: use pte pages in OOM score
+
+From: KOSAKI Motohiro
+
+commit f755a042d82b51b54f3bdd0890e5ea56c0fb6807 upstream.
+
+PTE pages eat up memory just like anything else, but we do not account
+for them in any way in the OOM scores. They are also _guaranteed_ to get
+freed up when a process is OOM killed, while RSS is not.
+
+Reported-by: Dave Hansen
+Signed-off-by: KOSAKI Motohiro
+Cc: Hugh Dickins
+Cc: KAMEZAWA Hiroyuki
+Cc: Oleg Nesterov
+Acked-by: David Rientjes
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ mm/oom_kill.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/mm/oom_kill.c
++++ b/mm/oom_kill.c
+@@ -172,10 +172,13 @@ unsigned int oom_badness(struct task_str
+ 
+ 	/*
+ 	 * The baseline for the badness score is the proportion of RAM that each
+-	 * task's rss and swap space use.
++	 * task's rss, pagetable and swap space use.
+ 	 */
+-	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
+-		totalpages;
++	points = get_mm_rss(p->mm) + p->mm->nr_ptes;
++	points += get_mm_counter(p->mm, MM_SWAPENTS);
++
++	points *= 1000;
++	points /= totalpages;
+ 	task_unlock(p);
+ 
+ 	/*
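
The new badness arithmetic in plain form: normalize (rss + page-table pages +
swap entries) to thousandths of available RAM. A sketch with a worked example;
the numbers are made up for illustration:

    /* Illustrative sketch of the oom_badness() baseline computation. */
    static unsigned long oom_points(unsigned long rss, unsigned long nr_ptes,
                                    unsigned long swapents,
                                    unsigned long totalpages)
    {
        unsigned long points = rss + nr_ptes;   /* pages, incl. page tables */

        points += swapents;
        points *= 1000;
        return points / totalpages;             /* 0..1000 scale */
    }

    /* e.g. rss = 25000, nr_ptes = 500, swapents = 4500, totalpages = 1000000:
     * (30000 * 1000) / 1000000 = 30, i.e. the task uses 3.0% of RAM. */
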
diff --git a/queue-2.6.38/series b/queue-2.6.38/series
index 0b2a1610741..ffb6fe8bc0f 100644
--- a/queue-2.6.38/series
+++ b/queue-2.6.38/series
@@ -37,3 +37,16 @@ slub-fix-panic-with-discontigmem.patch
 set-memory-ranges-in-n_normal_memory-when-onlined.patch
 flexcop-pci-fix-__xlate_proc_name-warning-for-flexcop-pci.patch
 virtio-console-enable-call-to-hvc_remove-on-console-port-remove.patch
+oom-use-pte-pages-in-oom-score.patch
+mm-check-if-pte-is-already-allocated-during-page-fault.patch
+mm-thp-fix-dev-zero-map_private-and-vm_flags-cleanups.patch
+m68k-mm-set-all-online-nodes-in-n_normal_memory.patch
+vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
+nfs-don-t-lose-ms_synchronous-on-remount-of-noac-mount.patch
+nfsv4.1-ensure-state-manager-thread-dies-on-last-umount.patch
+um-mdd-support-for-64-bit-atomic-operations.patch
+drm-select-framebuffer_console_primary-if-we-have-framebuffer_console.patch
+agp-fix-arbitrary-kernel-memory-writes.patch
+agp-fix-oom-and-buffer-overflow.patch
+iwlwifi-do-not-set-tx-power-when-channel-is-changing.patch
+iwl3945-do-not-deprecate-software-scan.patch
diff --git a/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch b/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch
new file mode 100644
index 00000000000..54eaefb98a1
--- /dev/null
+++ b/queue-2.6.38/um-mdd-support-for-64-bit-atomic-operations.patch
@@ -0,0 +1,274 @@
+From 57d8e02e3cd21bccf2b84b26b42feb79e1f0f83e Mon Sep 17 00:00:00 2001
+From: Richard Weinberger
+Date: Wed, 27 Apr 2011 15:26:51 -0700
+Subject: um: mdd support for 64 bit atomic operations
+
+From: Richard Weinberger
+
+commit 57d8e02e3cd21bccf2b84b26b42feb79e1f0f83e upstream.
+
+This adds support for 64 bit atomic operations on 32 bit UML systems. XFS
+needs them since 2.6.38.
+
+ $ make ARCH=um SUBARCH=i386
+ ...
+   LD      .tmp_vmlinux1
+ fs/built-in.o: In function `xlog_regrant_reserve_log_space':
+ xfs_log.c:(.text+0xd8584): undefined reference to `atomic64_read_386'
+ xfs_log.c:(.text+0xd85ac): undefined reference to `cmpxchg8b_emu'
+ ...
+
+Addresses https://bugzilla.kernel.org/show_bug.cgi?id=32812
+
+Reported-by: Martin Walch
+Tested-by: Martin Walch
+Cc: Martin Walch
+Signed-off-by: Richard Weinberger
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/um/sys-i386/Makefile          |    2 
+ arch/um/sys-i386/atomic64_cx8_32.S |  225 +++++++++++++++++++++++++++++++++++++
+ 2 files changed, 226 insertions(+), 1 deletion(-)
+
+--- a/arch/um/sys-i386/Makefile
++++ b/arch/um/sys-i386/Makefile
+@@ -4,7 +4,7 @@
+ 
+ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
+ 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
+-	sys_call_table.o tls.o
++	sys_call_table.o tls.o atomic64_cx8_32.o
+ 
+ obj-$(CONFIG_BINFMT_ELF) += elfcore.o
+ 
+--- /dev/null
++++ b/arch/um/sys-i386/atomic64_cx8_32.S
+@@ -0,0 +1,225 @@
++/*
++ * atomic64_t for 586+
++ *
++ * Copied from arch/x86/lib/atomic64_cx8_32.S
++ *
++ * Copyright © 2010 Luca Barbieri
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ */
++
++#include
++#include
++#include
++
++.macro SAVE reg
++	pushl_cfi %\reg
++	CFI_REL_OFFSET \reg, 0
++.endm
++
++.macro RESTORE reg
++	popl_cfi %\reg
++	CFI_RESTORE \reg
++.endm
++
++.macro read64 reg
++	movl %ebx, %eax
++	movl %ecx, %edx
++/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
++	LOCK_PREFIX
++	cmpxchg8b (\reg)
++.endm
++
++ENTRY(atomic64_read_cx8)
++	CFI_STARTPROC
++
++	read64 %ecx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_read_cx8)
++
++ENTRY(atomic64_set_cx8)
++	CFI_STARTPROC
++
++1:
++/* we don't need LOCK_PREFIX since aligned 64-bit writes
++ * are atomic on 586 and newer */
++	cmpxchg8b (%esi)
++	jne 1b
++
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_set_cx8)
++
++ENTRY(atomic64_xchg_cx8)
++	CFI_STARTPROC
++
++	movl %ebx, %eax
++	movl %ecx, %edx
++1:
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_xchg_cx8)
++
++.macro addsub_return func ins insc
++ENTRY(atomic64_\func\()_return_cx8)
++	CFI_STARTPROC
++	SAVE ebp
++	SAVE ebx
++	SAVE esi
++	SAVE edi
++
++	movl %eax, %esi
++	movl %edx, %edi
++	movl %ecx, %ebp
++
++	read64 %ebp
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	\ins\()l %esi, %ebx
++	\insc\()l %edi, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%ebp)
++	jne 1b
++
++10:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE edi
++	RESTORE esi
++	RESTORE ebx
++	RESTORE ebp
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_\func\()_return_cx8)
++.endm
++
++addsub_return add add adc
++addsub_return sub sub sbb
++
++.macro incdec_return func ins insc
++ENTRY(atomic64_\func\()_return_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	\ins\()l $1, %ebx
++	\insc\()l $0, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++10:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE ebx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_\func\()_return_cx8)
++.endm
++
++incdec_return inc add adc
++incdec_return dec sub sbb
++
++ENTRY(atomic64_dec_if_positive_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	subl $1, %ebx
++	sbb $0, %ecx
++	js 2f
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++2:
++	movl %ebx, %eax
++	movl %ecx, %edx
++	RESTORE ebx
++	ret
++	CFI_ENDPROC
++ENDPROC(atomic64_dec_if_positive_cx8)
++
++ENTRY(atomic64_add_unless_cx8)
++	CFI_STARTPROC
++	SAVE ebp
++	SAVE ebx
++/* these just push these two parameters on the stack */
++	SAVE edi
++	SAVE esi
++
++	movl %ecx, %ebp
++	movl %eax, %esi
++	movl %edx, %edi
++
++	read64 %ebp
++1:
++	cmpl %eax, 0(%esp)
++	je 4f
++2:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	addl %esi, %ebx
++	adcl %edi, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%ebp)
++	jne 1b
++
++	movl $1, %eax
++3:
++	addl $8, %esp
++	CFI_ADJUST_CFA_OFFSET -8
++	RESTORE ebx
++	RESTORE ebp
++	ret
++4:
++	cmpl %edx, 4(%esp)
++	jne 2b
++	xorl %eax, %eax
++	jmp 3b
++	CFI_ENDPROC
++ENDPROC(atomic64_add_unless_cx8)
++
++ENTRY(atomic64_inc_not_zero_cx8)
++	CFI_STARTPROC
++	SAVE ebx
++
++	read64 %esi
++1:
++	testl %eax, %eax
++	je 4f
++2:
++	movl %eax, %ebx
++	movl %edx, %ecx
++	addl $1, %ebx
++	adcl $0, %ecx
++	LOCK_PREFIX
++	cmpxchg8b (%esi)
++	jne 1b
++
++	movl $1, %eax
++3:
++	RESTORE ebx
++	ret
++4:
++	testl %edx, %edx
++	jne 2b
++	jmp 3b
++	CFI_ENDPROC
++ENDPROC(atomic64_inc_not_zero_cx8)
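
Every operation in the assembly above is the same read / modify / cmpxchg8b retry
loop. For reference, the loop's structure in portable C11; this is a sketch of the
shape, not the kernel implementation:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Illustrative sketch: 64-bit atomic add built from a compare-exchange
     * retry loop, the same structure the cx8 assembly implements. */
    static int64_t atomic64_add_return_sketch(_Atomic int64_t *v, int64_t i)
    {
        int64_t old = atomic_load(v);
        int64_t new;

        do {
            new = old + i;  /* recompute against the freshest value */
        } while (!atomic_compare_exchange_weak(v, &old, new));
        return new;
    }
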
diff --git a/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch b/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
new file mode 100644
index 00000000000..94130228c91
--- /dev/null
+++ b/queue-2.6.38/vfs-avoid-large-kmalloc-s-for-the-fdtable.patch
@@ -0,0 +1,70 @@
+From 6d4831c283530a5f2c6bd8172c13efa236eb149d Mon Sep 17 00:00:00 2001
+From: Andrew Morton
+Date: Wed, 27 Apr 2011 15:26:41 -0700
+Subject: vfs: avoid large kmalloc()s for the fdtable
+
+From: Andrew Morton
+
+commit 6d4831c283530a5f2c6bd8172c13efa236eb149d upstream.
+
+Azurit reports large increases in system time after 2.6.36 when running
+Apache.  It was bisected down to a892e2d7dcdfa6c76e6 ("vfs: use kmalloc()
+to allocate fdmem if possible").
+
+That patch caused the vfs to use kmalloc() for very large allocations and
+this is causing excessive work (and presumably excessive reclaim) within
+the page allocator.
+
+Fix it by falling back to vmalloc() earlier - when the allocation attempt
+would have been considered "costly" by reclaim.
+
+Reported-by: azurIt
+Tested-by: azurIt
+Acked-by: Changli Gao
+Cc: Americo Wang
+Cc: Jiri Slaby
+Acked-by: Eric Dumazet
+Cc: Mel Gorman
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ fs/file.c |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/fs/file.c
++++ b/fs/file.c
+@@ -9,6 +9,7 @@
+ #include
+ #include
+ #include
++#include
+ #include
+ #include
+ #include
+@@ -39,14 +40,17 @@ int sysctl_nr_open_max = 1024 * 1024; /*
+  */
+ static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
+ 
+-static inline void *alloc_fdmem(unsigned int size)
++static void *alloc_fdmem(unsigned int size)
+ {
+-	void *data;
+-
+-	data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+-	if (data != NULL)
+-		return data;
+-
++	/*
++	 * Very large allocations can stress page reclaim, so fall back to
++	 * vmalloc() if the allocation size will be considered "large" by the VM.
++	 */
++	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
++		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
++		if (data != NULL)
++			return data;
++	}
+ 	return vmalloc(size);
+ }
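
The threshold in this last fix is the page allocator's notion of a "costly"
allocation: anything larger than PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER (8 pages
with the usual value of 3) skips kmalloc() entirely. A userspace analogue of the
resulting policy; big_alloc() is an illustrative stand-in for vmalloc():

    #include <stdlib.h>

    #define PAGE_SIZE               4096UL
    #define PAGE_ALLOC_COSTLY_ORDER 3       /* kernel's usual value */

    extern void *big_alloc(size_t size);    /* stand-in for vmalloc() */

    /* Illustrative sketch of the size-based allocator choice. */
    static void *alloc_fdmem_sketch(size_t size)
    {
        if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
            void *data = malloc(size);      /* cheap path for small sizes */
            if (data)
                return data;
        }
        return big_alloc(size);             /* large or failed: page-granular path */
    }
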