From: Greg Kroah-Hartman Date: Tue, 13 Dec 2011 20:11:59 +0000 (-0800) Subject: 3.1 patches X-Git-Tag: v3.0.14~11 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=98adf791a2ac19ba5efeecbd3ce2423af387c3d6;p=thirdparty%2Fkernel%2Fstable-queue.git 3.1 patches added patches: arm-7204-1-arch-arm-kernel-setup.c-initialize-arm_dma_zone_size-earlier.patch cifs-check-for-null-last_entry-before-calling-cifs_save_resume_key.patch hfs-fix-hfs_find_init-sb-ext_tree-null-ptr-oops.patch hwmon-jz4740-fix-signedness-bug.patch jbd-jbd2-validate-sb-s_first-in-journal_get_superblock.patch linux-log2.h-fix-rounddown_pow_of_two-1.patch mmc-mxcmmc-fix-falling-back-to-pio.patch x86-hpet-immediately-disable-hpet-timer-1-if-rtc-irq-is-masked.patch xen-pm_idle-make-pm_idle-be-default_idle-under-xen.patch --- diff --git a/queue-3.1/arm-7204-1-arch-arm-kernel-setup.c-initialize-arm_dma_zone_size-earlier.patch b/queue-3.1/arm-7204-1-arch-arm-kernel-setup.c-initialize-arm_dma_zone_size-earlier.patch new file mode 100644 index 00000000000..0127bac804c --- /dev/null +++ b/queue-3.1/arm-7204-1-arch-arm-kernel-setup.c-initialize-arm_dma_zone_size-earlier.patch @@ -0,0 +1,49 @@ +From 9811ccdfa94b4773c8030569bd8ec75eafa485ac Mon Sep 17 00:00:00 2001 +From: Arnaud Patard +Date: Sun, 11 Dec 2011 20:32:25 +0100 +Subject: ARM: 7204/1: arch/arm/kernel/setup.c: initialize arm_dma_zone_size earlier + +From: Arnaud Patard + +commit 9811ccdfa94b4773c8030569bd8ec75eafa485ac upstream. + +arm_dma_zone_size is used by arm_bootmem_free() which is called by +paging_init(). Thus it needs to be set before calling it. 
+ +Signed-off-by: Arnaud Patard +Acked-by: Nicolas Pitre +Signed-off-by: Russell King +Signed-off-by: Greg Kroah-Hartman + +--- + arch/arm/kernel/setup.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/arch/arm/kernel/setup.c ++++ b/arch/arm/kernel/setup.c +@@ -890,6 +890,12 @@ void __init setup_arch(char **cmdline_p) + machine_desc = mdesc; + machine_name = mdesc->name; + ++#ifdef CONFIG_ZONE_DMA ++ if (mdesc->dma_zone_size) { ++ extern unsigned long arm_dma_zone_size; ++ arm_dma_zone_size = mdesc->dma_zone_size; ++ } ++#endif + if (mdesc->soft_reboot) + reboot_setup("s"); + +@@ -920,12 +926,6 @@ void __init setup_arch(char **cmdline_p) + + tcm_init(); + +-#ifdef CONFIG_ZONE_DMA +- if (mdesc->dma_zone_size) { +- extern unsigned long arm_dma_zone_size; +- arm_dma_zone_size = mdesc->dma_zone_size; +- } +-#endif + #ifdef CONFIG_MULTI_IRQ_HANDLER + handle_arch_irq = mdesc->handle_irq; + #endif diff --git a/queue-3.1/cifs-check-for-null-last_entry-before-calling-cifs_save_resume_key.patch b/queue-3.1/cifs-check-for-null-last_entry-before-calling-cifs_save_resume_key.patch new file mode 100644 index 00000000000..af3482a9e93 --- /dev/null +++ b/queue-3.1/cifs-check-for-null-last_entry-before-calling-cifs_save_resume_key.patch @@ -0,0 +1,62 @@ +From 7023676f9ee851d94f0942e879243fc1f9081c47 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Thu, 1 Dec 2011 20:23:34 -0500 +Subject: cifs: check for NULL last_entry before calling cifs_save_resume_key + +From: Jeff Layton + +commit 7023676f9ee851d94f0942e879243fc1f9081c47 upstream. + +Prior to commit eaf35b1, cifs_save_resume_key had some NULL pointer +checks at the top. It turns out that at least one of those NULL +pointer checks is needed after all. + +When the LastNameOffset in a FIND reply appears to be beyond the end of +the buffer, CIFSFindFirst and CIFSFindNext will set srch_inf.last_entry +to NULL. Since eaf35b1, the code will now oops in this situation. 
+ +Fix this by having the callers check for a NULL last entry pointer +before calling cifs_save_resume_key. No change is needed for the +call site in cifs_readdir as it's not reachable with a NULL +current_entry pointer. + +This should fix: + + https://bugzilla.redhat.com/show_bug.cgi?id=750247 + +Cc: Christoph Hellwig +Reported-by: Adam G. Metzler +Signed-off-by: Jeff Layton +Signed-off-by: Steve French +Signed-off-by: Greg Kroah-Hartman + +--- + fs/cifs/readdir.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/cifs/readdir.c ++++ b/fs/cifs/readdir.c +@@ -554,7 +554,10 @@ static int find_cifs_entry(const int xid + rc); + return rc; + } +- cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); ++ /* FindFirst/Next set last_entry to NULL on malformed reply */ ++ if (cifsFile->srch_inf.last_entry) ++ cifs_save_resume_key(cifsFile->srch_inf.last_entry, ++ cifsFile); + } + + while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && +@@ -562,7 +565,10 @@ static int find_cifs_entry(const int xid + cFYI(1, "calling findnext2"); + rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, + &cifsFile->srch_inf); +- cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); ++ /* FindFirst/Next set last_entry to NULL on malformed reply */ ++ if (cifsFile->srch_inf.last_entry) ++ cifs_save_resume_key(cifsFile->srch_inf.last_entry, ++ cifsFile); + if (rc) + return -ENOENT; + } diff --git a/queue-3.1/hfs-fix-hfs_find_init-sb-ext_tree-null-ptr-oops.patch b/queue-3.1/hfs-fix-hfs_find_init-sb-ext_tree-null-ptr-oops.patch new file mode 100644 index 00000000000..2a669374ac1 --- /dev/null +++ b/queue-3.1/hfs-fix-hfs_find_init-sb-ext_tree-null-ptr-oops.patch @@ -0,0 +1,90 @@ +From 434a964daa14b9db083ce20404a4a2add54d037a Mon Sep 17 00:00:00 2001 +From: Phillip Lougher +Date: Wed, 2 Nov 2011 13:38:01 -0700 +Subject: hfs: fix hfs_find_init() sb->ext_tree NULL ptr oops + +From: Phillip Lougher + +commit 434a964daa14b9db083ce20404a4a2add54d037a 
upstream. + +Clement Lecigne reports a filesystem which causes a kernel oops in +hfs_find_init() trying to dereference sb->ext_tree which is NULL. + +This proves to be because the filesystem has a corrupted MDB extent +record, where the extents file does not fit into the first three extents +in the file record (the first blocks). + +In hfs_get_block() when looking up the blocks for the extent file +(HFS_EXT_CNID), it fails the first blocks special case, and falls +through to the extent code (which ultimately calls hfs_find_init()) +which is in the process of being initialised. + +Hfs avoids this scenario by always having the extents b-tree fitting +into the first blocks (the extents B-tree can't have overflow extents). + +The fix is to check at mount time that the B-tree fits into first +blocks, i.e. fail if HFS_I(inode)->alloc_blocks >= +HFS_I(inode)->first_blocks + +Note, the existing commit 47f365eb57573 ("hfs: fix oops on mount with +corrupted btree extent records") becomes subsumed into this as a special +case, but only for the extents B-tree (HFS_EXT_CNID), it is perfectly +acceptable for the catalog B-Tree file to grow beyond three extents, +with the remaining extent descriptors in the extents overflow. 
+ +This fixes CVE-2011-2203 + +Reported-by: Clement LECIGNE +Signed-off-by: Phillip Lougher +Cc: Jeff Mahoney +Cc: Christoph Hellwig +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Moritz Mühlenhoff + +--- + fs/hfs/btree.c | 20 +++++++++++++++----- + 1 file changed, 15 insertions(+), 5 deletions(-) + +--- a/fs/hfs/btree.c ++++ b/fs/hfs/btree.c +@@ -46,11 +46,26 @@ struct hfs_btree *hfs_btree_open(struct + case HFS_EXT_CNID: + hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, + mdb->drXTFlSize, be32_to_cpu(mdb->drXTClpSiz)); ++ if (HFS_I(tree->inode)->alloc_blocks > ++ HFS_I(tree->inode)->first_blocks) { ++ printk(KERN_ERR "hfs: invalid btree extent records\n"); ++ unlock_new_inode(tree->inode); ++ goto free_inode; ++ } ++ + tree->inode->i_mapping->a_ops = &hfs_btree_aops; + break; + case HFS_CAT_CNID: + hfs_inode_read_fork(tree->inode, mdb->drCTExtRec, mdb->drCTFlSize, + mdb->drCTFlSize, be32_to_cpu(mdb->drCTClpSiz)); ++ ++ if (!HFS_I(tree->inode)->first_blocks) { ++ printk(KERN_ERR "hfs: invalid btree extent records " ++ "(0 size).\n"); ++ unlock_new_inode(tree->inode); ++ goto free_inode; ++ } ++ + tree->inode->i_mapping->a_ops = &hfs_btree_aops; + break; + default: +@@ -59,11 +74,6 @@ struct hfs_btree *hfs_btree_open(struct + } + unlock_new_inode(tree->inode); + +- if (!HFS_I(tree->inode)->first_blocks) { +- printk(KERN_ERR "hfs: invalid btree extent records (0 size).\n"); +- goto free_inode; +- } +- + mapping = tree->inode->i_mapping; + page = read_mapping_page(mapping, 0, NULL); + if (IS_ERR(page)) diff --git a/queue-3.1/hwmon-jz4740-fix-signedness-bug.patch b/queue-3.1/hwmon-jz4740-fix-signedness-bug.patch new file mode 100644 index 00000000000..878aa717a43 --- /dev/null +++ b/queue-3.1/hwmon-jz4740-fix-signedness-bug.patch @@ -0,0 +1,32 @@ +From 0b57d7602b68f7b2786b2f0e22da39cbd4139a95 Mon Sep 17 00:00:00 2001 +From: Axel Lin +Date: Thu, 8 Dec 2011 08:04:12 -0500 +Subject: hwmon: (jz4740) fix signedness bug + +From: Axel 
Lin + +commit 0b57d7602b68f7b2786b2f0e22da39cbd4139a95 upstream. + +wait_for_completion_interruptible_timeout() may return negative value. +In this case, checking if (t > 0) will return true if t is unsigned. + +Signed-off-by: Axel Lin +Acked-by: Lars-Peter Clausen +Signed-off-by: Guenter Roeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/hwmon/jz4740-hwmon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/hwmon/jz4740-hwmon.c ++++ b/drivers/hwmon/jz4740-hwmon.c +@@ -59,7 +59,7 @@ static ssize_t jz4740_hwmon_read_adcin(s + { + struct jz4740_hwmon *hwmon = dev_get_drvdata(dev); + struct completion *completion = &hwmon->read_completion; +- unsigned long t; ++ long t; + unsigned long val; + int ret; + diff --git a/queue-3.1/jbd-jbd2-validate-sb-s_first-in-journal_get_superblock.patch b/queue-3.1/jbd-jbd2-validate-sb-s_first-in-journal_get_superblock.patch new file mode 100644 index 00000000000..18eb4629560 --- /dev/null +++ b/queue-3.1/jbd-jbd2-validate-sb-s_first-in-journal_get_superblock.patch @@ -0,0 +1,95 @@ +From 8762202dd0d6e46854f786bdb6fb3780a1625efe Mon Sep 17 00:00:00 2001 +From: Eryu Guan +Date: Tue, 1 Nov 2011 19:04:59 -0400 +Subject: jbd/jbd2: validate sb->s_first in journal_get_superblock() + +From: Eryu Guan + +commit 8762202dd0d6e46854f786bdb6fb3780a1625efe upstream. + +I hit a J_ASSERT(blocknr != 0) failure in cleanup_journal_tail() when +mounting a fsfuzzed ext3 image. It turns out that the corrupted ext3 +image has s_first = 0 in journal superblock, and the 0 is passed to +journal->j_head in journal_reset(), then to blocknr in +cleanup_journal_tail(), in the end the J_ASSERT failed. + +So validate s_first after reading journal superblock from disk in +journal_get_superblock() to ensure s_first is valid. 
+ +The following script could reproduce it: + +fstype=ext3 +blocksize=1024 +img=$fstype.img +offset=0 +found=0 +magic="c0 3b 39 98" + +dd if=/dev/zero of=$img bs=1M count=8 +mkfs -t $fstype -b $blocksize -F $img +filesize=`stat -c %s $img` +while [ $offset -lt $filesize ] +do + if od -j $offset -N 4 -t x1 $img | grep -i "$magic";then + echo "Found journal: $offset" + found=1 + break + fi + offset=`echo "$offset+$blocksize" | bc` +done + +if [ $found -ne 1 ];then + echo "Magic \"$magic\" not found" + exit 1 +fi + +dd if=/dev/zero of=$img seek=$(($offset+23)) conv=notrunc bs=1 count=1 + +mkdir -p ./mnt +mount -o loop $img ./mnt + +Cc: Jan Kara +Signed-off-by: Eryu Guan +Signed-off-by: "Theodore Ts'o" +Cc: Moritz Mühlenhoff +Signed-off-by: Greg Kroah-Hartman + +--- + fs/jbd/journal.c | 8 ++++++++ + fs/jbd2/journal.c | 8 ++++++++ + 2 files changed, 16 insertions(+) + +--- a/fs/jbd/journal.c ++++ b/fs/jbd/journal.c +@@ -1135,6 +1135,14 @@ static int journal_get_superblock(journa + goto out; + } + ++ if (be32_to_cpu(sb->s_first) == 0 || ++ be32_to_cpu(sb->s_first) >= journal->j_maxlen) { ++ printk(KERN_WARNING ++ "JBD: Invalid start block of journal: %u\n", ++ be32_to_cpu(sb->s_first)); ++ goto out; ++ } ++ + return 0; + + out: +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1251,6 +1251,14 @@ static int journal_get_superblock(journa + goto out; + } + ++ if (be32_to_cpu(sb->s_first) == 0 || ++ be32_to_cpu(sb->s_first) >= journal->j_maxlen) { ++ printk(KERN_WARNING ++ "JBD2: Invalid start block of journal: %u\n", ++ be32_to_cpu(sb->s_first)); ++ goto out; ++ } ++ + return 0; + + out: diff --git a/queue-3.1/linux-log2.h-fix-rounddown_pow_of_two-1.patch b/queue-3.1/linux-log2.h-fix-rounddown_pow_of_two-1.patch new file mode 100644 index 00000000000..8ea1917ef16 --- /dev/null +++ b/queue-3.1/linux-log2.h-fix-rounddown_pow_of_two-1.patch @@ -0,0 +1,51 @@ +From 13c07b0286d340275f2d97adf085cecda37ede37 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Mon, 12 Dec 
2011 22:06:55 -0800 +Subject: linux/log2.h: Fix rounddown_pow_of_two(1) + +From: Linus Torvalds + +commit 13c07b0286d340275f2d97adf085cecda37ede37 upstream. + +Exactly like roundup_pow_of_two(1), the rounddown version was buggy for +the case of a compile-time constant '1' argument. Probably because it +originated from the same code, sharing history with the roundup version +from before the bugfix (for that one, see commit 1a06a52ee1b0: "Fix +roundup_pow_of_two(1)"). + +However, unlike the roundup version, the fix for rounddown is to just +remove the broken special case entirely. It's simply not needed - the +generic code + + 1UL << ilog2(n) + +does the right thing for the constant '1' argument too. The only reason +roundup needed that special case was because rounding up does so by +subtracting one from the argument (and then adding one to the result) +causing the obvious problems with "ilog2(0)". + +But rounddown doesn't do any of that, since ilog2() naturally truncates +(ie "rounds down") to the right rounded down value. And without the +ilog2(0) case, there's no reason for the special case that had the wrong +value. + +tl;dr: rounddown_pow_of_two(1) should be 1, not 0. + +Acked-by: Dmitry Torokhov +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/log2.h | 1 - + 1 file changed, 1 deletion(-) + +--- a/include/linux/log2.h ++++ b/include/linux/log2.h +@@ -185,7 +185,6 @@ unsigned long __rounddown_pow_of_two(uns + #define rounddown_pow_of_two(n) \ + ( \ + __builtin_constant_p(n) ? ( \ +- (n == 1) ? 
0 : \ + (1UL << ilog2(n))) : \ + __rounddown_pow_of_two(n) \ + ) diff --git a/queue-3.1/mmc-mxcmmc-fix-falling-back-to-pio.patch b/queue-3.1/mmc-mxcmmc-fix-falling-back-to-pio.patch new file mode 100644 index 00000000000..d1e75fe18ab --- /dev/null +++ b/queue-3.1/mmc-mxcmmc-fix-falling-back-to-pio.patch @@ -0,0 +1,33 @@ +From e58f516ff4730c4047c3f104b061f7a03e9a263c Mon Sep 17 00:00:00 2001 +From: Sascha Hauer +Date: Fri, 11 Nov 2011 16:28:05 +0100 +Subject: mmc: mxcmmc: fix falling back to PIO + +From: Sascha Hauer + +commit e58f516ff4730c4047c3f104b061f7a03e9a263c upstream. + +When we can't configure the dma channel we want to fall +back to PIO. We do this by setting host->do_dma to zero. +This does not work as do_dma is used to see whether dma +can be used for the current transfer. Instead, we have +to set host->dma to NULL. + +Signed-off-by: Sascha Hauer +Signed-off-by: Chris Ball +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/mmc/host/mxcmmc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/mmc/host/mxcmmc.c ++++ b/drivers/mmc/host/mxcmmc.c +@@ -731,6 +731,7 @@ static void mxcmci_set_ios(struct mmc_ho + "failed to config DMA channel. 
Falling back to PIO\n"); + dma_release_channel(host->dma); + host->do_dma = 0; ++ host->dma = NULL; + } + } + diff --git a/queue-3.1/series b/queue-3.1/series index eb799bae163..c5b5290895f 100644 --- a/queue-3.1/series +++ b/queue-3.1/series @@ -25,3 +25,12 @@ target-fix-page-length-in-emulated-inquiry-vpd-page-86h.patch iscsi-target-add-missing-f_bit-for-iscsi_tm_rsp.patch target-file-walk-properly-over-sg-list.patch percpu-fix-chunk-range-calculation.patch +cifs-check-for-null-last_entry-before-calling-cifs_save_resume_key.patch +linux-log2.h-fix-rounddown_pow_of_two-1.patch +hwmon-jz4740-fix-signedness-bug.patch +arm-7204-1-arch-arm-kernel-setup.c-initialize-arm_dma_zone_size-earlier.patch +mmc-mxcmmc-fix-falling-back-to-pio.patch +xen-pm_idle-make-pm_idle-be-default_idle-under-xen.patch +x86-hpet-immediately-disable-hpet-timer-1-if-rtc-irq-is-masked.patch +jbd-jbd2-validate-sb-s_first-in-journal_get_superblock.patch +hfs-fix-hfs_find_init-sb-ext_tree-null-ptr-oops.patch diff --git a/queue-3.1/x86-hpet-immediately-disable-hpet-timer-1-if-rtc-irq-is-masked.patch b/queue-3.1/x86-hpet-immediately-disable-hpet-timer-1-if-rtc-irq-is-masked.patch new file mode 100644 index 00000000000..3255fa00f97 --- /dev/null +++ b/queue-3.1/x86-hpet-immediately-disable-hpet-timer-1-if-rtc-irq-is-masked.patch @@ -0,0 +1,84 @@ +From 2ded6e6a94c98ea453a156748cb7fabaf39a76b9 Mon Sep 17 00:00:00 2001 +From: Mark Langsdorf +Date: Fri, 18 Nov 2011 16:33:06 +0100 +Subject: x86, hpet: Immediately disable HPET timer 1 if rtc irq is masked + +From: Mark Langsdorf + +commit 2ded6e6a94c98ea453a156748cb7fabaf39a76b9 upstream. + +When HPET is operating in RTC mode, the TN_ENABLE bit on timer1 +controls whether the HPET or the RTC delivers interrupts to irq8. When +the system goes into suspend, the RTC driver sends a signal to the +HPET driver so that the HPET releases control of irq8, allowing the +RTC to wake the system from suspend. 
The switchover is accomplished by +a write to the HPET configuration registers which currently only +occurs while servicing the HPET interrupt. + +On some systems, I have seen the system suspend before an HPET +interrupt occurs, preventing the write to the HPET configuration +register and leaving the HPET in control of the irq8. As the HPET is +not active during suspend, it does not generate a wake signal and RTC +alarms do not work. + +This patch forces the HPET driver to immediately transfer control of +the irq8 channel to the RTC instead of waiting until the next +interrupt event. + +Signed-off-by: Mark Langsdorf +Link: http://lkml.kernel.org/r/20111118153306.GB16319@alberich.amd.com +Tested-by: Andreas Herrmann +Signed-off-by: Andreas Herrmann +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/hpet.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +--- a/arch/x86/kernel/hpet.c ++++ b/arch/x86/kernel/hpet.c +@@ -1048,6 +1048,14 @@ int hpet_rtc_timer_init(void) + } + EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); + ++static void hpet_disable_rtc_channel(void) ++{ ++ unsigned long cfg; ++ cfg = hpet_readl(HPET_T1_CFG); ++ cfg &= ~HPET_TN_ENABLE; ++ hpet_writel(cfg, HPET_T1_CFG); ++} ++ + /* + * The functions below are called from rtc driver. + * Return 0 if HPET is not being used. 
+@@ -1059,6 +1067,9 @@ int hpet_mask_rtc_irq_bit(unsigned long + return 0; + + hpet_rtc_flags &= ~bit_mask; ++ if (unlikely(!hpet_rtc_flags)) ++ hpet_disable_rtc_channel(); ++ + return 1; + } + EXPORT_SYMBOL_GPL(hpet_mask_rtc_irq_bit); +@@ -1124,15 +1135,11 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); + + static void hpet_rtc_timer_reinit(void) + { +- unsigned int cfg, delta; ++ unsigned int delta; + int lost_ints = -1; + +- if (unlikely(!hpet_rtc_flags)) { +- cfg = hpet_readl(HPET_T1_CFG); +- cfg &= ~HPET_TN_ENABLE; +- hpet_writel(cfg, HPET_T1_CFG); +- return; +- } ++ if (unlikely(!hpet_rtc_flags)) ++ hpet_disable_rtc_channel(); + + if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) + delta = hpet_default_delta; diff --git a/queue-3.1/xen-pm_idle-make-pm_idle-be-default_idle-under-xen.patch b/queue-3.1/xen-pm_idle-make-pm_idle-be-default_idle-under-xen.patch new file mode 100644 index 00000000000..4ceef786b81 --- /dev/null +++ b/queue-3.1/xen-pm_idle-make-pm_idle-be-default_idle-under-xen.patch @@ -0,0 +1,93 @@ +From e5fd47bfab2df0c2184cc0bf4245d8e1bb7724fb Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 21 Nov 2011 18:02:02 -0500 +Subject: xen/pm_idle: Make pm_idle be default_idle under Xen. + +From: Konrad Rzeszutek Wilk + +commit e5fd47bfab2df0c2184cc0bf4245d8e1bb7724fb upstream. + +The idea behind commit d91ee5863b71 ("cpuidle: replace xen access to x86 +pm_idle and default_idle") was to have one call - disable_cpuidle() +which would make pm_idle not be molested by other code. It disallows +cpuidle_idle_call to be set to pm_idle (which is excellent). + +But in the select_idle_routine() and idle_setup(), the pm_idle can still +be set to either: amd_e400_idle, mwait_idle or default_idle. This +depends on some CPU flags (MWAIT) and in AMD case on the type of CPU. 
+ +In case of mwait_idle we can hit some instances where the hypervisor +(Amazon EC2 specifically) sets the MWAIT and we get: + + Brought up 2 CPUs + invalid opcode: 0000 [#1] SMP + + Pid: 0, comm: swapper Not tainted 3.1.0-0.rc6.git0.3.fc16.x86_64 #1 + RIP: e030:[] [] mwait_idle+0x6f/0xb4 + ... + Call Trace: + [] cpu_idle+0xae/0xe8 + [] cpu_bringup_and_idle+0xe/0x10 + RIP [] mwait_idle+0x6f/0xb4 + RSP + +In the case of amd_e400_idle we don't get so spectacular crashes, but we +do end up making an MSR which is trapped in the hypervisor, and then +follow it up with a yield hypercall. Meaning we end up going to +hypervisor twice instead of just once. + +The previous behavior before v3.0 was that pm_idle was set to +default_idle regardless of select_idle_routine/idle_setup. + +We want to do that, but only for one specific case: Xen. This patch +does that. + +Fixes RH BZ #739499 and Ubuntu #881076 +Reported-by: Stefan Bader +Signed-off-by: Konrad Rzeszutek Wilk +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/system.h | 1 + + arch/x86/kernel/process.c | 8 ++++++++ + arch/x86/xen/setup.c | 2 +- + 3 files changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/system.h ++++ b/arch/x86/include/asm/system.h +@@ -401,6 +401,7 @@ extern unsigned long arch_align_stack(un + extern void free_init_pages(char *what, unsigned long begin, unsigned long end); + + void default_idle(void); ++bool set_pm_idle_to_default(void); + + void stop_this_cpu(void *dummy); + +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -403,6 +403,14 @@ void default_idle(void) + EXPORT_SYMBOL(default_idle); + #endif + ++bool set_pm_idle_to_default(void) ++{ ++ bool ret = !!pm_idle; ++ ++ pm_idle = default_idle; ++ ++ return ret; ++} + void stop_this_cpu(void *dummy) + { + local_irq_disable(); +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -448,6 +448,6 @@ void __init xen_arch_setup(void) + #endif + 
disable_cpuidle(); + boot_option_idle_override = IDLE_HALT; +- ++ WARN_ON(set_pm_idle_to_default()); + fiddle_vdso(); + }