From: Chris Wright Date: Tue, 31 Mar 2009 05:10:37 +0000 (-0700) Subject: more 2.6.29 patches X-Git-Tag: v2.6.29.1~9 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=beb6eebadb7036dc99f7b522f74dbc13264c7fca;p=thirdparty%2Fkernel%2Fstable-queue.git more 2.6.29 patches --- diff --git a/queue-2.6.29/add-a-missing-unlock_kernel-in-raw_open.patch b/queue-2.6.29/add-a-missing-unlock_kernel-in-raw_open.patch new file mode 100644 index 00000000000..1070594b0f3 --- /dev/null +++ b/queue-2.6.29/add-a-missing-unlock_kernel-in-raw_open.patch @@ -0,0 +1,29 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:20 2009 +Date: Mon, 30 Mar 2009 18:50:16 GMT +Message-Id: <200903301850.n2UIoGIH028079@hera.kernel.org> +From: Dan Carpenter +To: stable@kernel.org +Subject: Add a missing unlock_kernel() in raw_open() + +From: Dan Carpenter + +upstream commit: 996ff68d8b358885c1de82a45517c607999947c7 + +Cc: stable@kernel.org +Signed-off-by: Dan Carpenter +Signed-off-by: Jonathan Corbet +Signed-off-by: Chris Wright +--- + drivers/char/raw.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/char/raw.c ++++ b/drivers/char/raw.c +@@ -90,6 +90,7 @@ out1: + blkdev_put(bdev, filp->f_mode); + out: + mutex_unlock(&raw_mutex); ++ unlock_kernel(); + return err; + } + diff --git a/queue-2.6.29/arm-5428-1-module-relocation-update-for-r_arm_v4bx.patch b/queue-2.6.29/arm-5428-1-module-relocation-update-for-r_arm_v4bx.patch new file mode 100644 index 00000000000..13d4f043321 --- /dev/null +++ b/queue-2.6.29/arm-5428-1-module-relocation-update-for-r_arm_v4bx.patch @@ -0,0 +1,57 @@ +From 4731f8b66dd34ebf0e67ca6ba9162b0e509bec06 Mon Sep 17 00:00:00 2001 +From: Daniel Silverstone +Message-ID: +Date: Fri, 20 Mar 2009 11:11:43 +0100 +Subject: ARM: 5428/1: Module relocation update for R_ARM_V4BX + +upstream commit: 4731f8b66dd34ebf0e67ca6ba9162b0e509bec06 + +It would seem when building kernel modules with modern binutils +(required by modern GCC) for ARM v4T targets (specifically observed +with 
the Samsung 24xx SoC which is an 920T) R_ARM_V4BX relocations +are emitted for function epilogues. + +This manifests at module load time with an "unknown relocation: 40" +error message. + +The following patch adds the R_ARM_V4BX relocation to the ARM kernel +module loader. The relocation operation is taken from that within the +binutils bfd library. + +Signed-off-by: Simtec Linux Team +Signed-off-by: Vincent Sanders +Signed-off-by: Russell King +Signed-off-by: Chris Wright +--- + arch/arm/include/asm/elf.h | 1 + + arch/arm/kernel/module.c | 9 +++++++++ + 2 files changed, 10 insertions(+) + +--- a/arch/arm/include/asm/elf.h ++++ b/arch/arm/include/asm/elf.h +@@ -50,6 +50,7 @@ typedef struct user_fp elf_fpregset_t; + #define R_ARM_ABS32 2 + #define R_ARM_CALL 28 + #define R_ARM_JUMP24 29 ++#define R_ARM_V4BX 40 + + /* + * These are used to set parameters in the core dumps. +--- a/arch/arm/kernel/module.c ++++ b/arch/arm/kernel/module.c +@@ -132,6 +132,15 @@ apply_relocate(Elf32_Shdr *sechdrs, cons + *(u32 *)loc |= offset & 0x00ffffff; + break; + ++ case R_ARM_V4BX: ++ /* Preserve Rm and the condition code. Alter ++ * other bits to re-code instruction as ++ * MOV PC,Rm. 
++ */ ++ *(u32 *)loc &= 0xf000000f; ++ *(u32 *)loc |= 0x01a0f000; ++ break; ++ + default: + printk(KERN_ERR "%s: unknown relocation: %u\n", + module->name, ELF32_R_TYPE(rel->r_info)); diff --git a/queue-2.6.29/arm-5435-1-fix-compile-warning-in-sanity_check_meminfo.patch b/queue-2.6.29/arm-5435-1-fix-compile-warning-in-sanity_check_meminfo.patch new file mode 100644 index 00000000000..3150a0397c8 --- /dev/null +++ b/queue-2.6.29/arm-5435-1-fix-compile-warning-in-sanity_check_meminfo.patch @@ -0,0 +1,39 @@ +From f0bba9f934517533acbda7329be93f55d5a01c03 Mon Sep 17 00:00:00 2001 +Message-ID: +From: Mikael Pettersson +Date: Sat, 28 Mar 2009 19:18:05 +0100 +Subject: ARM: 5435/1: fix compile warning in sanity_check_meminfo() + +upstream commit: f0bba9f934517533acbda7329be93f55d5a01c03 + +Compiling recent 2.6.29-rc kernels for ARM gives me the following warning: + +arch/arm/mm/mmu.c: In function 'sanity_check_meminfo': +arch/arm/mm/mmu.c:697: warning: comparison between pointer and integer + +This is because commit 3fd9825c42c784a59b3b90bdf073f49d4bb42a8d +"[ARM] 5402/1: fix a case of wrap-around in sanity_check_meminfo()" +in 2.6.29-rc5-git4 added a comparison of a pointer with PAGE_OFFSET, +which is an integer. + +Fixed by casting PAGE_OFFSET to void *. + +Signed-off-by: Mikael Pettersson +Acked-by: Nicolas Pitre +Signed-off-by: Russell King +Signed-off-by: Chris Wright +--- + arch/arm/mm/mmu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/mm/mmu.c ++++ b/arch/arm/mm/mmu.c +@@ -694,7 +694,7 @@ static void __init sanity_check_meminfo( + * the vmalloc area. 
+ */ + if (__va(bank->start) >= VMALLOC_MIN || +- __va(bank->start) < PAGE_OFFSET) { ++ __va(bank->start) < (void *)PAGE_OFFSET) { + printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx " + "(vmalloc region overlap).\n", + bank->start, bank->start + bank->size - 1); diff --git a/queue-2.6.29/arm-cumana-fix-a-long-standing-bogon.patch b/queue-2.6.29/arm-cumana-fix-a-long-standing-bogon.patch new file mode 100644 index 00000000000..f264dc7eb13 --- /dev/null +++ b/queue-2.6.29/arm-cumana-fix-a-long-standing-bogon.patch @@ -0,0 +1,30 @@ +From ecbf61e7357d5c7047c813edd6983902d158688c Mon Sep 17 00:00:00 2001 +Message-ID: +From: Alan Cox +Date: Mon, 23 Mar 2009 10:37:57 +0000 +Subject: ARM: cumana: Fix a long standing bogon + +upstream commit: ecbf61e7357d5c7047c813edd6983902d158688c + +Should be using strncmp as the data from user space may be unterminated + +(Bug #8004) + +Signed-off-by: Alan Cox +Signed-off-by: Chris Wright +--- + drivers/scsi/arm/cumana_2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/arm/cumana_2.c ++++ b/drivers/scsi/arm/cumana_2.c +@@ -318,7 +318,7 @@ cumanascsi_2_set_proc_info(struct Scsi_H + { + int ret = length; + +- if (length >= 11 && strcmp(buffer, "CUMANASCSI2") == 0) { ++ if (length >= 11 && strncmp(buffer, "CUMANASCSI2", 11) == 0) { + buffer += 11; + length -= 11; + diff --git a/queue-2.6.29/arm-fix-leak-in-iop13xx-pci.patch b/queue-2.6.29/arm-fix-leak-in-iop13xx-pci.patch new file mode 100644 index 00000000000..82821eb23be --- /dev/null +++ b/queue-2.6.29/arm-fix-leak-in-iop13xx-pci.patch @@ -0,0 +1,39 @@ +From b23c7a427e4b3764ad686a46de89ab652811c50a Mon Sep 17 00:00:00 2001 +Message-ID: +From: Alan Cox +Date: Mon, 23 Mar 2009 10:44:07 +0000 +Subject: ARM: fix leak in iop13xx/pci + +upstream commit: b23c7a427e4b3764ad686a46de89ab652811c50a + +Another leak found by Daniel Marjamäki + +Signed-off-by: Alan Cox +Signed-off-by: Russell King +Signed-off-by: Chris Wright +--- + arch/arm/mach-iop13xx/pci.c | 5 ++++- + 
1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/arch/arm/mach-iop13xx/pci.c ++++ b/arch/arm/mach-iop13xx/pci.c +@@ -1026,8 +1026,10 @@ int iop13xx_pci_setup(int nr, struct pci + which_atu = 0; + } + +- if (!which_atu) ++ if (!which_atu) { ++ kfree(res); + return 0; ++ } + + switch(which_atu) { + case IOP13XX_INIT_ATU_ATUX: +@@ -1074,6 +1076,7 @@ int iop13xx_pci_setup(int nr, struct pci + sys->map_irq = iop13xx_pcie_map_irq; + break; + default: ++ kfree(res); + return 0; + } + diff --git a/queue-2.6.29/arm-pxa-fix-overlay-being-un-necessarily-initialized-on-pxa25x.patch b/queue-2.6.29/arm-pxa-fix-overlay-being-un-necessarily-initialized-on-pxa25x.patch new file mode 100644 index 00000000000..257b934846e --- /dev/null +++ b/queue-2.6.29/arm-pxa-fix-overlay-being-un-necessarily-initialized-on-pxa25x.patch @@ -0,0 +1,53 @@ +From 782385ae176b304c7105051e1b06c68bc0b4a2ba Mon Sep 17 00:00:00 2001 +Message-ID: +From: Eric Miao +Cc: Russell King +Date: Thu, 19 Mar 2009 15:24:30 +0800 +Subject: ARM: pxa: fix overlay being un-necessarily initialized on pxa25x + +upstream commit: 782385ae176b304c7105051e1b06c68bc0b4a2ba + +pxa25x doesn't support overlay in its LCD controller, this patch adds +pxafb_overlay_supported() functions to check the initialization is +necessary. 
+ +Signed-off-by: Eric Miao +Signed-off-by: Chris Wright +--- + drivers/video/pxafb.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/drivers/video/pxafb.c ++++ b/drivers/video/pxafb.c +@@ -883,10 +883,21 @@ static void __devinit init_pxafb_overlay + init_completion(&ofb->branch_done); + } + ++static inline int pxafb_overlay_supported(void) ++{ ++ if (cpu_is_pxa27x() || cpu_is_pxa3xx()) ++ return 1; ++ ++ return 0; ++} ++ + static int __devinit pxafb_overlay_init(struct pxafb_info *fbi) + { + int i, ret; + ++ if (!pxafb_overlay_supported()) ++ return 0; ++ + for (i = 0; i < 2; i++) { + init_pxafb_overlay(fbi, &fbi->overlay[i], i); + ret = register_framebuffer(&fbi->overlay[i].fb); +@@ -909,6 +920,9 @@ static void __devexit pxafb_overlay_exit + { + int i; + ++ if (!pxafb_overlay_supported()) ++ return; ++ + for (i = 0; i < 2; i++) + unregister_framebuffer(&fbi->overlay[i].fb); + } diff --git a/queue-2.6.29/arm-twl4030-leak-fix.patch b/queue-2.6.29/arm-twl4030-leak-fix.patch new file mode 100644 index 00000000000..cd869f12782 --- /dev/null +++ b/queue-2.6.29/arm-twl4030-leak-fix.patch @@ -0,0 +1,29 @@ +From 803c78e4da28d7d7cb0642caf643b9289ae7838a Mon Sep 17 00:00:00 2001 +Message-ID: +From: Alan Cox +Date: Mon, 23 Mar 2009 10:43:54 +0000 +Subject: ARM: twl4030 - leak fix + +upstream commit: 803c78e4da28d7d7cb0642caf643b9289ae7838a + +Trivial error path leak fix. 
Problem found by Daniel Marjamäki using +cppcheck + +Signed-off-by: Alan Cox +Acked-by: Tony Lindgren +Signed-off-by: Russell King +Signed-off-by: Chris Wright +--- + arch/arm/mach-omap2/mmc-twl4030.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/arch/arm/mach-omap2/mmc-twl4030.c ++++ b/arch/arm/mach-omap2/mmc-twl4030.c +@@ -397,6 +397,7 @@ void __init twl4030_mmc_init(struct twl4 + break; + default: + pr_err("MMC%d configuration not supported!\n", c->mmc); ++ kfree(mmc); + continue; + } + hsmmc_data[c->mmc - 1] = mmc; diff --git a/queue-2.6.29/cfg80211-fix-incorrect-assumption-on-last_request-for-11d.patch b/queue-2.6.29/cfg80211-fix-incorrect-assumption-on-last_request-for-11d.patch new file mode 100644 index 00000000000..049ffd56eb4 --- /dev/null +++ b/queue-2.6.29/cfg80211-fix-incorrect-assumption-on-last_request-for-11d.patch @@ -0,0 +1,57 @@ +From stable-bounces@linux.kernel.org Sat Mar 28 01:45:13 2009 +Date: Sat, 28 Mar 2009 01:45:08 GMT +Message-Id: <200903280145.n2S1j8ES031009@hera.kernel.org> +From: Luis R. Rodriguez +To: stable@kernel.org +Subject: cfg80211: fix incorrect assumption on last_request for 11d + +From: Luis R. Rodriguez + +upstream commit: cc0b6fe88e99096868bdbacbf486c97299533b5a + +The incorrect assumption is that the last regulatory request +(last_request) is always a country IE when processing +country IEs. Although this is true 99% of the time, +it might not be true the first time this happens. + +This fixes an oops in the branch check for the last_request +when accessing drv_last_ie. The access was done under the +assumption the struct won't be null. + +Note to stable: to port to 29 replace as follows, only 29 has +country IE code: + +s|NL80211_REGDOM_SET_BY_COUNTRY_IE|REGDOM_SET_BY_COUNTRY_IE + +Cc: stable@kernel.org +Reported-by: Quentin Armitage +Signed-off-by: Luis R. Rodriguez +Signed-off-by: John W. 
Linville +[chrisw: backport to 2.6.29] +Signed-off-by: Chris Wright +--- + net/wireless/reg.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/net/wireless/reg.c ++++ b/net/wireless/reg.c +@@ -1083,6 +1083,8 @@ EXPORT_SYMBOL(regulatory_hint); + static bool reg_same_country_ie_hint(struct wiphy *wiphy, + u32 country_ie_checksum) + { ++ if (unlikely(last_request->initiator != REGDOM_SET_BY_COUNTRY_IE)) ++ return false; + if (!last_request->wiphy) + return false; + if (likely(last_request->wiphy != wiphy)) +@@ -1133,7 +1135,9 @@ void regulatory_hint_11d(struct wiphy *w + /* We will run this for *every* beacon processed for the BSSID, so + * we optimize an early check to exit out early if we don't have to + * do anything */ +- if (likely(last_request->wiphy)) { ++ if (likely(last_request->initiator == ++ REGDOM_SET_BY_COUNTRY_IE && ++ likely(last_request->wiphy))) { + struct cfg80211_registered_device *drv_last_ie; + + drv_last_ie = wiphy_to_dev(last_request->wiphy); diff --git a/queue-2.6.29/fuse-fix-fuse_file_lseek-returning-with-lock-held.patch b/queue-2.6.29/fuse-fix-fuse_file_lseek-returning-with-lock-held.patch new file mode 100644 index 00000000000..1d35bdafcb0 --- /dev/null +++ b/queue-2.6.29/fuse-fix-fuse_file_lseek-returning-with-lock-held.patch @@ -0,0 +1,41 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:19 2009 +Date: Mon, 30 Mar 2009 18:50:13 GMT +Message-Id: <200903301850.n2UIoDge028032@hera.kernel.org> +From: Dan Carpenter +To: stable@kernel.org +Subject: fuse: fix fuse_file_lseek returning with lock held + +From: Dan Carpenter + +upstream commit: 5291658d87ac1ae60418e79e7b6bad7d5f595e0c + +This bug was found with smatch (http://repo.or.cz/w/smatch.git/). If +we return directly the inode->i_mutex lock doesn't get released. 
+ +Signed-off-by: Dan Carpenter +Signed-off-by: Miklos Szeredi +CC: stable@kernel.org +Signed-off-by: Chris Wright +--- + fs/fuse/file.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -1465,7 +1465,7 @@ static loff_t fuse_file_llseek(struct fi + case SEEK_END: + retval = fuse_update_attributes(inode, NULL, file, NULL); + if (retval) +- return retval; ++ goto exit; + offset += i_size_read(inode); + break; + case SEEK_CUR: +@@ -1479,6 +1479,7 @@ static loff_t fuse_file_llseek(struct fi + } + retval = offset; + } ++exit: + mutex_unlock(&inode->i_mutex); + return retval; + } diff --git a/queue-2.6.29/lguest-fix-spurious-bug_on-on-invalid-guest-stack.patch b/queue-2.6.29/lguest-fix-spurious-bug_on-on-invalid-guest-stack.patch new file mode 100644 index 00000000000..a833d8ebb03 --- /dev/null +++ b/queue-2.6.29/lguest-fix-spurious-bug_on-on-invalid-guest-stack.patch @@ -0,0 +1,37 @@ +From stable-bounces@linux.kernel.org Tue Mar 31 01:55:10 2009 +Date: Tue, 31 Mar 2009 01:55:04 GMT +Message-Id: <200903310155.n2V1t4q3030493@hera.kernel.org> +From: Rusty Russell +To: stable@kernel.org +Subject: lguest: fix spurious BUG_ON() on invalid guest stack. + +From: Rusty Russell + +upstream commit: 6afbdd059c27330eccbd85943354f94c2b83a7fe + +Impact: fix crash on misbehaving guest + +gpte_addr() contains a BUG_ON(), insisting that the present flag is +set. We need to return before we call it if that isn't the case. + +Signed-off-by: Rusty Russell +Cc: stable@kernel.org +Signed-off-by: Chris Wright +--- + drivers/lguest/page_tables.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/lguest/page_tables.c ++++ b/drivers/lguest/page_tables.c +@@ -373,8 +373,10 @@ unsigned long guest_pa(struct lg_cpu *cp + /* First step: get the top-level Guest page table entry. */ + gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); + /* Toplevel not present? We can't map it in. 
*/ +- if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) ++ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) { + kill_guest(cpu, "Bad address %#lx", vaddr); ++ return -1UL; ++ } + + gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t); + if (!(pte_flags(gpte) & _PAGE_PRESENT)) diff --git a/queue-2.6.29/lguest-wire-up-pte_update-pte_update_defer.patch b/queue-2.6.29/lguest-wire-up-pte_update-pte_update_defer.patch new file mode 100644 index 00000000000..0af405a89c0 --- /dev/null +++ b/queue-2.6.29/lguest-wire-up-pte_update-pte_update_defer.patch @@ -0,0 +1,61 @@ +From stable-bounces@linux.kernel.org Tue Mar 31 01:55:07 2009 +Date: Tue, 31 Mar 2009 01:55:02 GMT +Message-Id: <200903310155.n2V1t219030451@hera.kernel.org> +From: Rusty Russell +To: stable@kernel.org +Subject: lguest: wire up pte_update/pte_update_defer + +From: Rusty Russell + +upstream commit: b7ff99ea53cd16de8f6166c0e98f19a7c6ca67ee + +Impact: intermittent guest segv/crash fix + +I've been seeing random guest bad address crashes and segmentation faults: +bisect led to 4f98a2fee8 (vmscan: split LRU lists into anon & file sets), +but that's a red herring. + +It turns out that lguest never hooked up the pte_update/pte_update_defer +calls, so our ptes were not always in sync. After the vmscan commit, the +bug became reproducible; now a fsck in a 64MB guest causes reproducible +pagetable corruption. + +Signed-off-by: Rusty Russell +Cc: jeremy@xensource.com +Cc: virtualization@lists.osdl.org +Cc: stable@kernel.org +Signed-off-by: Chris Wright +--- + arch/x86/lguest/boot.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/arch/x86/lguest/boot.c ++++ b/arch/x86/lguest/boot.c +@@ -485,11 +485,17 @@ static void lguest_write_cr4(unsigned lo + * into a process' address space. We set the entry then tell the Host the + * toplevel and address this corresponds to. The Guest uses one pagetable per + * process, so we need to tell the Host which one we're changing (mm->pgd). 
*/ ++static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, ++ pte_t *ptep) ++{ ++ lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); ++} ++ + static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) + { + *ptep = pteval; +- lazy_hcall(LHCALL_SET_PTE, __pa(mm->pgd), addr, pteval.pte_low); ++ lguest_pte_update(mm, addr, ptep); + } + + /* The Guest calls this to set a top-level entry. Again, we set the entry then +@@ -1034,6 +1040,8 @@ __init void lguest_init(void) + pv_mmu_ops.read_cr3 = lguest_read_cr3; + pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; ++ pv_mmu_ops.pte_update = lguest_pte_update; ++ pv_mmu_ops.pte_update_defer = lguest_pte_update; + + #ifdef CONFIG_X86_LOCAL_APIC + /* apic read/write intercepts */ diff --git a/queue-2.6.29/scsi-sg-avoid-blk_put_request-blk_rq_unmap_user-in-interrupt.patch b/queue-2.6.29/scsi-sg-avoid-blk_put_request-blk_rq_unmap_user-in-interrupt.patch new file mode 100644 index 00000000000..2858df1e4ff --- /dev/null +++ b/queue-2.6.29/scsi-sg-avoid-blk_put_request-blk_rq_unmap_user-in-interrupt.patch @@ -0,0 +1,79 @@ +From c96952ed7031e7c576ecf90cf95b8ec099d5295a Mon Sep 17 00:00:00 2001 +Message-Id: <20090330203128E.fujita.tomonori@lab.ntt.co.jp> +From: FUJITA Tomonori +Date: Wed, 4 Feb 2009 11:36:27 +0900 +Subject: SCSI: sg: avoid blk_put_request/blk_rq_unmap_user in interrupt + +upstream commit: c96952ed7031e7c576ecf90cf95b8ec099d5295a + +This fixes the following oops: + +http://marc.info/?l=linux-kernel&m=123316111415677&w=2 + +You can reproduce this bug by interrupting a program before a sg +response completes. This leads to the special sg state (the orphan +state), then sg calls blk_put_request in interrupt (rq->end_io). + +The above bug report shows the recursive lock problem because sg calls +blk_put_request in interrupt. 
We could call __blk_put_request here +instead; however, we also need to handle blk_rq_unmap_user here, which +can't be called in interrupt either. + +In the orphan state, we don't need to care about the data transfer +(the program revoked the command) so adding 'just free the resource' +mode to blk_rq_unmap_user is a possible option. + +I prefer to avoid complicating the blk mapping API when possible. I +change the orphan state to call sg_finish_rem_req via +execute_in_process_context. We hold sg_fd->kref so sg_fd doesn't go +away until keventd_wq finishes our work. copy_from_user/to_user fails +so blk_rq_unmap_user just frees the resource without the data +transfer. + +Signed-off-by: FUJITA Tomonori +Acked-by: Douglas Gilbert +Signed-off-by: James Bottomley +Signed-off-by: Chris Wright +--- + drivers/scsi/sg.c | 15 ++++++++++++--- + 1 file changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -138,6 +138,7 @@ typedef struct sg_request { /* SG_MAX_QU + volatile char done; /* 0->before bh, 1->before read, 2->read */ + struct request *rq; + struct bio *bio; ++ struct execute_work ew; + } Sg_request; + + typedef struct sg_fd { /* holds the state of a file descriptor */ +@@ -1234,6 +1235,15 @@ sg_mmap(struct file *filp, struct vm_are + return 0; + } + ++static void sg_rq_end_io_usercontext(struct work_struct *work) ++{ ++ struct sg_request *srp = container_of(work, struct sg_request, ew.work); ++ struct sg_fd *sfp = srp->parentfp; ++ ++ sg_finish_rem_req(srp); ++ kref_put(&sfp->f_ref, sg_remove_sfp); ++} ++ + /* + * This function is a "bottom half" handler that is called by the mid + * level when a command is completed (or has failed). 
+@@ -1312,10 +1322,9 @@ static void sg_rq_end_io(struct request + */ + wake_up_interruptible(&sfp->read_wait); + kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); ++ kref_put(&sfp->f_ref, sg_remove_sfp); + } else +- sg_finish_rem_req(srp); /* call with srp->done == 0 */ +- +- kref_put(&sfp->f_ref, sg_remove_sfp); ++ execute_in_process_context(sg_rq_end_io_usercontext, &srp->ew); + } + + static struct file_operations sg_fops = { diff --git a/queue-2.6.29/scsi-sg-fix-races-during-device-removal.patch b/queue-2.6.29/scsi-sg-fix-races-during-device-removal.patch new file mode 100644 index 00000000000..d8ff68d83eb --- /dev/null +++ b/queue-2.6.29/scsi-sg-fix-races-during-device-removal.patch @@ -0,0 +1,739 @@ +From c6517b7942fad663cc1cf3235cbe4207cf769332 Mon Sep 17 00:00:00 2001 +Message-Id: <20090330203123C.fujita.tomonori@lab.ntt.co.jp> +From: Tony Battersby +Cc: FUJITA Tomonori +Date: Wed, 21 Jan 2009 14:45:50 -0500 +Subject: SCSI: sg: fix races during device removal + +upstream commit: c6517b7942fad663cc1cf3235cbe4207cf769332 + +sg has the following problems related to device removal: + +* opening a sg fd races with removing a device +* closing a sg fd races with removing a device +* /proc/scsi/sg/* access races with removing a device +* command completion races with removing a device +* command completion races with closing a sg fd +* can rmmod sg with active commands + +These problems can cause kernel oopses, memory-use-after-free, or +double-free errors. This patch fixes these problems by using krefs +to manage the lifetime of sg_device and sg_fd. + +Each command submitted to the midlevel holds a reference to sg_fd +until the completion callback. This ensures that sg_fd doesn't go +away if the fd is closed with commands still outstanding. + +sg_fd gets the reference of sg_device (with scsi_device) and also +makes sure that the sg module doesn't go away. 
+ +/proc/scsi/sg/* functions don't play nicely with krefs because they +give information about sg_fds which have been closed but not yet +freed due to still having outstanding commands and sg_devices which +have been removed but not yet freed due to still being referenced +by one or more sg_fds. To deal with this safely without removing +functionality, /proc functions now access sg_device and sg_fd while +holding a lock instead of using kref_get()/kref_put(). + +Signed-off-by: Tony Battersby +Acked-by: Douglas Gilbert +Signed-off-by: James Bottomley +Signed-off-by: Chris Wright +--- + drivers/scsi/sg.c | 418 +++++++++++++++++++++++++----------------------------- + 1 file changed, 201 insertions(+), 217 deletions(-) + +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -101,6 +101,7 @@ static int scatter_elem_sz_prev = SG_SCA + #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) + + static int sg_add(struct device *, struct class_interface *); ++static void sg_device_destroy(struct kref *kref); + static void sg_remove(struct device *, struct class_interface *); + + static DEFINE_IDR(sg_index_idr); +@@ -158,6 +159,8 @@ typedef struct sg_fd { /* holds the sta + char next_cmd_len; /* 0 -> automatic (def), >0 -> use on next write() */ + char keep_orphan; /* 0 -> drop orphan (def), 1 -> keep for read() */ + char mmap_called; /* 0 -> mmap() never called on this fd */ ++ struct kref f_ref; ++ struct execute_work ew; + } Sg_fd; + + typedef struct sg_device { /* holds the state of each scsi generic device */ +@@ -171,6 +174,7 @@ typedef struct sg_device { /* holds the + char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ + struct gendisk *disk; + struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg] */ ++ struct kref d_ref; + } Sg_device; + + static int sg_fasync(int fd, struct file *filp, int mode); +@@ -194,13 +198,14 @@ static void sg_build_reserve(Sg_fd * sfp + static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); + static void 
sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); + static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); +-static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); +-static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); ++static void sg_remove_sfp(struct kref *); + static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); + static Sg_request *sg_add_request(Sg_fd * sfp); + static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); + static int sg_res_in_use(Sg_fd * sfp); ++static Sg_device *sg_lookup_dev(int dev); + static Sg_device *sg_get_dev(int dev); ++static void sg_put_dev(Sg_device *sdp); + #ifdef CONFIG_SCSI_PROC_FS + static int sg_last_dev(void); + #endif +@@ -237,22 +242,17 @@ sg_open(struct inode *inode, struct file + nonseekable_open(inode, filp); + SCSI_LOG_TIMEOUT(3, printk("sg_open: dev=%d, flags=0x%x\n", dev, flags)); + sdp = sg_get_dev(dev); +- if ((!sdp) || (!sdp->device)) { +- unlock_kernel(); +- return -ENXIO; +- } +- if (sdp->detached) { +- unlock_kernel(); +- return -ENODEV; ++ if (IS_ERR(sdp)) { ++ retval = PTR_ERR(sdp); ++ sdp = NULL; ++ goto sg_put; + } + + /* This driver's module count bumped by fops_get in */ + /* Prevent the device driver from vanishing while we sleep */ + retval = scsi_device_get(sdp->device); +- if (retval) { +- unlock_kernel(); +- return retval; +- } ++ if (retval) ++ goto sg_put; + + if (!((flags & O_NONBLOCK) || + scsi_block_when_processing_errors(sdp->device))) { +@@ -303,16 +303,20 @@ sg_open(struct inode *inode, struct file + if ((sfp = sg_add_sfp(sdp, dev))) + filp->private_data = sfp; + else { +- if (flags & O_EXCL) ++ if (flags & O_EXCL) { + sdp->exclude = 0; /* undo if error */ ++ wake_up_interruptible(&sdp->o_excl_wait); ++ } + retval = -ENOMEM; + goto error_out; + } +- unlock_kernel(); +- return 0; +- +- error_out: +- scsi_device_put(sdp->device); ++ retval = 0; ++error_out: ++ if (retval) ++ scsi_device_put(sdp->device); ++sg_put: ++ if (sdp) ++ sg_put_dev(sdp); + unlock_kernel(); + return retval; 
+ } +@@ -327,13 +331,13 @@ sg_release(struct inode *inode, struct f + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) + return -ENXIO; + SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); +- if (0 == sg_remove_sfp(sdp, sfp)) { /* Returns 1 when sdp gone */ +- if (!sdp->detached) { +- scsi_device_put(sdp->device); +- } +- sdp->exclude = 0; +- wake_up_interruptible(&sdp->o_excl_wait); +- } ++ ++ sfp->closed = 1; ++ ++ sdp->exclude = 0; ++ wake_up_interruptible(&sdp->o_excl_wait); ++ ++ kref_put(&sfp->f_ref, sg_remove_sfp); + return 0; + } + +@@ -755,6 +759,7 @@ sg_common_write(Sg_fd * sfp, Sg_request + hp->duration = jiffies_to_msecs(jiffies); + + srp->rq->timeout = timeout; ++ kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). */ + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; +@@ -1247,24 +1252,23 @@ sg_mmap(struct file *filp, struct vm_are + static void sg_rq_end_io(struct request *rq, int uptodate) + { + struct sg_request *srp = rq->end_io_data; +- Sg_device *sdp = NULL; ++ Sg_device *sdp; + Sg_fd *sfp; + unsigned long iflags; + unsigned int ms; + char *sense; +- int result, resid; ++ int result, resid, done = 1; + +- if (NULL == srp) { +- printk(KERN_ERR "sg_cmd_done: NULL request\n"); ++ if (WARN_ON(srp->done != 0)) + return; +- } ++ + sfp = srp->parentfp; +- if (sfp) +- sdp = sfp->parentdp; +- if ((NULL == sdp) || sdp->detached) { +- printk(KERN_INFO "sg_cmd_done: device detached\n"); ++ if (WARN_ON(sfp == NULL)) + return; +- } ++ ++ sdp = sfp->parentdp; ++ if (unlikely(sdp->detached)) ++ printk(KERN_INFO "sg_rq_end_io: device detached\n"); + + sense = rq->sense; + result = rq->errors; +@@ -1303,33 +1307,26 @@ static void sg_rq_end_io(struct request + } + /* Rely on write phase to clean out srp status values, so no "else" */ + +- if (sfp->closed) { /* whoops this fd already released, cleanup */ +- SCSI_LOG_TIMEOUT(1, printk("sg_cmd_done: already closed, 
freeing ...\n")); +- sg_finish_rem_req(srp); +- srp = NULL; +- if (NULL == sfp->headrp) { +- SCSI_LOG_TIMEOUT(1, printk("sg_cmd_done: already closed, final cleanup\n")); +- if (0 == sg_remove_sfp(sdp, sfp)) { /* device still present */ +- scsi_device_put(sdp->device); +- } +- sfp = NULL; +- } +- } else if (srp && srp->orphan) { ++ write_lock_irqsave(&sfp->rq_list_lock, iflags); ++ if (unlikely(srp->orphan)) { + if (sfp->keep_orphan) + srp->sg_io_owned = 0; +- else { +- sg_finish_rem_req(srp); +- srp = NULL; +- } ++ else ++ done = 0; + } +- if (sfp && srp) { +- /* Now wake up any sg_read() that is waiting for this packet. */ +- kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); +- write_lock_irqsave(&sfp->rq_list_lock, iflags); +- srp->done = 1; ++ srp->done = done; ++ write_unlock_irqrestore(&sfp->rq_list_lock, iflags); ++ ++ if (likely(done)) { ++ /* Now wake up any sg_read() that is waiting for this ++ * packet. ++ */ + wake_up_interruptible(&sfp->read_wait); +- write_unlock_irqrestore(&sfp->rq_list_lock, iflags); +- } ++ kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); ++ } else ++ sg_finish_rem_req(srp); /* call with srp->done == 0 */ ++ ++ kref_put(&sfp->f_ref, sg_remove_sfp); + } + + static struct file_operations sg_fops = { +@@ -1364,17 +1361,18 @@ static Sg_device *sg_alloc(struct gendis + printk(KERN_WARNING "kmalloc Sg_device failure\n"); + return ERR_PTR(-ENOMEM); + } +- error = -ENOMEM; ++ + if (!idr_pre_get(&sg_index_idr, GFP_KERNEL)) { + printk(KERN_WARNING "idr expansion Sg_device failure\n"); ++ error = -ENOMEM; + goto out; + } + + write_lock_irqsave(&sg_index_lock, iflags); +- error = idr_get_new(&sg_index_idr, sdp, &k); +- write_unlock_irqrestore(&sg_index_lock, iflags); + ++ error = idr_get_new(&sg_index_idr, sdp, &k); + if (error) { ++ write_unlock_irqrestore(&sg_index_lock, iflags); + printk(KERN_WARNING "idr allocation Sg_device failure: %d\n", + error); + goto out; +@@ -1391,6 +1389,9 @@ static Sg_device *sg_alloc(struct gendis + 
init_waitqueue_head(&sdp->o_excl_wait); + sdp->sg_tablesize = min(q->max_hw_segments, q->max_phys_segments); + sdp->index = k; ++ kref_init(&sdp->d_ref); ++ ++ write_unlock_irqrestore(&sg_index_lock, iflags); + + error = 0; + out: +@@ -1401,6 +1402,8 @@ static Sg_device *sg_alloc(struct gendis + return sdp; + + overflow: ++ idr_remove(&sg_index_idr, k); ++ write_unlock_irqrestore(&sg_index_lock, iflags); + sdev_printk(KERN_WARNING, scsidp, + "Unable to attach sg device type=%d, minor " + "number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); +@@ -1488,49 +1491,46 @@ out: + return error; + } + +-static void +-sg_remove(struct device *cl_dev, struct class_interface *cl_intf) ++static void sg_device_destroy(struct kref *kref) ++{ ++ struct sg_device *sdp = container_of(kref, struct sg_device, d_ref); ++ unsigned long flags; ++ ++ /* CAUTION! Note that the device can still be found via idr_find() ++ * even though the refcount is 0. Therefore, do idr_remove() BEFORE ++ * any other cleanup. ++ */ ++ ++ write_lock_irqsave(&sg_index_lock, flags); ++ idr_remove(&sg_index_idr, sdp->index); ++ write_unlock_irqrestore(&sg_index_lock, flags); ++ ++ SCSI_LOG_TIMEOUT(3, ++ printk("sg_device_destroy: %s\n", ++ sdp->disk->disk_name)); ++ ++ put_disk(sdp->disk); ++ kfree(sdp); ++} ++ ++static void sg_remove(struct device *cl_dev, struct class_interface *cl_intf) + { + struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); + Sg_device *sdp = dev_get_drvdata(cl_dev); + unsigned long iflags; + Sg_fd *sfp; +- Sg_fd *tsfp; +- Sg_request *srp; +- Sg_request *tsrp; +- int delay; + +- if (!sdp) ++ if (!sdp || sdp->detached) + return; + +- delay = 0; ++ SCSI_LOG_TIMEOUT(3, printk("sg_remove: %s\n", sdp->disk->disk_name)); ++ ++ /* Need a write lock to set sdp->detached. 
*/ + write_lock_irqsave(&sg_index_lock, iflags); +- if (sdp->headfp) { +- sdp->detached = 1; +- for (sfp = sdp->headfp; sfp; sfp = tsfp) { +- tsfp = sfp->nextfp; +- for (srp = sfp->headrp; srp; srp = tsrp) { +- tsrp = srp->nextrp; +- if (sfp->closed || (0 == sg_srp_done(srp, sfp))) +- sg_finish_rem_req(srp); +- } +- if (sfp->closed) { +- scsi_device_put(sdp->device); +- __sg_remove_sfp(sdp, sfp); +- } else { +- delay = 1; +- wake_up_interruptible(&sfp->read_wait); +- kill_fasync(&sfp->async_qp, SIGPOLL, +- POLL_HUP); +- } +- } +- SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d, dirty\n", sdp->index)); +- if (NULL == sdp->headfp) { +- idr_remove(&sg_index_idr, sdp->index); +- } +- } else { /* nothing active, simple case */ +- SCSI_LOG_TIMEOUT(3, printk("sg_remove: dev=%d\n", sdp->index)); +- idr_remove(&sg_index_idr, sdp->index); ++ sdp->detached = 1; ++ for (sfp = sdp->headfp; sfp; sfp = sfp->nextfp) { ++ wake_up_interruptible(&sfp->read_wait); ++ kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); + } + write_unlock_irqrestore(&sg_index_lock, iflags); + +@@ -1538,13 +1538,8 @@ sg_remove(struct device *cl_dev, struct + device_destroy(sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); + cdev_del(sdp->cdev); + sdp->cdev = NULL; +- put_disk(sdp->disk); +- sdp->disk = NULL; +- if (NULL == sdp->headfp) +- kfree(sdp); + +- if (delay) +- msleep(10); /* dirty detach so delay device destruction */ ++ sg_put_dev(sdp); + } + + module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR); +@@ -1941,22 +1936,6 @@ sg_get_rq_mark(Sg_fd * sfp, int pack_id) + return resp; + } + +-#ifdef CONFIG_SCSI_PROC_FS +-static Sg_request * +-sg_get_nth_request(Sg_fd * sfp, int nth) +-{ +- Sg_request *resp; +- unsigned long iflags; +- int k; +- +- read_lock_irqsave(&sfp->rq_list_lock, iflags); +- for (k = 0, resp = sfp->headrp; resp && (k < nth); +- ++k, resp = resp->nextrp) ; +- read_unlock_irqrestore(&sfp->rq_list_lock, iflags); +- return resp; +-} +-#endif +- + /* always adds 
to end of list */ + static Sg_request * + sg_add_request(Sg_fd * sfp) +@@ -2032,22 +2011,6 @@ sg_remove_request(Sg_fd * sfp, Sg_reques + return res; + } + +-#ifdef CONFIG_SCSI_PROC_FS +-static Sg_fd * +-sg_get_nth_sfp(Sg_device * sdp, int nth) +-{ +- Sg_fd *resp; +- unsigned long iflags; +- int k; +- +- read_lock_irqsave(&sg_index_lock, iflags); +- for (k = 0, resp = sdp->headfp; resp && (k < nth); +- ++k, resp = resp->nextfp) ; +- read_unlock_irqrestore(&sg_index_lock, iflags); +- return resp; +-} +-#endif +- + static Sg_fd * + sg_add_sfp(Sg_device * sdp, int dev) + { +@@ -2062,6 +2025,7 @@ sg_add_sfp(Sg_device * sdp, int dev) + init_waitqueue_head(&sfp->read_wait); + rwlock_init(&sfp->rq_list_lock); + ++ kref_init(&sfp->f_ref); + sfp->timeout = SG_DEFAULT_TIMEOUT; + sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; + sfp->force_packid = SG_DEF_FORCE_PACK_ID; +@@ -2089,15 +2053,54 @@ sg_add_sfp(Sg_device * sdp, int dev) + sg_build_reserve(sfp, bufflen); + SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n", + sfp->reserve.bufflen, sfp->reserve.k_use_sg)); ++ ++ kref_get(&sdp->d_ref); ++ __module_get(THIS_MODULE); + return sfp; + } + +-static void +-__sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) ++static void sg_remove_sfp_usercontext(struct work_struct *work) ++{ ++ struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); ++ struct sg_device *sdp = sfp->parentdp; ++ ++ /* Cleanup any responses which were never read(). 
*/ ++ while (sfp->headrp) ++ sg_finish_rem_req(sfp->headrp); ++ ++ if (sfp->reserve.bufflen > 0) { ++ SCSI_LOG_TIMEOUT(6, ++ printk("sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", ++ (int) sfp->reserve.bufflen, ++ (int) sfp->reserve.k_use_sg)); ++ sg_remove_scat(&sfp->reserve); ++ } ++ ++ SCSI_LOG_TIMEOUT(6, ++ printk("sg_remove_sfp: %s, sfp=0x%p\n", ++ sdp->disk->disk_name, ++ sfp)); ++ kfree(sfp); ++ ++ scsi_device_put(sdp->device); ++ sg_put_dev(sdp); ++ module_put(THIS_MODULE); ++} ++ ++static void sg_remove_sfp(struct kref *kref) + { ++ struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); ++ struct sg_device *sdp = sfp->parentdp; + Sg_fd *fp; + Sg_fd *prev_fp; ++ unsigned long iflags; ++ ++ /* CAUTION! Note that sfp can still be found by walking sdp->headfp ++ * even though the refcount is now 0. Therefore, unlink sfp from ++ * sdp->headfp BEFORE doing any other cleanup. ++ */ + ++ write_lock_irqsave(&sg_index_lock, iflags); + prev_fp = sdp->headfp; + if (sfp == prev_fp) + sdp->headfp = prev_fp->nextfp; +@@ -2110,54 +2113,10 @@ __sg_remove_sfp(Sg_device * sdp, Sg_fd * + prev_fp = fp; + } + } +- if (sfp->reserve.bufflen > 0) { +- SCSI_LOG_TIMEOUT(6, +- printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", +- (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); +- sg_remove_scat(&sfp->reserve); +- } +- sfp->parentdp = NULL; +- SCSI_LOG_TIMEOUT(6, printk("__sg_remove_sfp: sfp=0x%p\n", sfp)); +- kfree(sfp); +-} +- +-/* Returns 0 in normal case, 1 when detached and sdp object removed */ +-static int +-sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp) +-{ +- Sg_request *srp; +- Sg_request *tsrp; +- int dirty = 0; +- int res = 0; +- +- for (srp = sfp->headrp; srp; srp = tsrp) { +- tsrp = srp->nextrp; +- if (sg_srp_done(srp, sfp)) +- sg_finish_rem_req(srp); +- else +- ++dirty; +- } +- if (0 == dirty) { +- unsigned long iflags; ++ write_unlock_irqrestore(&sg_index_lock, iflags); ++ wake_up_interruptible(&sdp->o_excl_wait); + +- write_lock_irqsave(&sg_index_lock, 
iflags); +- __sg_remove_sfp(sdp, sfp); +- if (sdp->detached && (NULL == sdp->headfp)) { +- idr_remove(&sg_index_idr, sdp->index); +- kfree(sdp); +- res = 1; +- } +- write_unlock_irqrestore(&sg_index_lock, iflags); +- } else { +- /* MOD_INC's to inhibit unloading sg and associated adapter driver */ +- /* only bump the access_count if we actually succeeded in +- * throwing another counter on the host module */ +- scsi_device_get(sdp->device); /* XXX: retval ignored? */ +- sfp->closed = 1; /* flag dirty state on this fd */ +- SCSI_LOG_TIMEOUT(1, printk("sg_remove_sfp: worrisome, %d writes pending\n", +- dirty)); +- } +- return res; ++ execute_in_process_context(sg_remove_sfp_usercontext, &sfp->ew); + } + + static int +@@ -2199,19 +2158,38 @@ sg_last_dev(void) + } + #endif + +-static Sg_device * +-sg_get_dev(int dev) ++/* must be called with sg_index_lock held */ ++static Sg_device *sg_lookup_dev(int dev) + { +- Sg_device *sdp; +- unsigned long iflags; ++ return idr_find(&sg_index_idr, dev); ++} + +- read_lock_irqsave(&sg_index_lock, iflags); +- sdp = idr_find(&sg_index_idr, dev); +- read_unlock_irqrestore(&sg_index_lock, iflags); ++static Sg_device *sg_get_dev(int dev) ++{ ++ struct sg_device *sdp; ++ unsigned long flags; ++ ++ read_lock_irqsave(&sg_index_lock, flags); ++ sdp = sg_lookup_dev(dev); ++ if (!sdp) ++ sdp = ERR_PTR(-ENXIO); ++ else if (sdp->detached) { ++ /* If sdp->detached, then the refcount may already be 0, in ++ * which case it would be a bug to do kref_get(). 
++ */ ++ sdp = ERR_PTR(-ENODEV); ++ } else ++ kref_get(&sdp->d_ref); ++ read_unlock_irqrestore(&sg_index_lock, flags); + + return sdp; + } + ++static void sg_put_dev(struct sg_device *sdp) ++{ ++ kref_put(&sdp->d_ref, sg_device_destroy); ++} ++ + #ifdef CONFIG_SCSI_PROC_FS + + static struct proc_dir_entry *sg_proc_sgp = NULL; +@@ -2468,8 +2446,10 @@ static int sg_proc_seq_show_dev(struct s + struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; + Sg_device *sdp; + struct scsi_device *scsidp; ++ unsigned long iflags; + +- sdp = it ? sg_get_dev(it->index) : NULL; ++ read_lock_irqsave(&sg_index_lock, iflags); ++ sdp = it ? sg_lookup_dev(it->index) : NULL; + if (sdp && (scsidp = sdp->device) && (!sdp->detached)) + seq_printf(s, "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", + scsidp->host->host_no, scsidp->channel, +@@ -2480,6 +2460,7 @@ static int sg_proc_seq_show_dev(struct s + (int) scsi_device_online(scsidp)); + else + seq_printf(s, "-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\n"); ++ read_unlock_irqrestore(&sg_index_lock, iflags); + return 0; + } + +@@ -2493,16 +2474,20 @@ static int sg_proc_seq_show_devstrs(stru + struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; + Sg_device *sdp; + struct scsi_device *scsidp; ++ unsigned long iflags; + +- sdp = it ? sg_get_dev(it->index) : NULL; ++ read_lock_irqsave(&sg_index_lock, iflags); ++ sdp = it ? 
sg_lookup_dev(it->index) : NULL; + if (sdp && (scsidp = sdp->device) && (!sdp->detached)) + seq_printf(s, "%8.8s\t%16.16s\t%4.4s\n", + scsidp->vendor, scsidp->model, scsidp->rev); + else + seq_printf(s, "\n"); ++ read_unlock_irqrestore(&sg_index_lock, iflags); + return 0; + } + ++/* must be called while holding sg_index_lock */ + static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) + { + int k, m, new_interface, blen, usg; +@@ -2512,7 +2497,8 @@ static void sg_proc_debug_helper(struct + const char * cp; + unsigned int ms; + +- for (k = 0; (fp = sg_get_nth_sfp(sdp, k)); ++k) { ++ for (k = 0, fp = sdp->headfp; fp != NULL; ++k, fp = fp->nextfp) { ++ read_lock(&fp->rq_list_lock); /* irqs already disabled */ + seq_printf(s, " FD(%d): timeout=%dms bufflen=%d " + "(res)sgat=%d low_dma=%d\n", k + 1, + jiffies_to_msecs(fp->timeout), +@@ -2522,7 +2508,9 @@ static void sg_proc_debug_helper(struct + seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=%d\n", + (int) fp->cmd_q, (int) fp->force_packid, + (int) fp->keep_orphan, (int) fp->closed); +- for (m = 0; (srp = sg_get_nth_request(fp, m)); ++m) { ++ for (m = 0, srp = fp->headrp; ++ srp != NULL; ++ ++m, srp = srp->nextrp) { + hp = &srp->header; + new_interface = (hp->interface_id == '\0') ? 0 : 1; + if (srp->res_used) { +@@ -2559,6 +2547,7 @@ static void sg_proc_debug_helper(struct + } + if (0 == m) + seq_printf(s, " No requests active\n"); ++ read_unlock(&fp->rq_list_lock); + } + } + +@@ -2571,39 +2560,34 @@ static int sg_proc_seq_show_debug(struct + { + struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; + Sg_device *sdp; ++ unsigned long iflags; + + if (it && (0 == it->index)) { + seq_printf(s, "max_active_device=%d(origin 1)\n", + (int)it->max); + seq_printf(s, " def_reserved_size=%d\n", sg_big_buff); + } +- sdp = it ? 
sg_get_dev(it->index) : NULL; +- if (sdp) { +- struct scsi_device *scsidp = sdp->device; + +- if (NULL == scsidp) { +- seq_printf(s, "device %d detached ??\n", +- (int)it->index); +- return 0; +- } ++ read_lock_irqsave(&sg_index_lock, iflags); ++ sdp = it ? sg_lookup_dev(it->index) : NULL; ++ if (sdp && sdp->headfp) { ++ struct scsi_device *scsidp = sdp->device; + +- if (sg_get_nth_sfp(sdp, 0)) { +- seq_printf(s, " >>> device=%s ", +- sdp->disk->disk_name); +- if (sdp->detached) +- seq_printf(s, "detached pending close "); +- else +- seq_printf +- (s, "scsi%d chan=%d id=%d lun=%d em=%d", +- scsidp->host->host_no, +- scsidp->channel, scsidp->id, +- scsidp->lun, +- scsidp->host->hostt->emulated); +- seq_printf(s, " sg_tablesize=%d excl=%d\n", +- sdp->sg_tablesize, sdp->exclude); +- } ++ seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); ++ if (sdp->detached) ++ seq_printf(s, "detached pending close "); ++ else ++ seq_printf ++ (s, "scsi%d chan=%d id=%d lun=%d em=%d", ++ scsidp->host->host_no, ++ scsidp->channel, scsidp->id, ++ scsidp->lun, ++ scsidp->host->hostt->emulated); ++ seq_printf(s, " sg_tablesize=%d excl=%d\n", ++ sdp->sg_tablesize, sdp->exclude); + sg_proc_debug_helper(s, sdp); + } ++ read_unlock_irqrestore(&sg_index_lock, iflags); + return 0; + } + diff --git a/queue-2.6.29/scsi-sg-fix-races-with-ioctl.patch b/queue-2.6.29/scsi-sg-fix-races-with-ioctl.patch new file mode 100644 index 00000000000..c72e48f0313 --- /dev/null +++ b/queue-2.6.29/scsi-sg-fix-races-with-ioctl.patch @@ -0,0 +1,125 @@ +From a2dd3b4cea335713b58996bb07b3abcde1175f47 Mon Sep 17 00:00:00 2001 +Message-Id: <20090330203125S.fujita.tomonori@lab.ntt.co.jp> +From: Tony Battersby +Cc: FUJITA Tomonori +Date: Tue, 20 Jan 2009 17:00:09 -0500 +Subject: SCSI: sg: fix races with ioctl(SG_IO) + +upstream commit: a2dd3b4cea335713b58996bb07b3abcde1175f47 + +sg_io_owned needs to be set before the command is sent to the midlevel; +otherwise, a quickly-completing command may cause a different CPU 
+to see "srp->done == 1 && !srp->sg_io_owned", which would lead to +incorrect behavior. + +Check srp->done and set srp->orphan while holding rq_list_lock to +prevent races with sg_rq_end_io(). + +There is no need to check sfp->closed from read/write/ioctl/poll/etc. +since the kernel guarantees that this won't happen. + +The usefulness of sg_srp_done() was questionable before; now it is +definitely not needed. + +Signed-off-by: Tony Battersby +Acked-by: Douglas Gilbert +Signed-off-by: James Bottomley +Signed-off-by: Chris Wright +--- + drivers/scsi/sg.c | 39 ++++++++++++++------------------------- + 1 file changed, 14 insertions(+), 25 deletions(-) + +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -189,7 +189,7 @@ static ssize_t sg_new_read(Sg_fd * sfp, + Sg_request * srp); + static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, + const char __user *buf, size_t count, int blocking, +- int read_only, Sg_request **o_srp); ++ int read_only, int sg_io_owned, Sg_request **o_srp); + static int sg_common_write(Sg_fd * sfp, Sg_request * srp, + unsigned char *cmnd, int timeout, int blocking); + static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); +@@ -561,7 +561,8 @@ sg_write(struct file *filp, const char _ + return -EFAULT; + blocking = !(filp->f_flags & O_NONBLOCK); + if (old_hdr.reply_len < 0) +- return sg_new_write(sfp, filp, buf, count, blocking, 0, NULL); ++ return sg_new_write(sfp, filp, buf, count, ++ blocking, 0, 0, NULL); + if (count < (SZ_SG_HEADER + 6)) + return -EIO; /* The minimum scsi command length is 6 bytes. 
*/ + +@@ -642,7 +643,7 @@ sg_write(struct file *filp, const char _ + + static ssize_t + sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, +- size_t count, int blocking, int read_only, ++ size_t count, int blocking, int read_only, int sg_io_owned, + Sg_request **o_srp) + { + int k; +@@ -662,6 +663,7 @@ sg_new_write(Sg_fd *sfp, struct file *fi + SCSI_LOG_TIMEOUT(1, printk("sg_new_write: queue full\n")); + return -EDOM; + } ++ srp->sg_io_owned = sg_io_owned; + hp = &srp->header; + if (__copy_from_user(hp, buf, SZ_SG_IO_HDR)) { + sg_remove_request(sfp, srp); +@@ -766,18 +768,6 @@ sg_common_write(Sg_fd * sfp, Sg_request + } + + static int +-sg_srp_done(Sg_request *srp, Sg_fd *sfp) +-{ +- unsigned long iflags; +- int done; +- +- read_lock_irqsave(&sfp->rq_list_lock, iflags); +- done = srp->done; +- read_unlock_irqrestore(&sfp->rq_list_lock, iflags); +- return done; +-} +- +-static int + sg_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd_in, unsigned long arg) + { +@@ -809,27 +799,26 @@ sg_ioctl(struct inode *inode, struct fil + return -EFAULT; + result = + sg_new_write(sfp, filp, p, SZ_SG_IO_HDR, +- blocking, read_only, &srp); ++ blocking, read_only, 1, &srp); + if (result < 0) + return result; +- srp->sg_io_owned = 1; + while (1) { + result = 0; /* following macro to beat race condition */ + __wait_event_interruptible(sfp->read_wait, +- (sdp->detached || sfp->closed || sg_srp_done(srp, sfp)), +- result); ++ (srp->done || sdp->detached), ++ result); + if (sdp->detached) + return -ENODEV; +- if (sfp->closed) +- return 0; /* request packet dropped already */ +- if (0 == result) ++ write_lock_irq(&sfp->rq_list_lock); ++ if (srp->done) { ++ srp->done = 2; ++ write_unlock_irq(&sfp->rq_list_lock); + break; ++ } + srp->orphan = 1; ++ write_unlock_irq(&sfp->rq_list_lock); + return result; /* -ERESTARTSYS because signal hit process */ + } +- write_lock_irqsave(&sfp->rq_list_lock, iflags); +- srp->done = 2; +- 
write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp); + return (result < 0) ? result : 0; + } diff --git a/queue-2.6.29/series b/queue-2.6.29/series index e6838aa8763..db81435cf3f 100644 --- a/queue-2.6.29/series +++ b/queue-2.6.29/series @@ -23,3 +23,23 @@ ath5k-warn-and-correct-rate-for-unknown-hw-rate-indexes.patch cifs-fix-memory-overwrite-when-saving-nativefilesystem-field-during-mount.patch cfg80211-force-last_request-to-be-set-for-old_reg-if-regdom-is-eu.patch dvb-firedtv-firedtv-s2-problems-with-tuning-solved.patch +scsi-sg-fix-races-during-device-removal.patch +scsi-sg-fix-races-with-ioctl.patch +scsi-sg-avoid-blk_put_request-blk_rq_unmap_user-in-interrupt.patch +arm-pxa-fix-overlay-being-un-necessarily-initialized-on-pxa25x.patch +arm-5428-1-module-relocation-update-for-r_arm_v4bx.patch +arm-cumana-fix-a-long-standing-bogon.patch +arm-fix-leak-in-iop13xx-pci.patch +arm-twl4030-leak-fix.patch +arm-5435-1-fix-compile-warning-in-sanity_check_meminfo.patch +fuse-fix-fuse_file_lseek-returning-with-lock-held.patch +add-a-missing-unlock_kernel-in-raw_open.patch +x86-pat-pci-change-vma-prot-in-pci_mmap-to-reflect-inherited-prot.patch +x86-uv-fix-cpumask-iterator-in-uv_bau_init.patch +x86-fix-64k-corruption-check.patch +x86-ptrace-bts-fix-an-unreachable-statement.patch +x86-mtrr-don-t-modify-rddram-wrdram-bits-of-fixed-mtrrs.patch +vm-x86-pat-change-is_linear_pfn_mapping-to-not-use-vm_pgoff.patch +lguest-wire-up-pte_update-pte_update_defer.patch +lguest-fix-spurious-bug_on-on-invalid-guest-stack.patch +cfg80211-fix-incorrect-assumption-on-last_request-for-11d.patch diff --git a/queue-2.6.29/vm-x86-pat-change-is_linear_pfn_mapping-to-not-use-vm_pgoff.patch b/queue-2.6.29/vm-x86-pat-change-is_linear_pfn_mapping-to-not-use-vm_pgoff.patch new file mode 100644 index 00000000000..a2a531e4a0e --- /dev/null +++ b/queue-2.6.29/vm-x86-pat-change-is_linear_pfn_mapping-to-not-use-vm_pgoff.patch @@ -0,0 +1,123 @@ +From 
stable-bounces@linux.kernel.org Mon Mar 30 18:50:42 2009 +Date: Mon, 30 Mar 2009 18:50:36 GMT +Message-Id: <200903301850.n2UIoaIU028484@hera.kernel.org> +From: Pallipadi, Venkatesh +To: stable@kernel.org +Subject: VM, x86, PAT: Change is_linear_pfn_mapping to not use vm_pgoff + +From: Pallipadi, Venkatesh + +upstream commit: 4bb9c5c02153dfc89a6c73a6f32091413805ad7d + +Impact: fix false positive PAT warnings - also fix VirtualBox hang + +Use of vma->vm_pgoff to identify the pfnmaps that are fully +mapped at mmap time is broken. vm_pgoff is set by generic mmap +code even for cases where drivers are setting up the mappings +at the fault time. + +The problem was originally reported here: + + http://marc.info/?l=linux-kernel&m=123383810628583&w=2 + +Change is_linear_pfn_mapping logic to overload VM_INSERTPAGE +flag along with VM_PFNMAP to mean full PFNMAP setup at mmap +time. + +Problem also tracked at: + + http://bugzilla.kernel.org/show_bug.cgi?id=12800 + +Reported-by: Thomas Hellstrom +Tested-by: Frans Pop +Signed-off-by: Venkatesh Pallipadi +Signed-off-by: Suresh Siddha @intel.com> +Cc: Nick Piggin +Cc: "ebiederm@xmission.com" +Cc: # only for 2.6.29.1, not .28 +LKML-Reference: <20090313004527.GA7176@linux-os.sc.intel.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/mm/pat.c | 5 +++-- + include/linux/mm.h | 15 +++++++++++++-- + mm/memory.c | 6 ++++-- + 3 files changed, 20 insertions(+), 6 deletions(-) + +--- a/arch/x86/mm/pat.c ++++ b/arch/x86/mm/pat.c +@@ -641,10 +641,11 @@ static int reserve_pfn_range(u64 paddr, + is_ram = pat_pagerange_is_ram(paddr, paddr + size); + + /* +- * reserve_pfn_range() doesn't support RAM pages. ++ * reserve_pfn_range() doesn't support RAM pages. Maintain the current ++ * behavior with RAM pages by returning success. 
+ */ + if (is_ram != 0) +- return -EINVAL; ++ return 0; + + ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); + if (ret) +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -98,7 +98,7 @@ extern unsigned int kobjsize(const void + #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ + #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ + #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ +-#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ ++#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. Refer note in VM_PFNMAP_AT_MMAP below */ + #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ + + #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ +@@ -127,6 +127,17 @@ extern unsigned int kobjsize(const void + #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) + + /* ++ * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). ++ * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. ++ * Note VM_INSERTPAGE flag is overloaded here. i.e, ++ * VM_INSERTPAGE && !VM_PFNMAP implies ++ * The vma has had "vm_insert_page()" done on it ++ * VM_INSERTPAGE && VM_PFNMAP implies ++ * The vma is PFNMAP with full mapping at mmap time ++ */ ++#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) ++ ++/* + * mapping from the currently active vm_flags protection bits (the + * low four bits) to a page protection mask.. 
+ */ +@@ -145,7 +156,7 @@ extern pgprot_t protection_map[16]; + */ + static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) + { +- return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); ++ return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); + } + + static inline int is_pfn_mapping(struct vm_area_struct *vma) +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1665,9 +1665,10 @@ int remap_pfn_range(struct vm_area_struc + * behaviour that some programs depend on. We mark the "original" + * un-COW'ed pages by matching them up with "vma->vm_pgoff". + */ +- if (addr == vma->vm_start && end == vma->vm_end) ++ if (addr == vma->vm_start && end == vma->vm_end) { + vma->vm_pgoff = pfn; +- else if (is_cow_mapping(vma->vm_flags)) ++ vma->vm_flags |= VM_PFNMAP_AT_MMAP; ++ } else if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; +@@ -1679,6 +1680,7 @@ int remap_pfn_range(struct vm_area_struc + * needed from higher level routine calling unmap_vmas + */ + vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); ++ vma->vm_flags &= ~VM_PFNMAP_AT_MMAP; + return -EINVAL; + } + diff --git a/queue-2.6.29/x86-fix-64k-corruption-check.patch b/queue-2.6.29/x86-fix-64k-corruption-check.patch new file mode 100644 index 00000000000..9d03d5fa705 --- /dev/null +++ b/queue-2.6.29/x86-fix-64k-corruption-check.patch @@ -0,0 +1,46 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:34 2009 +Date: Mon, 30 Mar 2009 18:50:28 GMT +Message-Id: <200903301850.n2UIoSHu028377@hera.kernel.org> +From: Yinghai Lu +To: stable@kernel.org +Subject: x86: fix 64k corruption-check + +From: Yinghai Lu + +upstream commit: 6d7942dc2a70a7e74c352107b150265602671588 + +Impact: fix boot crash + +Need to exit early if the addr is far above 64k. 
+ +The crash got exposed by: + + 78a8b35: x86: make e820_update_range() handle small range update + +Signed-off-by: Yinghai Lu +Cc: +LKML-Reference: <49BC2279.2030101@kernel.org> +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/kernel/check.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/check.c ++++ b/arch/x86/kernel/check.c +@@ -86,12 +86,12 @@ void __init setup_bios_corruption_check( + if (addr == 0) + break; + ++ if (addr >= corruption_check_size) ++ break; ++ + if ((addr + size) > corruption_check_size) + size = corruption_check_size - addr; + +- if (size == 0) +- break; +- + e820_update_range(addr, size, E820_RAM, E820_RESERVED); + scan_areas[num_scan_areas].addr = addr; + scan_areas[num_scan_areas].size = size; diff --git a/queue-2.6.29/x86-mtrr-don-t-modify-rddram-wrdram-bits-of-fixed-mtrrs.patch b/queue-2.6.29/x86-mtrr-don-t-modify-rddram-wrdram-bits-of-fixed-mtrrs.patch new file mode 100644 index 00000000000..691e65b8556 --- /dev/null +++ b/queue-2.6.29/x86-mtrr-don-t-modify-rddram-wrdram-bits-of-fixed-mtrrs.patch @@ -0,0 +1,168 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:40 2009 +Date: Mon, 30 Mar 2009 18:50:32 GMT +Message-Id: <200903301850.n2UIoWFI028453@hera.kernel.org> +From: Andreas Herrmann +To: stable@kernel.org +Subject: x86: mtrr: don't modify RdDram/WrDram bits of fixed MTRRs + +From: Andreas Herrmann + +upstream commit: 3ff42da5048649503e343a32be37b14a6a4e8aaf + +Impact: bug fix + BIOS workaround + +BIOS is expected to clear the SYSCFG[MtrrFixDramModEn] on AMD CPUs +after fixed MTRRs are configured. + +Some BIOSes do not clear SYSCFG[MtrrFixDramModEn] on BP (and on APs). + +This can lead to obfuscation in Linux when this bit is not cleared on +BP but cleared on APs. 
A consequence of this is that the saved +fixed-MTRR state (from BP) differs from the fixed-MTRRs of APs -- +because RdDram/WrDram bits are read as zero when +SYSCFG[MtrrFixDramModEn] is cleared -- and Linux tries to sync +fixed-MTRR state from BP to AP. This implies that Linux sets +SYSCFG[MtrrFixDramEn] and activates those bits. + +More important is that (some) systems change these bits in SMM when +ACPI is enabled. Hence it is racy if Linux modifies RdMem/WrMem bits, +too. + +(1) The patch modifies an old fix from Bernhard Kaindl to get + suspend/resume working on some Acer Laptops. Bernhard's patch + tried to sync RdMem/WrMem bits of fixed MTRR registers and that + helped on those old Laptops. (Don't ask me why -- can't test it + myself). But this old problem was not the motivation for the + patch. (See http://lkml.org/lkml/2007/4/3/110) + +(2) The more important effect is to fix issues on some more current systems. + + On those systems Linux panics or just freezes, see + + http://bugzilla.kernel.org/show_bug.cgi?id=11541 + (and also duplicates of this bug: + http://bugzilla.kernel.org/show_bug.cgi?id=11737 + http://bugzilla.kernel.org/show_bug.cgi?id=11714) + + The affected systems boot only using acpi=ht, acpi=off or + when the kernel is built with CONFIG_MTRR=n. + + The acpi options prevent full enablement of ACPI. Obviously when + ACPI is enabled the BIOS/SMM modifies RdMem/WrMem bits. When + CONFIG_MTRR=y Linux also accesses and modifies those bits when it + needs to sync fixed-MTRRs across cores (Bernhard's fix, see (1)). + How do you synchronize that? You can't. As a consequence Linux + shouldn't touch those bits at all (Rationale are AMD's BKDGs which + recommend to clear the bit that makes RdMem/WrMem accessible). + This is the purpose of this patch. And (so far) this suffices to + fix (1) and (2). 
+ +I suggest not to touch RdDram/WrDram bits of fixed-MTRRs and +SYSCFG[MtrrFixDramEn] and to clear SYSCFG[MtrrFixDramModEn] as +suggested by AMD K8, and AMD family 10h/11h BKDGs. +BIOS is expected to do this anyway. This should avoid that +Linux and SMM tread on each other's toes ... + +Signed-off-by: Andreas Herrmann +Cc: trenn@suse.de +Cc: Yinghai Lu +LKML-Reference: <20090312163937.GH20716@alberich.amd.com> +Cc: +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/kernel/cpu/mtrr/generic.c | 51 +++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 21 deletions(-) + +--- a/arch/x86/kernel/cpu/mtrr/generic.c ++++ b/arch/x86/kernel/cpu/mtrr/generic.c +@@ -41,6 +41,32 @@ static int __init mtrr_debug(char *opt) + } + early_param("mtrr.show", mtrr_debug); + ++/** ++ * BIOS is expected to clear MtrrFixDramModEn bit, see for example ++ * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD ++ * Opteron Processors" (26094 Rev. 3.30 February 2006), section ++ * "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set ++ * to 1 during BIOS initalization of the fixed MTRRs, then cleared to ++ * 0 for operation." 
++ */ ++static inline void k8_check_syscfg_dram_mod_en(void) ++{ ++ u32 lo, hi; ++ ++ if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && ++ (boot_cpu_data.x86 >= 0x0f))) ++ return; ++ ++ rdmsr(MSR_K8_SYSCFG, lo, hi); ++ if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { ++ printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" ++ " not cleared by BIOS, clearing this bit\n", ++ smp_processor_id()); ++ lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; ++ mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); ++ } ++} ++ + /* + * Returns the effective MTRR type for the region + * Error returns: +@@ -174,6 +200,8 @@ get_fixed_ranges(mtrr_type * frs) + unsigned int *p = (unsigned int *) frs; + int i; + ++ k8_check_syscfg_dram_mod_en(); ++ + rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + + for (i = 0; i < 2; i++) +@@ -308,27 +336,10 @@ void mtrr_wrmsr(unsigned msr, unsigned a + } + + /** +- * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs +- * see AMD publication no. 24593, chapter 3.2.1 for more information +- */ +-static inline void k8_enable_fixed_iorrs(void) +-{ +- unsigned lo, hi; +- +- rdmsr(MSR_K8_SYSCFG, lo, hi); +- mtrr_wrmsr(MSR_K8_SYSCFG, lo +- | K8_MTRRFIXRANGE_DRAM_ENABLE +- | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); +-} +- +-/** + * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have + * @msr: MSR address of the MTTR which should be checked and updated + * @changed: pointer which indicates whether the MTRR needed to be changed + * @msrwords: pointer to the MSR values which the MSR should have +- * +- * If K8 extentions are wanted, update the K8 SYSCFG MSR also. +- * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information. 
+ */ + static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) + { +@@ -337,10 +348,6 @@ static void set_fixed_range(int msr, boo + rdmsr(msr, lo, hi); + + if (lo != msrwords[0] || hi != msrwords[1]) { +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && +- (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) && +- ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) +- k8_enable_fixed_iorrs(); + mtrr_wrmsr(msr, msrwords[0], msrwords[1]); + *changed = true; + } +@@ -419,6 +426,8 @@ static int set_fixed_ranges(mtrr_type * + bool changed = false; + int block=-1, range; + ++ k8_check_syscfg_dram_mod_en(); ++ + while (fixed_range_blocks[++block].ranges) + for (range=0; range < fixed_range_blocks[block].ranges; range++) + set_fixed_range(fixed_range_blocks[block].base_msr + range, diff --git a/queue-2.6.29/x86-pat-pci-change-vma-prot-in-pci_mmap-to-reflect-inherited-prot.patch b/queue-2.6.29/x86-pat-pci-change-vma-prot-in-pci_mmap-to-reflect-inherited-prot.patch new file mode 100644 index 00000000000..1422f1b8533 --- /dev/null +++ b/queue-2.6.29/x86-pat-pci-change-vma-prot-in-pci_mmap-to-reflect-inherited-prot.patch @@ -0,0 +1,48 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:25 2009 +Date: Mon, 30 Mar 2009 18:50:19 GMT +Message-Id: <200903301850.n2UIoJ6b028199@hera.kernel.org> +From: Pallipadi, Venkatesh +To: stable@kernel.org +Subject: x86, PAT, PCI: Change vma prot in pci_mmap to reflect inherited prot + +From: Pallipadi, Venkatesh + +upstream commit: 9cdec049389ce2c324fd1ec508a71528a27d4a07 + +While looking at the issue in the thread: + + http://marc.info/?l=dri-devel&m=123606627824556&w=2 + +noticed a bug in pci PAT code and memory type setting. + +PCI mmap code did not set the proper protection in vma, when it +inherited protection in reserve_memtype. This bug only affects +the case where there exists a WC mapping before X does an mmap +with /proc or /sys pci interface. 
This will cause X userlevel +mmap from /proc or /sysfs to fail on fork. + +Reported-by: Kevin Winchester +Signed-off-by: Venkatesh Pallipadi +Signed-off-by: Suresh Siddha +Cc: Jesse Barnes +Cc: Dave Airlie +Cc: +LKML-Reference: <20090323190720.GA16831@linux-os.sc.intel.com> +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/pci/i386.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/arch/x86/pci/i386.c ++++ b/arch/x86/pci/i386.c +@@ -319,6 +319,9 @@ int pci_mmap_page_range(struct pci_dev * + return -EINVAL; + } + flags = new_flags; ++ vma->vm_page_prot = __pgprot( ++ (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK) | ++ flags); + } + + if (((vma->vm_pgoff < max_low_pfn_mapped) || diff --git a/queue-2.6.29/x86-ptrace-bts-fix-an-unreachable-statement.patch b/queue-2.6.29/x86-ptrace-bts-fix-an-unreachable-statement.patch new file mode 100644 index 00000000000..a64ade440cb --- /dev/null +++ b/queue-2.6.29/x86-ptrace-bts-fix-an-unreachable-statement.patch @@ -0,0 +1,40 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:38 2009 +Date: Mon, 30 Mar 2009 18:50:30 GMT +Message-Id: <200903301850.n2UIoUvr028417@hera.kernel.org> +From: xiyou.wangcong@gmail.com +To: stable@kernel.org +Subject: x86: ptrace, bts: fix an unreachable statement + +From: xiyou.wangcong@gmail.com + +upstream commit: 5a8ac9d28dae5330c70562c7d7785f5104059c17 + +Commit c2724775ce57c98b8af9694857b941dc61056516 put a statement +after return, which makes that statement unreachable. + +Move that statement before return. 
+ +Signed-off-by: WANG Cong +Cc: Roland McGrath +Cc: Markus Metzger +LKML-Reference: <20090313075622.GB8933@hack> +Cc: # .29 only +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/kernel/ptrace.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/arch/x86/kernel/ptrace.c ++++ b/arch/x86/kernel/ptrace.c +@@ -690,9 +690,8 @@ static int ptrace_bts_config(struct task + if (!cfg.signal) + return -EINVAL; + +- return -EOPNOTSUPP; +- + child->thread.bts_ovfl_signal = cfg.signal; ++ return -EOPNOTSUPP; + } + + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && diff --git a/queue-2.6.29/x86-uv-fix-cpumask-iterator-in-uv_bau_init.patch b/queue-2.6.29/x86-uv-fix-cpumask-iterator-in-uv_bau_init.patch new file mode 100644 index 00000000000..5c5a993c514 --- /dev/null +++ b/queue-2.6.29/x86-uv-fix-cpumask-iterator-in-uv_bau_init.patch @@ -0,0 +1,49 @@ +From stable-bounces@linux.kernel.org Mon Mar 30 18:50:29 2009 +Date: Mon, 30 Mar 2009 18:50:23 GMT +Message-Id: <200903301850.n2UIoN0w028284@hera.kernel.org> +From: Rusty Russell +To: stable@kernel.org +Subject: x86, uv: fix cpumask iterator in uv_bau_init() + +From: Rusty Russell + +upstream commit: 2c74d66624ddbda8101d54d1e184cf9229b378bc + +Impact: fix boot crash on UV systems + +Commit 76ba0ecda0de9accea9a91cb6dbde46782110e1c "cpumask: use +cpumask_var_t in uv_flush_tlb_others" used cur_cpu as an iterator; +it was supposed to be zero for the code below it. 
+ +Reported-by: Cliff Wickman +Original-From: Cliff Wickman +Signed-off-by: Rusty Russell +Acked-by: Mike Travis +Cc: steiner@sgi.com +Cc: +LKML-Reference: <200903180822.31196.rusty@rustcorp.com.au> +Signed-off-by: Ingo Molnar +Signed-off-by: Chris Wright +--- + arch/x86/kernel/tlb_uv.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/kernel/tlb_uv.c ++++ b/arch/x86/kernel/tlb_uv.c +@@ -742,7 +742,7 @@ static int __init uv_bau_init(void) + int node; + int nblades; + int last_blade; +- int cur_cpu = 0; ++ int cur_cpu; + + if (!is_uv_system()) + return 0; +@@ -752,6 +752,7 @@ static int __init uv_bau_init(void) + uv_mmask = (1UL << uv_hub_info->n_val) - 1; + nblades = 0; + last_blade = -1; ++ cur_cpu = 0; + for_each_online_node(node) { + blade = uv_node_to_blade_id(node); + if (blade == last_blade)