From 208093a4bc07b1aa68fef32f7af84d403e68ac88 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Nov 2008 16:05:51 -0800 Subject: [PATCH] start .27 patches --- ...ion-firmware-not-displayed-in-procfs.patch | 75 +++++++++ ...-fix-sysfs-broken-symlink-regression.patch | 40 +++++ queue-2.6.27/cciss-new-hardware-support.patch | 57 +++++++ ...e-cgroup-has-been-completely-removed.patch | 61 ++++++++ ...-all-pending-commits-in-ext3_sync_fs.patch | 78 ++++++++++ ...itialisation-out-of-the-default-path.patch | 136 ++++++++++++++++ ...le-pages-higher-order-than-max_order.patch | 147 ++++++++++++++++++ queue-2.6.27/series | 11 ++ ...orrupts-address-0xc000-during-resume.patch | 84 ++++++++++ .../x86-add-x86_reserve_low_64k.patch | 90 +++++++++++ ...x86-fix-config_x86_reserve_low_64k-y.patch | 64 ++++++++ ...ow-64k-on-ami-and-phoenix-bios-boxen.patch | 62 ++++++++ 12 files changed, 905 insertions(+) create mode 100644 queue-2.6.27/cciss-fix-regression-firmware-not-displayed-in-procfs.patch create mode 100644 queue-2.6.27/cciss-fix-sysfs-broken-symlink-regression.patch create mode 100644 queue-2.6.27/cciss-new-hardware-support.patch create mode 100644 queue-2.6.27/cgroups-fix-invalid-cgrp-dentry-before-cgroup-has-been-completely-removed.patch create mode 100644 queue-2.6.27/ext3-wait-on-all-pending-commits-in-ext3_sync_fs.patch create mode 100644 queue-2.6.27/hugetlb-pull-gigantic-page-initialisation-out-of-the-default-path.patch create mode 100644 queue-2.6.27/hugetlbfs-handle-pages-higher-order-than-max_order.patch create mode 100644 queue-2.6.27/series create mode 100644 queue-2.6.27/x86-add-dmi-quirk-for-ami-bios-which-corrupts-address-0xc000-during-resume.patch create mode 100644 queue-2.6.27/x86-add-x86_reserve_low_64k.patch create mode 100644 queue-2.6.27/x86-fix-config_x86_reserve_low_64k-y.patch create mode 100644 queue-2.6.27/x86-reserve-low-64k-on-ami-and-phoenix-bios-boxen.patch diff --git 
a/queue-2.6.27/cciss-fix-regression-firmware-not-displayed-in-procfs.patch b/queue-2.6.27/cciss-fix-regression-firmware-not-displayed-in-procfs.patch new file mode 100644 index 00000000000..f561f42221c --- /dev/null +++ b/queue-2.6.27/cciss-fix-regression-firmware-not-displayed-in-procfs.patch @@ -0,0 +1,75 @@ +From jejb@kernel.org Mon Nov 10 15:20:14 2008 +From: Mike Miller +Date: Fri, 7 Nov 2008 00:06:36 GMT +Subject: cciss: fix regression firmware not displayed in procfs +To: stable@kernel.org +Message-ID: <200811070006.mA706aMB004563@hera.kernel.org> + +From: Mike Miller + +commit 22bece00dc1f28dd3374c55e464c9f02eb642876 upstream + +This regression was introduced by commit +6ae5ce8e8d4de666f31286808d2285aa6a50fa40 ("cciss: remove redundant code"). + +This patch fixes a regression where the controller firmware version is not +displayed in procfs. The previous patch would be called anytime something +changed. This will get called only once for each controller. + +Signed-off-by: Mike Miller +Cc: FUJITA Tomonori +Cc: Jens Axboe +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/cciss.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +--- a/drivers/block/cciss.c ++++ b/drivers/block/cciss.c +@@ -3403,7 +3403,8 @@ static int __devinit cciss_init_one(stru + int i; + int j = 0; + int rc; +- int dac; ++ int dac, return_code; ++ InquiryData_struct *inq_buff = NULL; + + i = alloc_cciss_hba(); + if (i < 0) +@@ -3509,6 +3510,25 @@ static int __devinit cciss_init_one(stru + /* Turn the interrupts on so we can service requests */ + hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); + ++ /* Get the firmware version */ ++ inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL); ++ if (inq_buff == NULL) { ++ printk(KERN_ERR "cciss: out of memory\n"); ++ goto clean4; ++ } ++ ++ return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff, ++ sizeof(InquiryData_struct), 0, 0 , 0, 
TYPE_CMD); ++ if (return_code == IO_OK) { ++ hba[i]->firm_ver[0] = inq_buff->data_byte[32]; ++ hba[i]->firm_ver[1] = inq_buff->data_byte[33]; ++ hba[i]->firm_ver[2] = inq_buff->data_byte[34]; ++ hba[i]->firm_ver[3] = inq_buff->data_byte[35]; ++ } else { /* send command failed */ ++ printk(KERN_WARNING "cciss: unable to determine firmware" ++ " version of controller\n"); ++ } ++ + cciss_procinit(i); + + hba[i]->cciss_max_sectors = 2048; +@@ -3519,6 +3539,7 @@ static int __devinit cciss_init_one(stru + return 1; + + clean4: ++ kfree(inq_buff); + #ifdef CONFIG_CISS_SCSI_TAPE + kfree(hba[i]->scsi_rejects.complete); + #endif diff --git a/queue-2.6.27/cciss-fix-sysfs-broken-symlink-regression.patch b/queue-2.6.27/cciss-fix-sysfs-broken-symlink-regression.patch new file mode 100644 index 00000000000..6982d1b30b6 --- /dev/null +++ b/queue-2.6.27/cciss-fix-sysfs-broken-symlink-regression.patch @@ -0,0 +1,40 @@ +From jejb@kernel.org Mon Nov 10 15:20:51 2008 +From: Mike Miller +Date: Fri, 7 Nov 2008 00:06:50 GMT +Subject: cciss: fix sysfs broken symlink regression +To: stable@kernel.org +Message-ID: <200811070006.mA706oRj004912@hera.kernel.org> + +From: Mike Miller + +commit 404443081ce5e6f68b5f7eda16c959835ff200c0 upstream + +Regression introduced by commit 6ae5ce8e8d4de666f31286808d2285aa6a50fa40 +("cciss: remove redundant code"). + +This patch fixes a broken symlink in sysfs that was introduced by the +above commit. We broke it in 2.6.27-rc on or about 20080804. Some +installers are broken if this symlink does not exist and they may not +detect the logical drives configured on the controller. It does not +require being backported into 2.6.26.x or earlier kernels. 
+ +Signed-off-by: Mike Miller +Cc: Jens Axboe +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/cciss.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/block/cciss.c ++++ b/drivers/block/cciss.c +@@ -1365,6 +1365,7 @@ static void cciss_add_disk(ctlr_info_t * + disk->first_minor = drv_index << NWD_SHIFT; + disk->fops = &cciss_fops; + disk->private_data = &h->drv[drv_index]; ++ disk->driverfs_dev = &h->pdev->dev; + + /* Set up queue information */ + blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask); diff --git a/queue-2.6.27/cciss-new-hardware-support.patch b/queue-2.6.27/cciss-new-hardware-support.patch new file mode 100644 index 00000000000..0ce1533c95e --- /dev/null +++ b/queue-2.6.27/cciss-new-hardware-support.patch @@ -0,0 +1,57 @@ +From linux-kernel@vger.kernel.org Mon Nov 10 15:21:35 2008 +From: Mike Miller +Date: Thu, 6 Nov 2008 23:59:04 GMT +Subject: cciss: new hardware support +To: stable@kernel.org +Message-ID: <200811062359.mA6Nx4i8025189@hera.kernel.org> + +From: Mike Miller + +commit 77ca7286d10b798e4907af941f29672bf484db77 upstream + +cciss: new hardware support + +Add support for 2 new SAS/SATA controllers. 
+ +Signed-off-by: Mike Miller +Cc: Jens Axboe +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/cciss.txt | 2 ++ + drivers/block/cciss.c | 4 ++++ + 2 files changed, 6 insertions(+) + +--- a/Documentation/cciss.txt ++++ b/Documentation/cciss.txt +@@ -26,6 +26,8 @@ This driver is known to work with the fo + * SA P410i + * SA P411 + * SA P812 ++ * SA P712m ++ * SA P711m + + Detecting drive failures: + ------------------------- +--- a/drivers/block/cciss.c ++++ b/drivers/block/cciss.c +@@ -96,6 +96,8 @@ static const struct pci_device_id cciss_ + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3245}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3247}, + {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, ++ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, ++ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, + {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, + {0,} +@@ -133,6 +135,8 @@ static struct board_type products[] = { + {0x3245103C, "Smart Array P410i", &SA5_access}, + {0x3247103C, "Smart Array P411", &SA5_access}, + {0x3249103C, "Smart Array P812", &SA5_access}, ++ {0x324A103C, "Smart Array P712m", &SA5_access}, ++ {0x324B103C, "Smart Array P711m", &SA5_access}, + {0xFFFF103C, "Unknown Smart Array", &SA5_access}, + }; + diff --git a/queue-2.6.27/cgroups-fix-invalid-cgrp-dentry-before-cgroup-has-been-completely-removed.patch b/queue-2.6.27/cgroups-fix-invalid-cgrp-dentry-before-cgroup-has-been-completely-removed.patch new file mode 100644 index 00000000000..17469877304 --- /dev/null +++ b/queue-2.6.27/cgroups-fix-invalid-cgrp-dentry-before-cgroup-has-been-completely-removed.patch @@ -0,0 +1,61 @@ +From jejb@kernel.org Mon Nov 10 15:14:35 2008 +From: Li Zefan +Date: Fri, 7 Nov 2008 00:05:48 GMT +Subject: cgroups: fix invalid cgrp->dentry before cgroup has been completely removed +To: stable@kernel.org 
+Message-ID: <200811070005.mA705mbU003066@hera.kernel.org> + +From: Li Zefan + +commit 24eb089950ce44603b30a3145a2c8520e2b55bb1 upstream + +This fixes an oops when reading /proc/sched_debug. + +A cgroup won't be removed completely until finishing cgroup_diput(), so we +shouldn't invalidate cgrp->dentry in cgroup_rmdir(). Otherwise, when a +group is being removed while cgroup_path() gets called, we may trigger +NULL dereference BUG. + +The bug can be reproduced: + + # cat test.sh + #!/bin/sh + mount -t cgroup -o cpu xxx /mnt + for (( ; ; )) + { + mkdir /mnt/sub + rmdir /mnt/sub + } + # ./test.sh & + # cat /proc/sched_debug + +BUG: unable to handle kernel NULL pointer dereference at 00000038 +IP: [] cgroup_path+0x39/0x90 +.. +Call Trace: + [] ? print_cfs_rq+0x6e/0x75d + [] ? sched_debug_show+0x72d/0xc1e +.. + +Signed-off-by: Li Zefan +Acked-by: Paul Menage +Cc: Peter Zijlstra +Cc: Ingo Molnar +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/cgroup.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/kernel/cgroup.c ++++ b/kernel/cgroup.c +@@ -2443,7 +2443,6 @@ static int cgroup_rmdir(struct inode *un + list_del(&cgrp->sibling); + spin_lock(&cgrp->dentry->d_lock); + d = dget(cgrp->dentry); +- cgrp->dentry = NULL; + spin_unlock(&d->d_lock); + + cgroup_d_remove_dir(d); diff --git a/queue-2.6.27/ext3-wait-on-all-pending-commits-in-ext3_sync_fs.patch b/queue-2.6.27/ext3-wait-on-all-pending-commits-in-ext3_sync_fs.patch new file mode 100644 index 00000000000..6936a1c2270 --- /dev/null +++ b/queue-2.6.27/ext3-wait-on-all-pending-commits-in-ext3_sync_fs.patch @@ -0,0 +1,78 @@ +From jejb@kernel.org Mon Nov 10 15:08:55 2008 +From: Arthur Jones +Date: Fri, 7 Nov 2008 00:05:17 GMT +Subject: ext3: wait on all pending commits in ext3_sync_fs +To: stable@kernel.org +Message-ID: <200811070005.mA705Htq002320@hera.kernel.org> + +From: Arthur Jones + +commit c87591b719737b4e91eb1a9fa8fd55a4ff1886d6 upstream + +In ext3_sync_fs, 
we only wait for a commit to finish if we started it, but +there may be one already in progress which will not be synced. + +In the case of a data=ordered umount with pending long symlinks which are +delayed due to a long list of other I/O on the backing block device, this +causes the buffer associated with the long symlinks to not be moved to the +inode dirty list in the second phase of fsync_super. Then, before they +can be dirtied again, kjournald exits, seeing the UMOUNT flag and the +dirty pages are never written to the backing block device, causing long +symlink corruption and exposing new or previously freed block data to +userspace. + +This can be reproduced with a script created +by Eric Sandeen : + + #!/bin/bash + + umount /mnt/test2 + mount /dev/sdb4 /mnt/test2 + rm -f /mnt/test2/* + dd if=/dev/zero of=/mnt/test2/bigfile bs=1M count=512 + touch + /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename + ln -s + /mnt/test2/thisisveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongfilename + /mnt/test2/link + umount /mnt/test2 + mount /dev/sdb4 /mnt/test2 + ls /mnt/test2/ + umount /mnt/test2 + +To ensure all commits are synced, we flush all journal commits now when +sync_fs'ing ext3. 
+ +Signed-off-by: Arthur Jones +Cc: Eric Sandeen +Cc: Theodore Ts'o +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ext3/super.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +--- a/fs/ext3/super.c ++++ b/fs/ext3/super.c +@@ -2365,13 +2365,12 @@ static void ext3_write_super (struct sup + + static int ext3_sync_fs(struct super_block *sb, int wait) + { +- tid_t target; +- + sb->s_dirt = 0; +- if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { +- if (wait) +- log_wait_commit(EXT3_SB(sb)->s_journal, target); +- } ++ if (wait) ++ ext3_force_commit(sb); ++ else ++ journal_start_commit(EXT3_SB(sb)->s_journal, NULL); ++ + return 0; + } + diff --git a/queue-2.6.27/hugetlb-pull-gigantic-page-initialisation-out-of-the-default-path.patch b/queue-2.6.27/hugetlb-pull-gigantic-page-initialisation-out-of-the-default-path.patch new file mode 100644 index 00000000000..7eb050945b9 --- /dev/null +++ b/queue-2.6.27/hugetlb-pull-gigantic-page-initialisation-out-of-the-default-path.patch @@ -0,0 +1,136 @@ +From jejb@kernel.org Mon Nov 10 15:15:31 2008 +From: Andy Whitcroft +Date: Fri, 7 Nov 2008 00:06:05 GMT +Subject: hugetlb: pull gigantic page initialisation out of the default path +To: stable@kernel.org +Message-ID: <200811070006.mA7065sH003540@hera.kernel.org> + +From: Andy Whitcroft + +commit 18229df5b613ed0732a766fc37850de2e7988e43 upstream + +As we can determine exactly when a gigantic page is in use we can optimise +the common regular page cases by pulling out gigantic page initialisation +into its own function. As gigantic pages are never released to buddy we +do not need a destructor. This effectively reverts the previous change to +the main buddy allocator. It also adds a paranoid check to ensure we +never release gigantic pages from hugetlbfs to the main buddy.
+ +Signed-off-by: Andy Whitcroft +Cc: Jon Tollefson +Cc: Mel Gorman +Cc: Nick Piggin +Cc: Christoph Lameter +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/hugetlb.c | 12 +++++++++++- + mm/internal.h | 1 + + mm/page_alloc.c | 28 +++++++++++++++++++++------- + 3 files changed, 33 insertions(+), 8 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -455,6 +455,8 @@ static void update_and_free_page(struct + { + int i; + ++ VM_BUG_ON(h->order >= MAX_ORDER); ++ + h->nr_huge_pages--; + h->nr_huge_pages_node[page_to_nid(page)]--; + for (i = 0; i < pages_per_huge_page(h); i++) { +@@ -969,6 +971,14 @@ found: + return 1; + } + ++static void prep_compound_huge_page(struct page *page, int order) ++{ ++ if (unlikely(order > (MAX_ORDER - 1))) ++ prep_compound_gigantic_page(page, order); ++ else ++ prep_compound_page(page, order); ++} ++ + /* Put bootmem huge pages into the standard lists after mem_map is up */ + static void __init gather_bootmem_prealloc(void) + { +@@ -979,7 +989,7 @@ static void __init gather_bootmem_preall + struct hstate *h = m->hstate; + __ClearPageReserved(page); + WARN_ON(page_count(page) != 1); +- prep_compound_page(page, h->order); ++ prep_compound_huge_page(page, h->order); + prep_new_huge_page(h, page, page_to_nid(page)); + } + } +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tl + unsigned long floor, unsigned long ceiling); + + extern void prep_compound_page(struct page *page, unsigned long order); ++extern void prep_compound_gigantic_page(struct page *page, unsigned long order); + + static inline void set_page_count(struct page *page, int v) + { +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -268,24 +268,39 @@ void prep_compound_page(struct page *pag + { + int i; + int nr_pages = 1 << order; ++ ++ set_compound_page_dtor(page, free_compound_page); ++ set_compound_order(page, order); ++ __SetPageHead(page); ++ for (i = 1; i < 
nr_pages; i++) { ++ struct page *p = page + i; ++ ++ __SetPageTail(p); ++ p->first_page = page; ++ } ++} ++ ++#ifdef CONFIG_HUGETLBFS ++void prep_compound_gigantic_page(struct page *page, unsigned long order) ++{ ++ int i; ++ int nr_pages = 1 << order; + struct page *p = page + 1; + + set_compound_page_dtor(page, free_compound_page); + set_compound_order(page, order); + __SetPageHead(page); +- for (i = 1; i < nr_pages; i++, p++) { +- if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) +- p = pfn_to_page(page_to_pfn(page) + i); ++ for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { + __SetPageTail(p); + p->first_page = page; + } + } ++#endif + + static void destroy_compound_page(struct page *page, unsigned long order) + { + int i; + int nr_pages = 1 << order; +- struct page *p = page + 1; + + if (unlikely(compound_order(page) != order)) + bad_page(page); +@@ -293,9 +308,8 @@ static void destroy_compound_page(struct + if (unlikely(!PageHead(page))) + bad_page(page); + __ClearPageHead(page); +- for (i = 1; i < nr_pages; i++, p++) { +- if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) +- p = pfn_to_page(page_to_pfn(page) + i); ++ for (i = 1; i < nr_pages; i++) { ++ struct page *p = page + i; + + if (unlikely(!PageTail(p) | + (p->first_page != page))) diff --git a/queue-2.6.27/hugetlbfs-handle-pages-higher-order-than-max_order.patch b/queue-2.6.27/hugetlbfs-handle-pages-higher-order-than-max_order.patch new file mode 100644 index 00000000000..d1d40b62b00 --- /dev/null +++ b/queue-2.6.27/hugetlbfs-handle-pages-higher-order-than-max_order.patch @@ -0,0 +1,147 @@ +From jejb@kernel.org Mon Nov 10 15:16:22 2008 +From: Andy Whitcroft +Date: Fri, 7 Nov 2008 00:06:24 GMT +Subject:hugetlbfs: handle pages higher order than MAX_ORDER +To: stable@kernel.org +Message-ID: <200811070006.mA706OTv004171@hera.kernel.org> + +From: Andy Whitcroft + +commit 69d177c2fc702d402b17fdca2190d5a7e3ca55c5 upstream + +When working with hugepages, hugetlbfs assumes that those hugepages are 
+smaller than MAX_ORDER. Specifically it assumes that the mem_map is +contiguous and uses that to optimise access to the elements of the mem_map +that represent the hugepage. Gigantic pages (such as 16GB pages on +powerpc) by definition are of greater order than MAX_ORDER (larger than +MAX_ORDER_NR_PAGES in size). This means that we can no longer make use of +the buddy allocator guarantees for the contiguity of the mem_map, which +ensures that the mem_map is at least contiguous for maximally aligned +areas of MAX_ORDER_NR_PAGES pages. + +This patch adds new mem_map accessors and iterator helpers which handle +any discontiguity at MAX_ORDER_NR_PAGES boundaries. It then uses these to +implement gigantic page versions of copy_huge_page and clear_huge_page, +and to allow follow_hugetlb_page to handle gigantic pages. + +Signed-off-by: Andy Whitcroft +Cc: Jon Tollefson +Cc: Mel Gorman +Cc: Nick Piggin +Cc: Christoph Lameter +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds + +--- + mm/hugetlb.c | 37 ++++++++++++++++++++++++++++++++++++- + mm/internal.h | 28 ++++++++++++++++++++++++++++ + 2 files changed, 64 insertions(+), 1 deletion(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -353,11 +353,26 @@ static int vma_has_reserves(struct vm_ar + return 0; + } + ++static void clear_gigantic_page(struct page *page, ++ unsigned long addr, unsigned long sz) ++{ ++ int i; ++ struct page *p = page; ++ ++ might_sleep(); ++ for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { ++ cond_resched(); ++ clear_user_highpage(p, addr + i * PAGE_SIZE); ++ } ++} + static void clear_huge_page(struct page *page, + unsigned long addr, unsigned long sz) + { + int i; + ++ if (unlikely(sz > MAX_ORDER_NR_PAGES)) ++ return clear_gigantic_page(page, addr, sz); ++ + might_sleep(); + for (i = 0; i < sz/PAGE_SIZE; i++) { + cond_resched(); +@@ -365,12 +380,32 @@ static void clear_huge_page(struct page + } + } + ++static void copy_gigantic_page(struct page *dst, struct page *src, ++
unsigned long addr, struct vm_area_struct *vma) ++{ ++ int i; ++ struct hstate *h = hstate_vma(vma); ++ struct page *dst_base = dst; ++ struct page *src_base = src; ++ might_sleep(); ++ for (i = 0; i < pages_per_huge_page(h); ) { ++ cond_resched(); ++ copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); ++ ++ i++; ++ dst = mem_map_next(dst, dst_base, i); ++ src = mem_map_next(src, src_base, i); ++ } ++} + static void copy_huge_page(struct page *dst, struct page *src, + unsigned long addr, struct vm_area_struct *vma) + { + int i; + struct hstate *h = hstate_vma(vma); + ++ if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) ++ return copy_gigantic_page(dst, src, addr, vma); ++ + might_sleep(); + for (i = 0; i < pages_per_huge_page(h); i++) { + cond_resched(); +@@ -2113,7 +2148,7 @@ int follow_hugetlb_page(struct mm_struct + same_page: + if (pages) { + get_page(page); +- pages[i] = page + pfn_offset; ++ pages[i] = mem_map_offset(page, pfn_offset); + } + + if (vmas) +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -54,6 +54,34 @@ static inline unsigned long page_order(s + } + + /* ++ * Return the mem_map entry representing the 'offset' subpage within ++ * the maximally aligned gigantic page 'base'. Handle any discontiguity ++ * in the mem_map at MAX_ORDER_NR_PAGES boundaries. ++ */ ++static inline struct page *mem_map_offset(struct page *base, int offset) ++{ ++ if (unlikely(offset >= MAX_ORDER_NR_PAGES)) ++ return pfn_to_page(page_to_pfn(base) + offset); ++ return base + offset; ++} ++ ++/* ++ * Iterator over all subpages withing the maximally aligned gigantic ++ * page 'base'. Handle any discontiguity in the mem_map. 
++ */ ++static inline struct page *mem_map_next(struct page *iter, ++ struct page *base, int offset) ++{ ++ if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { ++ unsigned long pfn = page_to_pfn(base) + offset; ++ if (!pfn_valid(pfn)) ++ return NULL; ++ return pfn_to_page(pfn); ++ } ++ return iter + 1; ++} ++ ++/* + * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, + * so all functions starting at paging_init should be marked __init + * in those cases. SPARSEMEM, however, allows for memory hotplug, diff --git a/queue-2.6.27/series b/queue-2.6.27/series new file mode 100644 index 00000000000..853a8b1d6cb --- /dev/null +++ b/queue-2.6.27/series @@ -0,0 +1,11 @@ +ext3-wait-on-all-pending-commits-in-ext3_sync_fs.patch +x86-add-dmi-quirk-for-ami-bios-which-corrupts-address-0xc000-during-resume.patch +x86-reserve-low-64k-on-ami-and-phoenix-bios-boxen.patch +x86-add-x86_reserve_low_64k.patch +x86-fix-config_x86_reserve_low_64k-y.patch +cgroups-fix-invalid-cgrp-dentry-before-cgroup-has-been-completely-removed.patch +hugetlb-pull-gigantic-page-initialisation-out-of-the-default-path.patch +hugetlbfs-handle-pages-higher-order-than-max_order.patch +cciss-fix-regression-firmware-not-displayed-in-procfs.patch +cciss-fix-sysfs-broken-symlink-regression.patch +cciss-new-hardware-support.patch diff --git a/queue-2.6.27/x86-add-dmi-quirk-for-ami-bios-which-corrupts-address-0xc000-during-resume.patch b/queue-2.6.27/x86-add-dmi-quirk-for-ami-bios-which-corrupts-address-0xc000-during-resume.patch new file mode 100644 index 00000000000..5a3d3e29e7c --- /dev/null +++ b/queue-2.6.27/x86-add-dmi-quirk-for-ami-bios-which-corrupts-address-0xc000-during-resume.patch @@ -0,0 +1,84 @@ +From 5649b7c30316a51792808422ac03ee825d26aa5e Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 16 Sep 2008 09:29:09 +0200 +Subject: x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume + +From: Ingo Molnar + +commit 5649b7c30316a51792808422ac03ee825d26aa5e 
upstream + +Alan Jenkins and Andy Wettstein reported a suspend/resume memory +corruption bug and extensively documented it here: + + http://bugzilla.kernel.org/show_bug.cgi?id=11237 + +The bug is that the BIOS overwrites 1K of memory at 0xc000 physical, +without registering it in e820 as reserved or giving the kernel any +idea about this. + +Detect AMI BIOSen and reserve that 1K. + +We paint this bug around with a very broad brush (reserving that 1K on all +AMI BIOS systems), as the bug was extremely hard to find and needed several +weeks and lots of debugging and patching. + +The bug was found via the CONFIG_X86_CHECK_BIOS_CORRUPTION=y debug feature, +if similar bugs are suspected then this feature can be enabled on other +systems as well to scan low memory for corrupted memory. + +Reported-by: Alan Jenkins +Reported-by: Andy Wettstein +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/setup.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -578,6 +578,29 @@ static struct x86_quirks default_x86_qui + + struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + ++static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) ++{ ++ printk(KERN_NOTICE ++ "%s detected: BIOS corrupts 0xc000, working it around.\n", ++ d->ident); ++ ++ reserve_early(0xc000, 0xc400, "BIOS quirk"); ++ ++ return 0; ++} ++ ++/* List of systems that have known low memory corruption BIOS problems */ ++static struct dmi_system_id __initdata bad_bios_dmi_table[] = { ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "AMI BIOS", ++ .matches = { ++ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), ++ }, ++ }, ++ {} ++}; ++ + /* + * Determine if we were loaded by an EFI loader. 
If so, then we have also been + * passed the efi memmap, systab, etc., so we should use these data structures +@@ -601,6 +624,8 @@ void __init setup_arch(char **cmdline_p) + printk(KERN_INFO "Command line: %s\n", boot_command_line); + #endif + ++ dmi_check_system(bad_bios_dmi_table); ++ + early_cpu_init(); + early_ioremap_init(); + +@@ -885,3 +910,5 @@ void __init setup_arch(char **cmdline_p) + #endif + #endif + } ++ ++ diff --git a/queue-2.6.27/x86-add-x86_reserve_low_64k.patch b/queue-2.6.27/x86-add-x86_reserve_low_64k.patch new file mode 100644 index 00000000000..dcf477ca3bb --- /dev/null +++ b/queue-2.6.27/x86-add-x86_reserve_low_64k.patch @@ -0,0 +1,90 @@ +From fc38151947477596aa27df6c4306ad6008dc6711 Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 16 Sep 2008 10:07:34 +0200 +Subject: x86: add X86_RESERVE_LOW_64K + +From: Ingo Molnar + +commit fc38151947477596aa27df6c4306ad6008dc6711 upstream. + +This bugzilla: + + http://bugzilla.kernel.org/show_bug.cgi?id=11237 + +Documents a wide range of systems where the BIOS utilizes the first +64K of physical memory during suspend/resume and other hardware events. + +Currently we reserve this memory on all AMI and Phoenix BIOS systems. +Life is too short to hunt subtle memory corruption problems like this, +so we try to be robust by default. + +Still, allow this to be overridden: allow users who want that first 64K +of memory to be available to the kernel to disable the quirk, via +CONFIG_X86_RESERVE_LOW_64K=n. + +Also, allow the early reservation to overlap with other +early reservations. + +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/Kconfig | 20 ++++++++++++++++++++ + arch/x86/kernel/setup.c | 4 +++- + 2 files changed, 23 insertions(+), 1 deletion(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1059,6 +1059,26 @@ config HIGHPTE + low memory. Setting this option will put user-space page table + entries in high memory.
+ ++config X86_RESERVE_LOW_64K ++ bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" ++ default y ++ help ++ Reserve the first 64K of physical RAM on BIOSes that are known ++ to potentially corrupt that memory range. A numbers of BIOSes are ++ known to utilize this area during suspend/resume, so it must not ++ be used by the kernel. ++ ++ Set this to N if you are absolutely sure that you trust the BIOS ++ to get all its memory reservations and usages right. ++ ++ If you have doubts about the BIOS (e.g. suspend/resume does not ++ work or there's kernel crashes after certain hardware hotplug ++ events) and it's not AMI or Phoenix, then you might want to enable ++ X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical ++ corruption patterns. ++ ++ Say Y if unsure. ++ + config MATH_EMULATION + bool + prompt "Math emulation" if X86_32 +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -584,13 +584,14 @@ static int __init dmi_low_memory_corrupt + "%s detected: BIOS may corrupt low RAM, working it around.\n", + d->ident); + +- reserve_early(0x0, 0x10000, "BIOS quirk"); ++ reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); + + return 0; + } + + /* List of systems that have known low memory corruption BIOS problems */ + static struct dmi_system_id __initdata bad_bios_dmi_table[] = { ++#ifdef CONFIG_X86_RESERVE_LOW_64K + { + .callback = dmi_low_memory_corruption, + .ident = "AMI BIOS", +@@ -606,6 +607,7 @@ static struct dmi_system_id __initdata b + }, + }, + {} ++#endif + }; + + /* diff --git a/queue-2.6.27/x86-fix-config_x86_reserve_low_64k-y.patch b/queue-2.6.27/x86-fix-config_x86_reserve_low_64k-y.patch new file mode 100644 index 00000000000..ff3800860fe --- /dev/null +++ b/queue-2.6.27/x86-fix-config_x86_reserve_low_64k-y.patch @@ -0,0 +1,64 @@ +From 2216d199b1430d1c0affb1498a9ebdbd9c0de439 Mon Sep 17 00:00:00 2001 +From: Yinghai Lu +Date: Mon, 22 Sep 2008 02:52:26 -0700 +Subject: x86: fix CONFIG_X86_RESERVE_LOW_64K=y + +From: Yinghai Lu + 
+commit 2216d199b1430d1c0affb1498a9ebdbd9c0de439 upstream + +The bad_bios_dmi_table() quirk never triggered because we do DMI setup +too late. Move it a bit earlier. + +Also change the CONFIG_X86_RESERVE_LOW_64K quirk to operate on the e820 +table directly instead of messing with early reservations - this handles +overlaps (which do occur in this low range of RAM) more gracefully. + +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/setup.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -584,7 +584,8 @@ static int __init dmi_low_memory_corrupt + "%s detected: BIOS may corrupt low RAM, working it around.\n", + d->ident); + +- reserve_early_overlap_ok(0x0, 0x10000, "BIOS quirk"); ++ e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); ++ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); + + return 0; + } +@@ -633,8 +634,6 @@ void __init setup_arch(char **cmdline_p) + printk(KERN_INFO "Command line: %s\n", boot_command_line); + #endif + +- dmi_check_system(bad_bios_dmi_table); +- + early_cpu_init(); + early_ioremap_init(); + +@@ -733,6 +732,10 @@ void __init setup_arch(char **cmdline_p) + + finish_e820_parsing(); + ++ dmi_scan_machine(); ++ ++ dmi_check_system(bad_bios_dmi_table); ++ + #ifdef CONFIG_X86_32 + probe_roms(); + #endif +@@ -815,8 +818,6 @@ void __init setup_arch(char **cmdline_p) + vsmp_init(); + #endif + +- dmi_scan_machine(); +- + io_delay_init(); + + /* diff --git a/queue-2.6.27/x86-reserve-low-64k-on-ami-and-phoenix-bios-boxen.patch b/queue-2.6.27/x86-reserve-low-64k-on-ami-and-phoenix-bios-boxen.patch new file mode 100644 index 00000000000..0de192c540a --- /dev/null +++ b/queue-2.6.27/x86-reserve-low-64k-on-ami-and-phoenix-bios-boxen.patch @@ -0,0 +1,62 @@ +From 1e22436eba84edfec9c25e5a25d09062c4f91ca9 Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Tue, 16 Sep 2008 09:58:02 +0200 +Subject: x86: reserve low 
64K on AMI and Phoenix BIOS boxen + +From: Ingo Molnar + +commit 1e22436eba84edfec9c25e5a25d09062c4f91ca9 upstream + +there's multiple reports about suspend/resume related low memory +corruption in this bugzilla: + + http://bugzilla.kernel.org/show_bug.cgi?id=11237 + +the common pattern is that the corruption is caused by the BIOS, +and that it affects some portion of the first 64K of physical RAM. + +So add a DMI quirk + +This will waste 64K RAM on 'good' systems too, but without knowing +the exact nature of this BIOS memory corruption this is the safest +approach. + +This might as well solve a wide range of suspend/resume breakages +under Linux. + +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/setup.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/setup.c ++++ b/arch/x86/kernel/setup.c +@@ -581,10 +581,10 @@ struct x86_quirks *x86_quirks __initdata + static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) + { + printk(KERN_NOTICE +- "%s detected: BIOS corrupts 0xc000, working it around.\n", ++ "%s detected: BIOS may corrupt low RAM, working it around.\n", + d->ident); + +- reserve_early(0xc000, 0xc400, "BIOS quirk"); ++ reserve_early(0x0, 0x10000, "BIOS quirk"); + + return 0; + } +@@ -598,6 +598,13 @@ static struct dmi_system_id __initdata b + DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), + }, + }, ++ { ++ .callback = dmi_low_memory_corruption, ++ .ident = "Phoenix BIOS", ++ .matches = { ++ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), ++ }, ++ }, + {} + }; + -- 2.47.3