From: Greg Kroah-Hartman Date: Fri, 2 Nov 2007 17:28:58 +0000 (-0700) Subject: more 2.6.22 patches (hopefully the last of them...) X-Git-Tag: v2.6.22.12~3 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1b79017c6afc0e9ded6ef558cae47c472f1d9145;p=thirdparty%2Fkernel%2Fstable-queue.git more 2.6.22 patches (hopefully the last of them...) --- diff --git a/queue-2.6.22/dm-snapshot-fix-invalidation-deadlock.patch b/queue-2.6.22/dm-snapshot-fix-invalidation-deadlock.patch new file mode 100644 index 00000000000..1bd4dc19f23 --- /dev/null +++ b/queue-2.6.22/dm-snapshot-fix-invalidation-deadlock.patch @@ -0,0 +1,138 @@ +From fcac03abd325e4f7a4cc8fe05fea2793b1c8eb75 Mon Sep 17 00:00:00 2001 +From: Milan Broz +Date: Thu, 12 Jul 2007 17:28:00 +0100 +Subject: [PATCH] dm snapshot: fix invalidation deadlock +Message-ID: <472A1A8E.3020101@redhat.com> + +From: Milan Broz + +patch fcac03abd325e4f7a4cc8fe05fea2793b1c8eb75 in mainline + +Process persistent exception store metadata IOs in a separate thread. + +A snapshot may become invalid while inside generic_make_request(). +A synchronous write is then needed to update the metadata while still +inside that function. Since the introduction of +md-dm-reduce-stack-usage-with-stacked-block-devices.patch this has to +be performed by a separate thread to avoid deadlock. 
+ +Signed-off-by: Milan Broz +Signed-off-by: Alasdair G Kergon +Signed-off-by: Linus Torvalds +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/dm-exception-store.c | 48 +++++++++++++++++++++++++++++++++++----- + 1 file changed, 43 insertions(+), 5 deletions(-) + +--- a/drivers/md/dm-exception-store.c ++++ b/drivers/md/dm-exception-store.c +@@ -125,6 +125,8 @@ struct pstore { + uint32_t callback_count; + struct commit_callback *callbacks; + struct dm_io_client *io_client; ++ ++ struct workqueue_struct *metadata_wq; + }; + + static inline unsigned int sectors_to_pages(unsigned int sectors) +@@ -156,10 +158,24 @@ static void free_area(struct pstore *ps) + ps->area = NULL; + } + ++struct mdata_req { ++ struct io_region *where; ++ struct dm_io_request *io_req; ++ struct work_struct work; ++ int result; ++}; ++ ++static void do_metadata(struct work_struct *work) ++{ ++ struct mdata_req *req = container_of(work, struct mdata_req, work); ++ ++ req->result = dm_io(req->io_req, 1, req->where, NULL); ++} ++ + /* + * Read or write a chunk aligned and sized block of data from a device. + */ +-static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) ++static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) + { + struct io_region where = { + .bdev = ps->snap->cow->bdev, +@@ -173,8 +189,23 @@ static int chunk_io(struct pstore *ps, u + .client = ps->io_client, + .notify.fn = NULL, + }; ++ struct mdata_req req; ++ ++ if (!metadata) ++ return dm_io(&io_req, 1, &where, NULL); + +- return dm_io(&io_req, 1, &where, NULL); ++ req.where = &where; ++ req.io_req = &io_req; ++ ++ /* ++ * Issue the synchronous I/O from a different thread ++ * to avoid generic_make_request recursion. 
++ */ ++ INIT_WORK(&req.work, do_metadata); ++ queue_work(ps->metadata_wq, &req.work); ++ flush_workqueue(ps->metadata_wq); ++ ++ return req.result; + } + + /* +@@ -189,7 +220,7 @@ static int area_io(struct pstore *ps, ui + /* convert a metadata area index to a chunk index */ + chunk = 1 + ((ps->exceptions_per_area + 1) * area); + +- r = chunk_io(ps, chunk, rw); ++ r = chunk_io(ps, chunk, rw, 0); + if (r) + return r; + +@@ -230,7 +261,7 @@ static int read_header(struct pstore *ps + if (r) + return r; + +- r = chunk_io(ps, 0, READ); ++ r = chunk_io(ps, 0, READ, 1); + if (r) + goto bad; + +@@ -292,7 +323,7 @@ static int write_header(struct pstore *p + dh->version = cpu_to_le32(ps->version); + dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); + +- return chunk_io(ps, 0, WRITE); ++ return chunk_io(ps, 0, WRITE, 1); + } + + /* +@@ -409,6 +440,7 @@ static void persistent_destroy(struct ex + { + struct pstore *ps = get_info(store); + ++ destroy_workqueue(ps->metadata_wq); + dm_io_client_destroy(ps->io_client); + vfree(ps->callbacks); + free_area(ps); +@@ -589,6 +621,12 @@ int dm_create_persistent(struct exceptio + atomic_set(&ps->pending_count, 0); + ps->callbacks = NULL; + ++ ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); ++ if (!ps->metadata_wq) { ++ DMERR("couldn't start header metadata update thread"); ++ return -ENOMEM; ++ } ++ + store->destroy = persistent_destroy; + store->read_metadata = persistent_read_metadata; + store->prepare_exception = persistent_prepare; diff --git a/queue-2.6.22/param_sysfs_builtin-memchr-argument-fix.patch b/queue-2.6.22/param_sysfs_builtin-memchr-argument-fix.patch new file mode 100644 index 00000000000..fdaa2ce2ae4 --- /dev/null +++ b/queue-2.6.22/param_sysfs_builtin-memchr-argument-fix.patch @@ -0,0 +1,89 @@ +From faf8c714f4508207a9c81cc94dafc76ed6680b44 Mon Sep 17 00:00:00 2001 +From: Dave Young +Date: Thu, 18 Oct 2007 03:05:07 -0700 +Subject: param_sysfs_builtin memchr argument fix +Message-ID: 
<4720EBA6.60803@redhat.com> + +From: Dave Young + +patch faf8c714f4508207a9c81cc94dafc76ed6680b44 in mainline. + +If memchr argument is longer than strlen(kp->name), there will be some +weird result. + +It will cause duplicate filenames in sysfs for the "nousb". kernel +warning messages are as below: + +sysfs: duplicate filename 'usbcore' can not be created +WARNING: at fs/sysfs/dir.c:416 sysfs_add_one() + [] sysfs_add_one+0xa0/0xe0 + [] create_dir+0x48/0xb0 + [] sysfs_create_dir+0x29/0x50 + [] create_dir+0x1b/0x50 + [] kobject_add+0x46/0x150 + [] kobject_init+0x3a/0x80 + [] kernel_param_sysfs_setup+0x50/0xb0 + [] param_sysfs_builtin+0xee/0x130 + [] param_sysfs_init+0x23/0x60 + [] __next_cpu+0x12/0x20 + [] kernel_init+0x0/0xb0 + [] kernel_init+0x0/0xb0 + [] do_initcalls+0x46/0x1e0 + [] create_proc_entry+0x52/0x90 + [] register_irq_proc+0x9c/0xc0 + [] proc_mkdir_mode+0x34/0x50 + [] kernel_init+0x0/0xb0 + [] kernel_init+0x62/0xb0 + [] kernel_thread_helper+0x7/0x14 + ======================= +kobject_add failed for usbcore with -EEXIST, don't try to register things with the same name in the same directory. + [] kobject_add+0xf6/0x150 + [] kernel_param_sysfs_setup+0x50/0xb0 + [] param_sysfs_builtin+0xee/0x130 + [] param_sysfs_init+0x23/0x60 + [] __next_cpu+0x12/0x20 + [] kernel_init+0x0/0xb0 + [] kernel_init+0x0/0xb0 + [] do_initcalls+0x46/0x1e0 + [] create_proc_entry+0x52/0x90 + [] register_irq_proc+0x9c/0xc0 + [] proc_mkdir_mode+0x34/0x50 + [] kernel_init+0x0/0xb0 + [] kernel_init+0x62/0xb0 + [] kernel_thread_helper+0x7/0x14 + ======================= +Module 'usbcore' failed to be added to sysfs, error number -17 +The system will be unstable now. 
+ +Signed-off-by: Dave Young +Cc: Greg KH +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/params.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/kernel/params.c ++++ b/kernel/params.c +@@ -591,11 +591,17 @@ static void __init param_sysfs_builtin(v + + for (i=0; i < __stop___param - __start___param; i++) { + char *dot; ++ size_t kplen; + + kp = &__start___param[i]; ++ kplen = strlen(kp->name); + + /* We do not handle args without periods. */ +- dot = memchr(kp->name, '.', MAX_KBUILD_MODNAME); ++ if (kplen > MAX_KBUILD_MODNAME) { ++ DEBUGP("kernel parameter name is too long: %s\n", kp->name); ++ continue; ++ } ++ dot = memchr(kp->name, '.', kplen); + if (!dot) { + DEBUGP("couldn't find period in %s\n", kp->name); + continue; diff --git a/queue-2.6.22/revert-x86_64-allocate-sparsemem-memmap-above-4g.patch b/queue-2.6.22/revert-x86_64-allocate-sparsemem-memmap-above-4g.patch new file mode 100644 index 00000000000..1d427cef79b --- /dev/null +++ b/queue-2.6.22/revert-x86_64-allocate-sparsemem-memmap-above-4g.patch @@ -0,0 +1,89 @@ +From 6a22c57b8d2a62dea7280a6b2ac807a539ef0716 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Mon, 29 Oct 2007 11:36:04 -0700 +Subject: Revert "x86_64: allocate sparsemem memmap above 4G" +Message-ID: <472A1BD7.6000400@redhat.com> + +From: Linus Torvalds + +patch 6a22c57b8d2a62dea7280a6b2ac807a539ef0716 in mainline. + +This reverts commit 2e1c49db4c640b35df13889b86b9d62215ade4b6. + +First off, testing in Fedora has shown it to cause boot failures, +bisected down by Martin Ebourne, and reported by Dave Jones. So the +commit will likely be reverted in the 2.6.23 stable kernels. 
+ +Secondly, in the 2.6.24 model, x86-64 has now grown support for +SPARSEMEM_VMEMMAP, which disables the relevant code anyway, so while the +bug is not visible any more, it's become invisible due to the code just +being irrelevant and no longer enabled on the only architecture that +this ever affected. + +backported to 2.6.22 by Chuck Ebbert + +Reported-by: Dave Jones +Tested-by: Martin Ebourne +Cc: Zou Nan hai +Cc: Suresh Siddha +Cc: Andrew Morton +Acked-by: Andy Whitcroft +Signed-off-by: Linus Torvalds +Cc: Chuck Ebbert +Signed-off-by: Greg Kroah-Hartman + + +--- + arch/x86_64/mm/init.c | 5 ----- + include/linux/bootmem.h | 1 - + mm/sparse.c | 11 ----------- + 3 files changed, 17 deletions(-) + +--- a/arch/x86_64/mm/init.c ++++ b/arch/x86_64/mm/init.c +@@ -769,8 +769,3 @@ int in_gate_area_no_task(unsigned long a + return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); + } + +-void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) +-{ +- return __alloc_bootmem_core(pgdat->bdata, size, +- SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); +-} +--- a/include/linux/bootmem.h ++++ b/include/linux/bootmem.h +@@ -59,7 +59,6 @@ extern void *__alloc_bootmem_core(struct + unsigned long align, + unsigned long goal, + unsigned long limit); +-extern void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size); + + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + extern void reserve_bootmem(unsigned long addr, unsigned long size); +--- a/mm/sparse.c ++++ b/mm/sparse.c +@@ -209,12 +209,6 @@ static int __meminit sparse_init_one_sec + return 1; + } + +-__attribute__((weak)) +-void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) +-{ +- return NULL; +-} +- + static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) + { + struct page *map; +@@ -225,11 +219,6 @@ static struct page __init *sparse_early_ + if (map) + return map; + +- map = alloc_bootmem_high_node(NODE_DATA(nid), +- sizeof(struct page) * PAGES_PER_SECTION); +- if (map) +- return 
map; +- + map = alloc_bootmem_node(NODE_DATA(nid), + sizeof(struct page) * PAGES_PER_SECTION); + if (map) diff --git a/queue-2.6.22/series b/queue-2.6.22/series index b99503ae2c8..c51b321a323 100644 --- a/queue-2.6.22/series +++ b/queue-2.6.22/series @@ -3,3 +3,7 @@ genirq-suppress-resend-of-level-interrupts.patch genirq-mark-io_apic-level-interrupts-to-avoid-resend.patch ib-uverbs-fix-checking-of-userspace-object-ownership.patch minixfs-limit-minixfs-printks-on-corrupted-dir-i_size.patch +param_sysfs_builtin-memchr-argument-fix.patch +x86-fix-global_flush_tlb-bug.patch +dm-snapshot-fix-invalidation-deadlock.patch +revert-x86_64-allocate-sparsemem-memmap-above-4g.patch diff --git a/queue-2.6.22/x86-fix-global_flush_tlb-bug.patch b/queue-2.6.22/x86-fix-global_flush_tlb-bug.patch new file mode 100644 index 00000000000..a2be77c9332 --- /dev/null +++ b/queue-2.6.22/x86-fix-global_flush_tlb-bug.patch @@ -0,0 +1,70 @@ +From 9a24d04a3c26c223f22493492c5c9085b8773d4a Mon Sep 17 00:00:00 2001 +From: Ingo Molnar +Date: Fri, 19 Oct 2007 12:19:26 +0200 +Subject: [PATCH] x86: fix global_flush_tlb() bug + +From: Ingo Molnar + +patch 9a24d04a3c26c223f22493492c5c9085b8773d4a upstream + +While we were reviewing pageattr_32/64.c for unification, +Thomas Gleixner noticed the following serious SMP bug in +global_flush_tlb(): + + down_read(&init_mm.mmap_sem); + list_replace_init(&deferred_pages, &l); + up_read(&init_mm.mmap_sem); + +this is SMP-unsafe because list_replace_init() done on two CPUs in +parallel can corrupt the list. 
+ +This bug was introduced about a year ago in the 64-bit tree: + + commit ea7322decb974a4a3e804f96a0201e893ff88ce3 + Author: Andi Kleen + Date: Thu Dec 7 02:14:05 2006 +0100 + + [PATCH] x86-64: Speed and clean up cache flushing in change_page_attr + + down_read(&init_mm.mmap_sem); + - dpage = xchg(&deferred_pages, NULL); + + list_replace_init(&deferred_pages, &l); + up_read(&init_mm.mmap_sem); + +the xchg() based version was SMP-safe, but list_replace_init() is not. +So this "cleanup" introduced a nasty bug. + +why this bug never became prominent is a mystery - it can probably be +explained with the (still) relative obscurity of the x86_64 architecture. + +the safe fix for now is to write-lock init_mm.mmap_sem. + +Signed-off-by: Ingo Molnar +Signed-off-by: Thomas Gleixner +Cc: Andi Kleen +Cc: Andrew Morton +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86_64/mm/pageattr.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/arch/x86_64/mm/pageattr.c ++++ b/arch/x86_64/mm/pageattr.c +@@ -227,9 +227,14 @@ void global_flush_tlb(void) + struct page *pg, *next; + struct list_head l; + +- down_read(&init_mm.mmap_sem); ++ /* ++ * Write-protect the semaphore, to exclude two contexts ++ * doing a list_replace_init() call in parallel and to ++ * exclude new additions to the deferred_pages list: ++ */ ++ down_write(&init_mm.mmap_sem); + list_replace_init(&deferred_pages, &l); +- up_read(&init_mm.mmap_sem); ++ up_write(&init_mm.mmap_sem); + + flush_map(&l); +