From: Greg Kroah-Hartman Date: Fri, 1 Mar 2013 00:30:32 +0000 (-0800) Subject: 3.0-stable patches X-Git-Tag: v3.8.2~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=773c286111470b6df67a024c8b61f5674c11453d;p=thirdparty%2Fkernel%2Fstable-queue.git 3.0-stable patches added patches: doc-kernel-parameters-document-console-hvc-n.patch doc-xen-mention-earlyprintk-xen-in-the-documentation.patch ftrace-call-ftrace-cleanup-module-notifier-after-all-other-notifiers.patch iommu-amd-initialize-device-table-after-dma_ops.patch ocfs2-ac-ac_allow_chain_relink-0-won-t-disable-group-relink.patch posix-timer-don-t-call-idr_find-with-out-of-range-id.patch target-add-missing-mapped_lun-bounds-checking-during-make_mappedlun-setup.patch x86-make-sure-we-can-boot-in-the-case-the-bda-contains-pure-garbage.patch --- diff --git a/queue-3.0/doc-kernel-parameters-document-console-hvc-n.patch b/queue-3.0/doc-kernel-parameters-document-console-hvc-n.patch new file mode 100644 index 00000000000..618c3867a0c --- /dev/null +++ b/queue-3.0/doc-kernel-parameters-document-console-hvc-n.patch @@ -0,0 +1,32 @@ +From a2fd6419174470f5ae6383f5037d0ee21ed9833f Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 25 Feb 2013 15:54:09 -0500 +Subject: doc, kernel-parameters: Document 'console=hvc' + +From: Konrad Rzeszutek Wilk + +commit a2fd6419174470f5ae6383f5037d0ee21ed9833f upstream. + +Both the PowerPC hypervisor and Xen hypervisor can utilize the +hvc driver. + +Signed-off-by: Konrad Rzeszutek Wilk +Link: http://lkml.kernel.org/r/1361825650-14031-3-git-send-email-konrad.wilk@oracle.com +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/kernel-parameters.txt | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -531,6 +531,8 @@ bytes respectively. Such letter suffixes + UART at the specified I/O port or MMIO address, + switching to the matching ttyS device later. 
The + options are the same as for ttyS, above. ++ hvc<n> Use the hypervisor console device <n>. This is for ++ both Xen and PowerPC hypervisors. + + If the device connected to the port is not a TTY but a braille + device, prepend "brl," before the device type, for instance diff --git a/queue-3.0/doc-xen-mention-earlyprintk-xen-in-the-documentation.patch b/queue-3.0/doc-xen-mention-earlyprintk-xen-in-the-documentation.patch new file mode 100644 index 00000000000..87d9febb422 --- /dev/null +++ b/queue-3.0/doc-xen-mention-earlyprintk-xen-in-the-documentation.patch @@ -0,0 +1,44 @@ +From 2482a92e7d17187301d7313cfe5021b13393a0b4 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Mon, 25 Feb 2013 15:54:08 -0500 +Subject: doc, xen: Mention 'earlyprintk=xen' in the documentation. + +From: Konrad Rzeszutek Wilk + +commit 2482a92e7d17187301d7313cfe5021b13393a0b4 upstream. + +The earlyprintk for Xen PV guests utilizes a simple hypercall +(console_io) to provide output to Xen emergency console. + +Note that the Xen hypervisor should be booted with 'loglevel=all' +to output said information. + +Reported-by: H. Peter Anvin +Signed-off-by: Konrad Rzeszutek Wilk +Link: http://lkml.kernel.org/r/1361825650-14031-2-git-send-email-konrad.wilk@oracle.com +Signed-off-by: H. Peter Anvin +Signed-off-by: Greg Kroah-Hartman + +--- + Documentation/kernel-parameters.txt | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -679,6 +679,7 @@ bytes respectively. Such letter suffixes + + earlyprintk= [X86,SH,BLACKFIN] + earlyprintk=vga ++ earlyprintk=xen + earlyprintk=serial[,ttySn[,baudrate]] + earlyprintk=ttySn[,baudrate] + earlyprintk=dbgp[debugController#] +@@ -696,6 +697,8 @@ bytes respectively. Such letter suffixes + The VGA output is eventually overwritten by the real + console. + ++ The xen output can only be used by Xen PV guests.
++ + ekgdboc= [X86,KGDB] Allow early kernel console debugging + ekgdboc=kbd + diff --git a/queue-3.0/ftrace-call-ftrace-cleanup-module-notifier-after-all-other-notifiers.patch b/queue-3.0/ftrace-call-ftrace-cleanup-module-notifier-after-all-other-notifiers.patch new file mode 100644 index 00000000000..db7d3903130 --- /dev/null +++ b/queue-3.0/ftrace-call-ftrace-cleanup-module-notifier-after-all-other-notifiers.patch @@ -0,0 +1,127 @@ +From 8c189ea64eea01ca20d102ddb74d6936dd16c579 Mon Sep 17 00:00:00 2001 +From: "Steven Rostedt (Red Hat)" +Date: Wed, 13 Feb 2013 15:18:38 -0500 +Subject: ftrace: Call ftrace cleanup module notifier after all other notifiers + +From: "Steven Rostedt (Red Hat)" + +commit 8c189ea64eea01ca20d102ddb74d6936dd16c579 upstream. + +Commit: c1bf08ac "ftrace: Be first to run code modification on modules" + +changed ftrace module notifier's priority to INT_MAX in order to +process the ftrace nops before anything else could touch them +(namely kprobes). This was the correct thing to do. + +Unfortunately, the ftrace module notifier also contains the ftrace +clean up code. As opposed to the set up code, this code should be +run *after* all the module notifiers have run in case a module is doing +correct clean-up and unregisters its ftrace hooks. Basically, ftrace +needs to do clean up on module removal, as it needs to know about code +being removed so that it doesn't try to modify that code. But after it +removes the module from its records, if a ftrace user tries to remove +a probe, that removal will fail due as the record of that code segment +no longer exists. + +Nothing really bad happens if the probe removal is called after ftrace +did the clean up, but the ftrace removal function will return an error. +Correct code (such as kprobes) will produce a WARN_ON() if it fails +to remove the probe. As people get annoyed by frivolous warnings, it's +best to do the ftrace clean up after everything else. 
+ +By splitting the ftrace_module_notifier into two notifiers, one that +does the module load setup that is run at high priority, and the other +that is called for module clean up that is run at low priority, the +problem is solved. + +Reported-by: Frank Ch. Eigler +Acked-by: Masami Hiramatsu +Signed-off-by: Steven Rostedt +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/trace/ftrace.c | 46 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 32 insertions(+), 14 deletions(-) + +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3432,37 +3432,51 @@ static void ftrace_init_module(struct mo + ftrace_process_locs(mod, start, end); + } + +-static int ftrace_module_notify(struct notifier_block *self, +- unsigned long val, void *data) ++static int ftrace_module_notify_enter(struct notifier_block *self, ++ unsigned long val, void *data) + { + struct module *mod = data; + +- switch (val) { +- case MODULE_STATE_COMING: ++ if (val == MODULE_STATE_COMING) + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); +- break; +- case MODULE_STATE_GOING: ++ return 0; ++} ++ ++static int ftrace_module_notify_exit(struct notifier_block *self, ++ unsigned long val, void *data) ++{ ++ struct module *mod = data; ++ ++ if (val == MODULE_STATE_GOING) + ftrace_release_mod(mod); +- break; +- } + + return 0; + } + #else +-static int ftrace_module_notify(struct notifier_block *self, +- unsigned long val, void *data) ++static int ftrace_module_notify_enter(struct notifier_block *self, ++ unsigned long val, void *data) ++{ ++ return 0; ++} ++static int ftrace_module_notify_exit(struct notifier_block *self, ++ unsigned long val, void *data) + { + return 0; + } + #endif /* CONFIG_MODULES */ + +-struct notifier_block ftrace_module_nb = { +- .notifier_call = ftrace_module_notify, ++struct notifier_block ftrace_module_enter_nb = { ++ .notifier_call = ftrace_module_notify_enter, + .priority = INT_MAX, /* Run before anything that 
can use kprobes */ + }; + ++struct notifier_block ftrace_module_exit_nb = { ++ .notifier_call = ftrace_module_notify_exit, ++ .priority = INT_MIN, /* Run after anything that can remove kprobes */ ++}; ++ + extern unsigned long __start_mcount_loc[]; + extern unsigned long __stop_mcount_loc[]; + +@@ -3494,9 +3508,13 @@ void __init ftrace_init(void) + __start_mcount_loc, + __stop_mcount_loc); + +- ret = register_module_notifier(&ftrace_module_nb); ++ ret = register_module_notifier(&ftrace_module_enter_nb); ++ if (ret) ++ pr_warning("Failed to register trace ftrace module enter notifier\n"); ++ ++ ret = register_module_notifier(&ftrace_module_exit_nb); + if (ret) +- pr_warning("Failed to register trace ftrace module notifier\n"); ++ pr_warning("Failed to register trace ftrace module exit notifier\n"); + + set_ftrace_early_filters(); + diff --git a/queue-3.0/iommu-amd-initialize-device-table-after-dma_ops.patch b/queue-3.0/iommu-amd-initialize-device-table-after-dma_ops.patch new file mode 100644 index 00000000000..6053e5f56b1 --- /dev/null +++ b/queue-3.0/iommu-amd-initialize-device-table-after-dma_ops.patch @@ -0,0 +1,57 @@ +From f528d980c17b8714aedc918ba86e058af914d66b Mon Sep 17 00:00:00 2001 +From: Joerg Roedel +Date: Wed, 6 Feb 2013 12:55:23 +0100 +Subject: iommu/amd: Initialize device table after dma_ops + +From: Joerg Roedel + +commit f528d980c17b8714aedc918ba86e058af914d66b upstream. + +When dma_ops are initialized the unity mappings are +created. The init_device_table_dma() function makes sure DMA +from all devices is blocked by default. This opens a short +window in time where DMA to unity mapped regions is blocked +by the IOMMU. Make sure this does not happen by initializing +the device table after dma_ops. 
+ +Signed-off-by: Joerg Roedel +Signed-off-by: Shuah Khan +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/amd_iommu_init.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/amd_iommu_init.c ++++ b/arch/x86/kernel/amd_iommu_init.c +@@ -1363,6 +1363,7 @@ static struct syscore_ops amd_iommu_sysc + */ + static int __init amd_iommu_init(void) + { ++ struct amd_iommu *iommu; + int i, ret = 0; + + /* +@@ -1411,9 +1412,6 @@ static int __init amd_iommu_init(void) + if (amd_iommu_pd_alloc_bitmap == NULL) + goto free; + +- /* init the device table */ +- init_device_table(); +- + /* + * let all alias entries point to itself + */ +@@ -1463,6 +1461,12 @@ static int __init amd_iommu_init(void) + if (ret) + goto free_disable; + ++ /* init the device table */ ++ init_device_table(); ++ ++ for_each_iommu(iommu) ++ iommu_flush_all_caches(iommu); ++ + amd_iommu_init_api(); + + amd_iommu_init_notifier(); diff --git a/queue-3.0/ocfs2-ac-ac_allow_chain_relink-0-won-t-disable-group-relink.patch b/queue-3.0/ocfs2-ac-ac_allow_chain_relink-0-won-t-disable-group-relink.patch new file mode 100644 index 00000000000..41bf7b7eb4d --- /dev/null +++ b/queue-3.0/ocfs2-ac-ac_allow_chain_relink-0-won-t-disable-group-relink.patch @@ -0,0 +1,108 @@ +From 309a85b6861fedbb48a22d45e0e079d1be993b3a Mon Sep 17 00:00:00 2001 +From: "Xiaowei.Hu" +Date: Wed, 27 Feb 2013 17:02:49 -0800 +Subject: ocfs2: ac->ac_allow_chain_relink=0 won't disable group relink + +From: "Xiaowei.Hu" + +commit 309a85b6861fedbb48a22d45e0e079d1be993b3a upstream. + +ocfs2_block_group_alloc_discontig() disables chain relink by setting +ac->ac_allow_chain_relink = 0 because it grabs clusters from multiple +cluster groups. 
+ +It doesn't keep the credits for all chain relink,but +ocfs2_claim_suballoc_bits overrides this in this call trace: +ocfs2_block_group_claim_bits()->ocfs2_claim_clusters()-> +__ocfs2_claim_clusters()->ocfs2_claim_suballoc_bits() +ocfs2_claim_suballoc_bits set ac->ac_allow_chain_relink = 1; then call +ocfs2_search_chain() one time and disable it again, and then we run out +of credits. + +Fix is to allow relink by default and disable it in +ocfs2_block_group_alloc_discontig. + +Without this patch, End-users will run into a crash due to run out of +credits, backtrace like this: + + RIP: 0010:[] [] + jbd2_journal_dirty_metadata+0x164/0x170 [jbd2] + RSP: 0018:ffff8801b919b5b8 EFLAGS: 00010246 + RAX: 0000000000000000 RBX: ffff88022139ddc0 RCX: ffff880159f652d0 + RDX: ffff880178aa3000 RSI: ffff880159f652d0 RDI: ffff880087f09bf8 + RBP: ffff8801b919b5e8 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000001e00 R11: 00000000000150b0 R12: ffff880159f652d0 + R13: ffff8801a0cae908 R14: ffff880087f09bf8 R15: ffff88018d177800 + FS: 00007fc9b0b6b6e0(0000) GS:ffff88022fd40000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b + CR2: 000000000040819c CR3: 0000000184017000 CR4: 00000000000006e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 + Process dd (pid: 9945, threadinfo ffff8801b919a000, task ffff880149a264c0) + Call Trace: + ocfs2_journal_dirty+0x2f/0x70 [ocfs2] + ocfs2_relink_block_group+0x111/0x480 [ocfs2] + ocfs2_search_chain+0x455/0x9a0 [ocfs2] + ... 
+ +Signed-off-by: Xiaowei.Hu +Reviewed-by: Srinivas Eeda +Cc: Mark Fasheh +Cc: Joel Becker +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ocfs2/suballoc.c | 7 +++---- + fs/ocfs2/suballoc.h | 2 +- + 2 files changed, 4 insertions(+), 5 deletions(-) + +--- a/fs/ocfs2/suballoc.c ++++ b/fs/ocfs2/suballoc.c +@@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle + * cluster groups will be staying in cache for the duration of + * this operation. + */ +- ac->ac_allow_chain_relink = 0; ++ ac->ac_disable_chain_relink = 1; + + /* Claim the first region */ + status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, +@@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocf + * Do this *after* figuring out how many bits we're taking out + * of our target group. + */ +- if (ac->ac_allow_chain_relink && ++ if (!ac->ac_disable_chain_relink && + (prev_group_bh) && + (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { + status = ocfs2_relink_block_group(handle, alloc_inode, +@@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(str + + victim = ocfs2_find_victim_chain(cl); + ac->ac_chain = victim; +- ac->ac_allow_chain_relink = 1; + + status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, + res, &bits_left); +@@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(str + * searching each chain in order. Don't allow chain relinking + * because we only calculate enough journal credits for one + * relink per alloc. 
*/ +- ac->ac_allow_chain_relink = 0; ++ ac->ac_disable_chain_relink = 1; + for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { + if (i == victim) + continue; +--- a/fs/ocfs2/suballoc.h ++++ b/fs/ocfs2/suballoc.h +@@ -49,7 +49,7 @@ struct ocfs2_alloc_context { + + /* these are used by the chain search */ + u16 ac_chain; +- int ac_allow_chain_relink; ++ int ac_disable_chain_relink; + group_search_t *ac_group_search; + + u64 ac_last_group; diff --git a/queue-3.0/posix-timer-don-t-call-idr_find-with-out-of-range-id.patch b/queue-3.0/posix-timer-don-t-call-idr_find-with-out-of-range-id.patch new file mode 100644 index 00000000000..c7818a213bd --- /dev/null +++ b/queue-3.0/posix-timer-don-t-call-idr_find-with-out-of-range-id.patch @@ -0,0 +1,54 @@ +From e182bb38d7db7494fa5dcd82da17fe0dedf60ecf Mon Sep 17 00:00:00 2001 +From: Tejun Heo +Date: Wed, 20 Feb 2013 15:24:12 -0800 +Subject: posix-timer: Don't call idr_find() with out-of-range ID + +From: Tejun Heo + +commit e182bb38d7db7494fa5dcd82da17fe0dedf60ecf upstream. + +When idr_find() was fed a negative ID, it used to look up the ID +ignoring the sign bit before recent ("idr: remove MAX_IDR_MASK and +move left MAX_IDR_* into idr.c") patch. Now a negative ID triggers +a WARN_ON_ONCE(). + +__lock_timer() feeds timer_id from userland directly to idr_find() +without sanitizing it which can trigger the above malfunctions. Add a +range check on @timer_id before invoking idr_find() in __lock_timer(). + +While timer_t is defined as int by all archs at the moment, Andrew +worries that it may be defined as a larger type later on. Make the +test cover larger integers too so that it at least is guaranteed to +not return the wrong timer. + +Note that WARN_ON_ONCE() in idr_find() on id < 0 is transitional +precaution while moving away from ignoring MSB. Once it's gone we can +remove the guard as long as timer_t isn't larger than int. 
+ +Signed-off-by: Tejun Heo nnn +Reported-by: Sasha Levin +Cc: Andrew Morton +Link: http://lkml.kernel.org/r/20130220232412.GL3570@htj.dyndns.org +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/posix-timers.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/kernel/posix-timers.c ++++ b/kernel/posix-timers.c +@@ -639,6 +639,13 @@ static struct k_itimer *__lock_timer(tim + { + struct k_itimer *timr; + ++ /* ++ * timer_t could be any type >= int and we want to make sure any ++ * @timer_id outside positive int range fails lookup. ++ */ ++ if ((unsigned long long)timer_id > INT_MAX) ++ return NULL; ++ + rcu_read_lock(); + timr = idr_find(&posix_timers_id, (int)timer_id); + if (timr) { diff --git a/queue-3.0/series b/queue-3.0/series index 1d2598216a8..0e9a4b0876e 100644 --- a/queue-3.0/series +++ b/queue-3.0/series @@ -1 +1,9 @@ quota-autoload-the-quota_v2-module-for-qfmt_vfs_v1-quota-format.patch +iommu-amd-initialize-device-table-after-dma_ops.patch +posix-timer-don-t-call-idr_find-with-out-of-range-id.patch +ftrace-call-ftrace-cleanup-module-notifier-after-all-other-notifiers.patch +doc-xen-mention-earlyprintk-xen-in-the-documentation.patch +doc-kernel-parameters-document-console-hvc-n.patch +x86-make-sure-we-can-boot-in-the-case-the-bda-contains-pure-garbage.patch +target-add-missing-mapped_lun-bounds-checking-during-make_mappedlun-setup.patch +ocfs2-ac-ac_allow_chain_relink-0-won-t-disable-group-relink.patch diff --git a/queue-3.0/target-add-missing-mapped_lun-bounds-checking-during-make_mappedlun-setup.patch b/queue-3.0/target-add-missing-mapped_lun-bounds-checking-during-make_mappedlun-setup.patch new file mode 100644 index 00000000000..3079665e3c9 --- /dev/null +++ b/queue-3.0/target-add-missing-mapped_lun-bounds-checking-during-make_mappedlun-setup.patch @@ -0,0 +1,43 @@ +From fbbf8555a986ed31e54f006b6cc637ea4ff1425b Mon Sep 17 00:00:00 2001 +From: Nicholas Bellinger +Date: Mon, 18 Feb 2013 18:31:37 -0800 +Subject: 
target: Add missing mapped_lun bounds checking during make_mappedlun setup + +From: Nicholas Bellinger + +commit fbbf8555a986ed31e54f006b6cc637ea4ff1425b upstream. + +This patch adds missing bounds checking for the configfs provided +mapped_lun value during target_fabric_make_mappedlun() setup ahead +of se_lun_acl initialization. + +This addresses a potential OOPs when using a mapped_lun value that +exceeds the hardcoded TRANSPORT_MAX_LUNS_PER_TPG-1 value within +se_node_acl->device_list[]. + +Reported-by: Jan Engelhardt +Cc: Jan Engelhardt +Signed-off-by: Nicholas Bellinger +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/target/target_core_fabric_configfs.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/target/target_core_fabric_configfs.c ++++ b/drivers/target/target_core_fabric_configfs.c +@@ -355,6 +355,14 @@ static struct config_group *target_fabri + ret = -EINVAL; + goto out; + } ++ if (mapped_lun > (TRANSPORT_MAX_LUNS_PER_TPG-1)) { ++ pr_err("Mapped LUN: %lu exceeds TRANSPORT_MAX_LUNS_PER_TPG" ++ "-1: %u for Target Portal Group: %u\n", mapped_lun, ++ TRANSPORT_MAX_LUNS_PER_TPG-1, ++ se_tpg->se_tpg_tfo->tpg_get_tag(se_tpg)); ++ ret = -EINVAL; ++ goto out; ++ } + + lacl = core_dev_init_initiator_node_lun_acl(se_tpg, mapped_lun, + config_item_name(acl_ci), &ret); diff --git a/queue-3.0/x86-make-sure-we-can-boot-in-the-case-the-bda-contains-pure-garbage.patch b/queue-3.0/x86-make-sure-we-can-boot-in-the-case-the-bda-contains-pure-garbage.patch new file mode 100644 index 00000000000..c8f3ab02b1d --- /dev/null +++ b/queue-3.0/x86-make-sure-we-can-boot-in-the-case-the-bda-contains-pure-garbage.patch @@ -0,0 +1,122 @@ +From 7c10093692ed2e6f318387d96b829320aa0ca64c Mon Sep 17 00:00:00 2001 +From: "H. Peter Anvin" +Date: Wed, 27 Feb 2013 12:46:40 -0800 +Subject: x86: Make sure we can boot in the case the BDA contains pure garbage + +From: "H. Peter Anvin" + +commit 7c10093692ed2e6f318387d96b829320aa0ca64c upstream. 
+ +On non-BIOS platforms it is possible that the BIOS data area contains +garbage instead of being zeroed or something equivalent (firmware +people: we are talking of 1.5K here, so please do the sane thing.) + +We need on the order of 20-30K of low memory in order to boot, which +may grow up to < 64K in the future. We probably want to avoid the +lowest of the low memory. At the same time, it seems extremely +unlikely that a legitimate EBDA would ever reach down to the 128K +(which would require it to be over half a megabyte in size.) Thus, +pick 128K as the cutoff for "this is insane, ignore." We may still +end up reserving a bunch of extra memory on the low megabyte, but that +is not really a major issue these days. In the worst case we lose +512K of RAM. + +This code really should be merged with trim_bios_range() in +arch/x86/kernel/setup.c, but that is a bigger patch for a later merge +window. + +Reported-by: Darren Hart +Signed-off-by: H. Peter Anvin +Cc: Matt Fleming +Link: http://lkml.kernel.org/n/tip-oebml055yyfm8yxmria09rja@git.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/head.c | 57 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 36 insertions(+), 21 deletions(-) + +--- a/arch/x86/kernel/head.c ++++ b/arch/x86/kernel/head.c +@@ -5,8 +5,6 @@ + #include + #include + +-#define BIOS_LOWMEM_KILOBYTES 0x413 +- + /* + * The BIOS places the EBDA/XBDA at the top of conventional + * memory, and usually decreases the reported amount of +@@ -16,17 +14,30 @@ + * chipset: reserve a page before VGA to prevent PCI prefetch + * into it (errata #56). Usually the page is reserved anyways, + * unless you have no PS/2 mouse plugged in. ++ * ++ * This functions is deliberately very conservative. Losing ++ * memory in the bottom megabyte is rarely a problem, as long ++ * as we have enough memory to install the trampoline. 
Using ++ * memory that is in use by the BIOS or by some DMA device ++ * the BIOS didn't shut down *is* a big problem. + */ ++ ++#define BIOS_LOWMEM_KILOBYTES 0x413 ++#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ ++#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ ++ + void __init reserve_ebda_region(void) + { + unsigned int lowmem, ebda_addr; + +- /* To determine the position of the EBDA and the */ +- /* end of conventional memory, we need to look at */ +- /* the BIOS data area. In a paravirtual environment */ +- /* that area is absent. We'll just have to assume */ +- /* that the paravirt case can handle memory setup */ +- /* correctly, without our help. */ ++ /* ++ * To determine the position of the EBDA and the ++ * end of conventional memory, we need to look at ++ * the BIOS data area. In a paravirtual environment ++ * that area is absent. We'll just have to assume ++ * that the paravirt case can handle memory setup ++ * correctly, without our help. ++ */ + if (paravirt_enabled()) + return; + +@@ -37,19 +48,23 @@ void __init reserve_ebda_region(void) + /* start of EBDA area */ + ebda_addr = get_bios_ebda(); + +- /* Fixup: bios puts an EBDA in the top 64K segment */ +- /* of conventional memory, but does not adjust lowmem. */ +- if ((lowmem - ebda_addr) <= 0x10000) +- lowmem = ebda_addr; +- +- /* Fixup: bios does not report an EBDA at all. */ +- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ +- if ((ebda_addr == 0) && (lowmem >= 0x9f000)) +- lowmem = 0x9f000; +- +- /* Paranoia: should never happen, but... */ +- if ((lowmem == 0) || (lowmem >= 0x100000)) +- lowmem = 0x9f000; ++ /* ++ * Note: some old Dells seem to need 4k EBDA without ++ * reporting so, so just consider the memory above 0x9f000 ++ * to be off limits (bugzilla 2990). 
++ */ ++ ++ /* If the EBDA address is below 128K, assume it is bogus */ ++ if (ebda_addr < INSANE_CUTOFF) ++ ebda_addr = LOWMEM_CAP; ++ ++ /* If lowmem is less than 128K, assume it is bogus */ ++ if (lowmem < INSANE_CUTOFF) ++ lowmem = LOWMEM_CAP; ++ ++ /* Use the lower of the lowmem and EBDA markers as the cutoff */ ++ lowmem = min(lowmem, ebda_addr); ++ lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ + + /* reserve all memory between lowmem and the 1MB mark */ + memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");