]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.0 patches
authorGreg Kroah-Hartman <gregkh@suse.de>
Thu, 15 Sep 2011 08:04:15 +0000 (10:04 +0200)
committerGreg Kroah-Hartman <gregkh@suse.de>
Thu, 15 Sep 2011 08:04:15 +0000 (10:04 +0200)
15 files changed:
queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch [new file with mode: 0644]
queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch [new file with mode: 0644]
queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch [new file with mode: 0644]
queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch [new file with mode: 0644]
queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch [new file with mode: 0644]
queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch [new file with mode: 0644]
queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch [new file with mode: 0644]
queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch [new file with mode: 0644]
queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch [new file with mode: 0644]
queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch [new file with mode: 0644]
queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch [new file with mode: 0644]
queue-3.0/series
queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch [new file with mode: 0644]
queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch [new file with mode: 0644]
queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch [new file with mode: 0644]

diff --git a/queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch b/queue-3.0/alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch
new file mode 100644 (file)
index 0000000..ef878e2
--- /dev/null
@@ -0,0 +1,42 @@
+From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001
+From: John Stultz <john.stultz@linaro.org>
+Date: Wed, 10 Aug 2011 10:26:09 -0700
+Subject: alarmtimers: Avoid possible denial of service with high freq periodic timers
+
+From: John Stultz <john.stultz@linaro.org>
+
+commit 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 upstream.
+
+It's possible to jam up the alarm timers by setting very small interval
+timers, which will cause the alarmtimer subsystem to spend all of its time
+firing and restarting timers. This can effectively lock up a box.
+
+A deeper fix is needed, closely mimicking the hrtimer code, but for now
+just cap the interval to 100us to avoid userland hanging the system.
+
+CC: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/time/alarmtimer.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/kernel/time/alarmtimer.c
++++ b/kernel/time/alarmtimer.c
+@@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itim
+       if (!rtcdev)
+               return -ENOTSUPP;
++      /*
++       * XXX HACK! Currently we can DOS a system if the interval
++       * period on alarmtimers is too small. Cap the interval here
++       * to 100us and solve this properly in a future patch! -jstultz
++       */
++      if ((new_setting->it_interval.tv_sec == 0) &&
++                      (new_setting->it_interval.tv_nsec < 100000))
++              new_setting->it_interval.tv_nsec = 100000;
++
+       if (old_setting)
+               alarm_timer_get(timr, old_setting);
diff --git a/queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch b/queue-3.0/alarmtimers-avoid-possible-null-pointer-traversal.patch
new file mode 100644 (file)
index 0000000..aaa61bc
--- /dev/null
@@ -0,0 +1,36 @@
+From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001
+From: John Stultz <john.stultz@linaro.org>
+Date: Thu, 4 Aug 2011 07:25:35 -0700
+Subject: alarmtimers: Avoid possible null pointer traversal
+
+From: John Stultz <john.stultz@linaro.org>
+
+commit 971c90bfa2f0b4fe52d6d9002178d547706f1343 upstream.
+
+We don't check if old_setting is non null before assigning it, so
+correct this.
+
+CC: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/time/alarmtimer.c |    7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+--- a/kernel/time/alarmtimer.c
++++ b/kernel/time/alarmtimer.c
+@@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itim
+       if (!rtcdev)
+               return -ENOTSUPP;
+-      /* Save old values */
+-      old_setting->it_interval =
+-                      ktime_to_timespec(timr->it.alarmtimer.period);
+-      old_setting->it_value =
+-                      ktime_to_timespec(timr->it.alarmtimer.node.expires);
++      if (old_setting)
++              alarm_timer_get(timr, old_setting);
+       /* If the timer was already set, cancel it */
+       alarm_cancel(&timr->it.alarmtimer);
diff --git a/queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch b/queue-3.0/alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch
new file mode 100644 (file)
index 0000000..0b207dc
--- /dev/null
@@ -0,0 +1,30 @@
+From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001
+From: John Stultz <john.stultz@linaro.org>
+Date: Thu, 4 Aug 2011 07:51:56 -0700
+Subject: alarmtimers: Memset itimerspec passed into alarm_timer_get
+
+From: John Stultz <john.stultz@linaro.org>
+
+commit ea7802f630d356acaf66b3c0b28c00a945fc35dc upstream.
+
+Following common_timer_get, zero out the itimerspec passed in.
+
+CC: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/time/alarmtimer.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/kernel/time/alarmtimer.c
++++ b/kernel/time/alarmtimer.c
+@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_i
+ static void alarm_timer_get(struct k_itimer *timr,
+                               struct itimerspec *cur_setting)
+ {
++      memset(cur_setting, 0, sizeof(struct itimerspec));
++
+       cur_setting->it_interval =
+                       ktime_to_timespec(timr->it.alarmtimer.period);
+       cur_setting->it_value =
diff --git a/queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch b/queue-3.0/mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch
new file mode 100644 (file)
index 0000000..39b6c80
--- /dev/null
@@ -0,0 +1,267 @@
+From 778e277cb82411c9002ca28ccbd216c4d9eb9158 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 18 Aug 2011 15:23:48 +0300
+Subject: mmc: core: prevent aggressive clock gating racing with ios updates
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit 778e277cb82411c9002ca28ccbd216c4d9eb9158 upstream.
+
+We have seen at least two different races when clock gating kicks in in the
+middle of an ios structure update.
+
+First one happens when ios->clock is changed outside of aggressive clock
+gating framework, for example via mmc_set_clock(). The race might happen
+when we run following code:
+
+mmc_set_ios():
+       ...
+       if (ios->clock > 0)
+               mmc_set_ungated(host);
+
+Now if gating kicks in right after the condition check we end up setting
+host->clk_gated to false even though we have just gated the clock. Next
+time a request is started we try to ungate and restore the clock in
+mmc_host_clk_hold(). However since we have host->clk_gated set to false the
+original clock is not restored.
+
+This eventually will cause the host controller to hang since its clock is
+disabled while we are trying to issue a request. For example on Intel
+Medfield platform we see:
+
+[   13.818610] mmc2: Timeout waiting for hardware interrupt.
+[   13.818698] sdhci: =========== REGISTER DUMP (mmc2)===========
+[   13.818753] sdhci: Sys addr: 0x00000000 | Version:  0x00008901
+[   13.818804] sdhci: Blk size: 0x00000000 | Blk cnt:  0x00000000
+[   13.818853] sdhci: Argument: 0x00000000 | Trn mode: 0x00000000
+[   13.818903] sdhci: Present:  0x1fff0000 | Host ctl: 0x00000001
+[   13.818951] sdhci: Power:    0x0000000d | Blk gap:  0x00000000
+[   13.819000] sdhci: Wake-up:  0x00000000 | Clock:    0x00000000
+[   13.819049] sdhci: Timeout:  0x00000000 | Int stat: 0x00000000
+[   13.819098] sdhci: Int enab: 0x00ff00c3 | Sig enab: 0x00ff00c3
+[   13.819147] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000
+[   13.819196] sdhci: Caps:     0x6bee32b2 | Caps_1:   0x00000000
+[   13.819245] sdhci: Cmd:      0x00000000 | Max curr: 0x00000000
+[   13.819292] sdhci: Host ctl2: 0x00000000
+[   13.819331] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x00000000
+[   13.819377] sdhci: ===========================================
+[   13.919605] mmc2: Reset 0x2 never completed.
+
+and it never recovers.
+
+Second race might happen while running mmc_power_off():
+
+static void mmc_power_off(struct mmc_host *host)
+{
+       host->ios.clock = 0;
+       host->ios.vdd = 0;
+
+[ clock gating kicks in here ]
+
+       /*
+        * Reset ocr mask to be the highest possible voltage supported for
+        * this mmc host. This value will be used at next power up.
+        */
+       host->ocr = 1 << (fls(host->ocr_avail) - 1);
+
+       if (!mmc_host_is_spi(host)) {
+               host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
+               host->ios.chip_select = MMC_CS_DONTCARE;
+       }
+       host->ios.power_mode = MMC_POWER_OFF;
+       host->ios.bus_width = MMC_BUS_WIDTH_1;
+       host->ios.timing = MMC_TIMING_LEGACY;
+       mmc_set_ios(host);
+}
+
+If the clock gating worker kicks in while we have only partially updated the
+ios structure, the host controller gets an incomplete ios and might not work
+as expected. Again on Intel Medfield platform we get:
+
+[    4.185349] kernel BUG at drivers/mmc/host/sdhci.c:1155!
+[    4.185422] invalid opcode: 0000 [#1] PREEMPT SMP
+[    4.185509] Modules linked in:
+[    4.185565]
+[    4.185608] Pid: 4, comm: kworker/0:0 Not tainted 3.0.0+ #240 Intel Corporation Medfield/iCDKA
+[    4.185742] EIP: 0060:[<c136364e>] EFLAGS: 00010083 CPU: 0
+[    4.185827] EIP is at sdhci_set_power+0x3e/0xd0
+[    4.185891] EAX: f5ff98e0 EBX: f5ff98e0 ECX: 00000000 EDX: 00000001
+[    4.185970] ESI: f5ff977c EDI: f5ff9904 EBP: f644fe98 ESP: f644fe94
+[    4.186049]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
+[    4.186125] Process kworker/0:0 (pid: 4, ti=f644e000 task=f644c0e0 task.ti=f644e000)
+[    4.186219] Stack:
+[    4.186257]  f5ff98e0 f644feb0 c1365173 00000282 f5ff9460 f5ff96e0 f5ff96e0 f644feec
+[    4.186418]  c1355bd8 f644c0e0 c1499c3d f5ff96e0 f644fed4 00000006 f5ff96e0 00000286
+[    4.186579]  f644fedc c107922b f644feec 00000286 f5ff9460 f5ff9700 f644ff10 c135839e
+[    4.186739] Call Trace:
+[    4.186802]  [<c1365173>] sdhci_set_ios+0x1c3/0x340
+[    4.186883]  [<c1355bd8>] mmc_gate_clock+0x68/0x120
+[    4.186963]  [<c1499c3d>] ? _raw_spin_unlock_irqrestore+0x4d/0x60
+[    4.187052]  [<c107922b>] ? trace_hardirqs_on+0xb/0x10
+[    4.187134]  [<c135839e>] mmc_host_clk_gate_delayed+0xbe/0x130
+[    4.187219]  [<c105ec09>] ? process_one_work+0xf9/0x5b0
+[    4.187300]  [<c135841d>] mmc_host_clk_gate_work+0xd/0x10
+[    4.187379]  [<c105ec82>] process_one_work+0x172/0x5b0
+[    4.187457]  [<c105ec09>] ? process_one_work+0xf9/0x5b0
+[    4.187538]  [<c1358410>] ? mmc_host_clk_gate_delayed+0x130/0x130
+[    4.187625]  [<c105f3c8>] worker_thread+0x118/0x330
+[    4.187700]  [<c1496cee>] ? preempt_schedule+0x2e/0x50
+[    4.187779]  [<c105f2b0>] ? rescuer_thread+0x1f0/0x1f0
+[    4.187857]  [<c1062cf4>] kthread+0x74/0x80
+[    4.187931]  [<c1062c80>] ? __init_kthread_worker+0x60/0x60
+[    4.188015]  [<c149acfa>] kernel_thread_helper+0x6/0xd
+[    4.188079] Code: 81 fa 00 00 04 00 0f 84 a7 00 00 00 7f 21 81 fa 80 00 00 00 0f 84 92 00 00 00 81 fa 00 00 0
+[    4.188780] EIP: [<c136364e>] sdhci_set_power+0x3e/0xd0 SS:ESP 0068:f644fe94
+[    4.188898] ---[ end trace a7b23eecc71777e4 ]---
+
+This BUG() comes from the fact that ios.power_mode was still in previous
+value (MMC_POWER_ON) and ios.vdd was set to zero.
+
+We prevent these by inhibiting the clock gating while we update the ios
+structure.
+
+Both problems can be reproduced by simply running the device in a reboot
+loop.
+
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Tested-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mmc/core/core.c |   31 +++++++++++++++++++++++++++++--
+ 1 file changed, 29 insertions(+), 2 deletions(-)
+
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -634,15 +634,17 @@ static inline void mmc_set_ios(struct mm
+  */
+ void mmc_set_chip_select(struct mmc_host *host, int mode)
+ {
++      mmc_host_clk_hold(host);
+       host->ios.chip_select = mode;
+       mmc_set_ios(host);
++      mmc_host_clk_release(host);
+ }
+ /*
+  * Sets the host clock to the highest possible frequency that
+  * is below "hz".
+  */
+-void mmc_set_clock(struct mmc_host *host, unsigned int hz)
++static void __mmc_set_clock(struct mmc_host *host, unsigned int hz)
+ {
+       WARN_ON(hz < host->f_min);
+@@ -653,6 +655,13 @@ void mmc_set_clock(struct mmc_host *host
+       mmc_set_ios(host);
+ }
++void mmc_set_clock(struct mmc_host *host, unsigned int hz)
++{
++      mmc_host_clk_hold(host);
++      __mmc_set_clock(host, hz);
++      mmc_host_clk_release(host);
++}
++
+ #ifdef CONFIG_MMC_CLKGATE
+ /*
+  * This gates the clock by setting it to 0 Hz.
+@@ -685,7 +694,7 @@ void mmc_ungate_clock(struct mmc_host *h
+       if (host->clk_old) {
+               BUG_ON(host->ios.clock);
+               /* This call will also set host->clk_gated to false */
+-              mmc_set_clock(host, host->clk_old);
++              __mmc_set_clock(host, host->clk_old);
+       }
+ }
+@@ -713,8 +722,10 @@ void mmc_set_ungated(struct mmc_host *ho
+  */
+ void mmc_set_bus_mode(struct mmc_host *host, unsigned int mode)
+ {
++      mmc_host_clk_hold(host);
+       host->ios.bus_mode = mode;
+       mmc_set_ios(host);
++      mmc_host_clk_release(host);
+ }
+ /*
+@@ -722,8 +733,10 @@ void mmc_set_bus_mode(struct mmc_host *h
+  */
+ void mmc_set_bus_width(struct mmc_host *host, unsigned int width)
+ {
++      mmc_host_clk_hold(host);
+       host->ios.bus_width = width;
+       mmc_set_ios(host);
++      mmc_host_clk_release(host);
+ }
+ /**
+@@ -921,8 +934,10 @@ u32 mmc_select_voltage(struct mmc_host *
+               ocr &= 3 << bit;
++              mmc_host_clk_hold(host);
+               host->ios.vdd = bit;
+               mmc_set_ios(host);
++              mmc_host_clk_release(host);
+       } else {
+               pr_warning("%s: host doesn't support card's voltages\n",
+                               mmc_hostname(host));
+@@ -969,8 +984,10 @@ int mmc_set_signal_voltage(struct mmc_ho
+  */
+ void mmc_set_timing(struct mmc_host *host, unsigned int timing)
+ {
++      mmc_host_clk_hold(host);
+       host->ios.timing = timing;
+       mmc_set_ios(host);
++      mmc_host_clk_release(host);
+ }
+ /*
+@@ -978,8 +995,10 @@ void mmc_set_timing(struct mmc_host *hos
+  */
+ void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type)
+ {
++      mmc_host_clk_hold(host);
+       host->ios.drv_type = drv_type;
+       mmc_set_ios(host);
++      mmc_host_clk_release(host);
+ }
+ /*
+@@ -997,6 +1016,8 @@ static void mmc_power_up(struct mmc_host
+ {
+       int bit;
++      mmc_host_clk_hold(host);
++
+       /* If ocr is set, we use it */
+       if (host->ocr)
+               bit = ffs(host->ocr) - 1;
+@@ -1032,10 +1053,14 @@ static void mmc_power_up(struct mmc_host
+        * time required to reach a stable voltage.
+        */
+       mmc_delay(10);
++
++      mmc_host_clk_release(host);
+ }
+ static void mmc_power_off(struct mmc_host *host)
+ {
++      mmc_host_clk_hold(host);
++
+       host->ios.clock = 0;
+       host->ios.vdd = 0;
+@@ -1053,6 +1078,8 @@ static void mmc_power_off(struct mmc_hos
+       host->ios.bus_width = MMC_BUS_WIDTH_1;
+       host->ios.timing = MMC_TIMING_LEGACY;
+       mmc_set_ios(host);
++
++      mmc_host_clk_release(host);
+ }
+ /*
diff --git a/queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch b/queue-3.0/mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch
new file mode 100644 (file)
index 0000000..e4bade3
--- /dev/null
@@ -0,0 +1,37 @@
+From 50a50f9248497484c678631a9c1a719f1aaeab79 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 18 Aug 2011 15:23:49 +0300
+Subject: mmc: core: use non-reentrant workqueue for clock gating
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit 50a50f9248497484c678631a9c1a719f1aaeab79 upstream.
+
+The default multithread workqueue can cause the same work to be executed
+concurrently on different CPUs. This isn't really suitable for clock
+gating as it might have already gated the clock, and gating it twice results
+in both host->clk_old and host->ios.clock being set to 0.
+
+To prevent this from happening we use system_nrt_wq instead.
+
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Tested-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mmc/core/host.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/mmc/core/host.c
++++ b/drivers/mmc/core/host.c
+@@ -179,7 +179,7 @@ void mmc_host_clk_release(struct mmc_hos
+       host->clk_requests--;
+       if (mmc_host_may_gate_card(host->card) &&
+           !host->clk_requests)
+-              schedule_work(&host->clk_gate_work);
++              queue_work(system_nrt_wq, &host->clk_gate_work);
+       spin_unlock_irqrestore(&host->clk_lock, flags);
+ }
diff --git a/queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch b/queue-3.0/mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch
new file mode 100644 (file)
index 0000000..b92ec47
--- /dev/null
@@ -0,0 +1,121 @@
+From 08c14071fda4e69abb9d5b1566651cd092b158d3 Mon Sep 17 00:00:00 2001
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+Date: Thu, 18 Aug 2011 15:23:47 +0300
+Subject: mmc: rename mmc_host_clk_{ungate|gate} to mmc_host_clk_{hold|release}
+
+From: Mika Westerberg <mika.westerberg@linux.intel.com>
+
+commit 08c14071fda4e69abb9d5b1566651cd092b158d3 upstream.
+
+As per suggestion by Linus Walleij:
+
+  > If you think the names of the functions are confusing then
+  > you may rename them, say like this:
+  >
+  > mmc_host_clk_ungate() -> mmc_host_clk_hold()
+  > mmc_host_clk_gate() -> mmc_host_clk_release()
+  >
+  > Which would make the usecases more clear
+
+(This is CC'd to stable@ because the next two patches, which fix
+observable races, depend on it.)
+
+Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mmc/core/core.c |    4 ++--
+ drivers/mmc/core/host.c |   10 +++++-----
+ drivers/mmc/core/host.h |    8 ++++----
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -132,7 +132,7 @@ void mmc_request_done(struct mmc_host *h
+               if (mrq->done)
+                       mrq->done(mrq);
+-              mmc_host_clk_gate(host);
++              mmc_host_clk_release(host);
+       }
+ }
+@@ -191,7 +191,7 @@ mmc_start_request(struct mmc_host *host,
+                       mrq->stop->mrq = mrq;
+               }
+       }
+-      mmc_host_clk_ungate(host);
++      mmc_host_clk_hold(host);
+       led_trigger_event(host->led, LED_FULL);
+       host->ops->request(host, mrq);
+ }
+--- a/drivers/mmc/core/host.c
++++ b/drivers/mmc/core/host.c
+@@ -119,14 +119,14 @@ static void mmc_host_clk_gate_work(struc
+ }
+ /**
+- *    mmc_host_clk_ungate - ungate hardware MCI clocks
++ *    mmc_host_clk_hold - ungate hardware MCI clocks
+  *    @host: host to ungate.
+  *
+  *    Makes sure the host ios.clock is restored to a non-zero value
+  *    past this call. Increase clock reference count and ungate clock
+  *    if we're the first user.
+  */
+-void mmc_host_clk_ungate(struct mmc_host *host)
++void mmc_host_clk_hold(struct mmc_host *host)
+ {
+       unsigned long flags;
+@@ -164,14 +164,14 @@ static bool mmc_host_may_gate_card(struc
+ }
+ /**
+- *    mmc_host_clk_gate - gate off hardware MCI clocks
++ *    mmc_host_clk_release - gate off hardware MCI clocks
+  *    @host: host to gate.
+  *
+  *    Calls the host driver with ios.clock set to zero as often as possible
+  *    in order to gate off hardware MCI clocks. Decrease clock reference
+  *    count and schedule disabling of clock.
+  */
+-void mmc_host_clk_gate(struct mmc_host *host)
++void mmc_host_clk_release(struct mmc_host *host)
+ {
+       unsigned long flags;
+@@ -231,7 +231,7 @@ static inline void mmc_host_clk_exit(str
+       if (cancel_work_sync(&host->clk_gate_work))
+               mmc_host_clk_gate_delayed(host);
+       if (host->clk_gated)
+-              mmc_host_clk_ungate(host);
++              mmc_host_clk_hold(host);
+       /* There should be only one user now */
+       WARN_ON(host->clk_requests > 1);
+ }
+--- a/drivers/mmc/core/host.h
++++ b/drivers/mmc/core/host.h
+@@ -16,16 +16,16 @@ int mmc_register_host_class(void);
+ void mmc_unregister_host_class(void);
+ #ifdef CONFIG_MMC_CLKGATE
+-void mmc_host_clk_ungate(struct mmc_host *host);
+-void mmc_host_clk_gate(struct mmc_host *host);
++void mmc_host_clk_hold(struct mmc_host *host);
++void mmc_host_clk_release(struct mmc_host *host);
+ unsigned int mmc_host_clk_rate(struct mmc_host *host);
+ #else
+-static inline void mmc_host_clk_ungate(struct mmc_host *host)
++static inline void mmc_host_clk_hold(struct mmc_host *host)
+ {
+ }
+-static inline void mmc_host_clk_gate(struct mmc_host *host)
++static inline void mmc_host_clk_release(struct mmc_host *host)
+ {
+ }
diff --git a/queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch b/queue-3.0/mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch
new file mode 100644 (file)
index 0000000..bc6290e
--- /dev/null
@@ -0,0 +1,41 @@
+From 49bb1e619568ec84785ceb366f07db2a6f0b64cc Mon Sep 17 00:00:00 2001
+From: Girish K S <girish.shivananjappa@linaro.org>
+Date: Fri, 26 Aug 2011 14:58:18 +0530
+Subject: mmc: sdhci-s3c: Fix mmc card I/O problem
+
+From: Girish K S <girish.shivananjappa@linaro.org>
+
+commit 49bb1e619568ec84785ceb366f07db2a6f0b64cc upstream.
+
+This patch fixes the problem in sdhci-s3c host driver for Samsung Soc's.
+During the card identification stage the mmc core driver enumerates for
+the best bus width in combination with the highest available data rate.
+It starts enumerating from the highest bus width (8) to lowest width (1).
+
+In case of few MMC cards the 4-bit bus enumeration fails and tries
+the 1-bit bus enumeration. When switched to 1-bit bus mode the host driver
+has to clear the previous bus width setting and apply the new setting.
+
+The current patch will clear the previous bus mode and apply the new
+mode setting.
+
+Signed-off-by: Girish K S <girish.shivananjappa@linaro.org>
+Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
+Signed-off-by: Chris Ball <cjb@laptop.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/mmc/host/sdhci-s3c.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/mmc/host/sdhci-s3c.c
++++ b/drivers/mmc/host/sdhci-s3c.c
+@@ -301,6 +301,8 @@ static int sdhci_s3c_platform_8bit_width
+               ctrl &= ~SDHCI_CTRL_8BITBUS;
+               break;
+       default:
++              ctrl &= ~SDHCI_CTRL_4BITBUS;
++              ctrl &= ~SDHCI_CTRL_8BITBUS;
+               break;
+       }
diff --git a/queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch b/queue-3.0/rtc-fix-rtc-pie-frequency-limit.patch
new file mode 100644 (file)
index 0000000..4e53a7b
--- /dev/null
@@ -0,0 +1,46 @@
+From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001
+From: John Stultz <john.stultz@linaro.org>
+Date: Fri, 22 Jul 2011 09:12:51 +0000
+Subject: rtc: Fix RTC PIE frequency limit
+
+From: John Stultz <john.stultz@linaro.org>
+
+commit 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 upstream.
+
+Thomas earlier submitted a fix to limit the RTC PIE freq, but
+picked 5000Hz out of the air. Willy noticed that we should
+instead use the 8192Hz max from the rtc man documentation.
+
+Cc: Willy Tarreau <w@1wt.eu>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: John Stultz <john.stultz@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ drivers/rtc/interface.c |    2 +-
+ include/linux/rtc.h     |    3 +++
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+--- a/drivers/rtc/interface.c
++++ b/drivers/rtc/interface.c
+@@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *
+       int err = 0;
+       unsigned long flags;
+-      if (freq <= 0 || freq > 5000)
++      if (freq <= 0 || freq > RTC_MAX_FREQ)
+               return -EINVAL;
+ retry:
+       spin_lock_irqsave(&rtc->irq_task_lock, flags);
+--- a/include/linux/rtc.h
++++ b/include/linux/rtc.h
+@@ -97,6 +97,9 @@ struct rtc_pll_info {
+ #define RTC_AF 0x20   /* Alarm interrupt */
+ #define RTC_UF 0x10   /* Update interrupt for 1Hz RTC */
++
++#define RTC_MAX_FREQ  8192
++
+ #ifdef __KERNEL__
+ #include <linux/types.h>
diff --git a/queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch b/queue-3.0/sched-fix-a-memory-leak-in-__sdt_free.patch
new file mode 100644 (file)
index 0000000..ee934d0
--- /dev/null
@@ -0,0 +1,54 @@
+From feff8fa0075bdfd43c841e9d689ed81adda988d6 Mon Sep 17 00:00:00 2001
+From: WANG Cong <amwang@redhat.com>
+Date: Thu, 18 Aug 2011 20:36:57 +0800
+Subject: sched: Fix a memory leak in __sdt_free()
+
+From: WANG Cong <amwang@redhat.com>
+
+commit feff8fa0075bdfd43c841e9d689ed81adda988d6 upstream.
+
+This patch fixes the following memory leak:
+
+unreferenced object 0xffff880107266800 (size 512):
+  comm "sched-powersave", pid 3718, jiffies 4323097853 (age 27495.450s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff81133940>] create_object+0x187/0x28b
+    [<ffffffff814ac103>] kmemleak_alloc+0x73/0x98
+    [<ffffffff811232ba>] __kmalloc_node+0x104/0x159
+    [<ffffffff81044b98>] kzalloc_node.clone.97+0x15/0x17
+    [<ffffffff8104cb90>] build_sched_domains+0xb7/0x7f3
+    [<ffffffff8104d4df>] partition_sched_domains+0x1db/0x24a
+    [<ffffffff8109ee4a>] do_rebuild_sched_domains+0x3b/0x47
+    [<ffffffff810a00c7>] rebuild_sched_domains+0x10/0x12
+    [<ffffffff8104d5ba>] sched_power_savings_store+0x6c/0x7b
+    [<ffffffff8104d5df>] sched_mc_power_savings_store+0x16/0x18
+    [<ffffffff8131322c>] sysdev_class_store+0x20/0x22
+    [<ffffffff81193876>] sysfs_write_file+0x108/0x144
+    [<ffffffff81135b10>] vfs_write+0xaf/0x102
+    [<ffffffff81135d23>] sys_write+0x4d/0x74
+    [<ffffffff814c8a42>] system_call_fastpath+0x16/0x1b
+    [<ffffffffffffffff>] 0xffffffffffffffff
+
+Signed-off-by: WANG Cong <amwang@redhat.com>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/1313671017-4112-1-git-send-email-amwang@redhat.com
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -7423,6 +7423,7 @@ static void __sdt_free(const struct cpum
+                       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+                       if (sd && (sd->flags & SD_OVERLAP))
+                               free_sched_groups(sd->groups, 0);
++                      kfree(*per_cpu_ptr(sdd->sd, j));
+                       kfree(*per_cpu_ptr(sdd->sg, j));
+                       kfree(*per_cpu_ptr(sdd->sgp, j));
+               }
diff --git a/queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch b/queue-3.0/sched-move-blk_schedule_flush_plug-out-of-__schedule.patch
new file mode 100644 (file)
index 0000000..fc88bcc
--- /dev/null
@@ -0,0 +1,77 @@
+From 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 22 Jun 2011 19:47:01 +0200
+Subject: sched: Move blk_schedule_flush_plug() out of __schedule()
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 9c40cef2b799f9b5e7fa5de4d2ad3a0168ba118c upstream.
+
+There is no real reason to run blk_schedule_flush_plug() with
+interrupts and preemption disabled.
+
+Move it into schedule() and call it when the task is going voluntarily
+to sleep. There might be false positives when the task is woken
+between that call and actually scheduling, but that's not really
+different from being woken immediately after switching away.
+
+This fixes a deadlock in the scheduler where the
+blk_schedule_flush_plug() callchain enables interrupts and thereby
+allows a wakeup to happen of the task that's going to sleep.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/n/tip-dwfxtra7yg1b5r65m32ywtct@git.kernel.org
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4285,16 +4285,6 @@ need_resched:
+                               if (to_wakeup)
+                                       try_to_wake_up_local(to_wakeup);
+                       }
+-
+-                      /*
+-                       * If we are going to sleep and we have plugged IO
+-                       * queued, make sure to submit it to avoid deadlocks.
+-                       */
+-                      if (blk_needs_flush_plug(prev)) {
+-                              raw_spin_unlock(&rq->lock);
+-                              blk_schedule_flush_plug(prev);
+-                              raw_spin_lock(&rq->lock);
+-                      }
+               }
+               switch_count = &prev->nvcsw;
+       }
+@@ -4333,8 +4323,23 @@ need_resched:
+               goto need_resched;
+ }
++static inline void sched_submit_work(struct task_struct *tsk)
++{
++      if (!tsk->state)
++              return;
++      /*
++       * If we are going to sleep and we have plugged IO queued,
++       * make sure to submit it to avoid deadlocks.
++       */
++      if (blk_needs_flush_plug(tsk))
++              blk_schedule_flush_plug(tsk);
++}
++
+ asmlinkage void schedule(void)
+ {
++      struct task_struct *tsk = current;
++
++      sched_submit_work(tsk);
+       __schedule();
+ }
+ EXPORT_SYMBOL(schedule);
diff --git a/queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch b/queue-3.0/sched-separate-the-scheduler-entry-for-preemption.patch
new file mode 100644 (file)
index 0000000..32abdb6
--- /dev/null
@@ -0,0 +1,85 @@
+From c259e01a1ec90063042f758e409cd26b2a0963c8 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Wed, 22 Jun 2011 19:47:00 +0200
+Subject: sched: Separate the scheduler entry for preemption
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit c259e01a1ec90063042f758e409cd26b2a0963c8 upstream.
+
+Block-IO and workqueues call into notifier functions from the
+scheduler core code with interrupts and preemption disabled. These
+calls should be made before entering the scheduler core.
+
+To simplify this, separate the scheduler core code into
+__schedule(). __schedule() is directly called from the places which
+set PREEMPT_ACTIVE and from schedule(). This allows us to add the work
+checks into schedule(), so they are only called when a task voluntary
+goes to sleep.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Link: http://lkml.kernel.org/r/20110622174918.813258321@linutronix.de
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ kernel/sched.c |   15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -4242,9 +4242,9 @@ pick_next_task(struct rq *rq)
+ }
+ /*
+- * schedule() is the main scheduler function.
++ * __schedule() is the main scheduler function.
+  */
+-asmlinkage void __sched schedule(void)
++static void __sched __schedule(void)
+ {
+       struct task_struct *prev, *next;
+       unsigned long *switch_count;
+@@ -4332,6 +4332,11 @@ need_resched:
+       if (need_resched())
+               goto need_resched;
+ }
++
++asmlinkage void schedule(void)
++{
++      __schedule();
++}
+ EXPORT_SYMBOL(schedule);
+ #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+@@ -4405,7 +4410,7 @@ asmlinkage void __sched notrace preempt_
+       do {
+               add_preempt_count_notrace(PREEMPT_ACTIVE);
+-              schedule();
++              __schedule();
+               sub_preempt_count_notrace(PREEMPT_ACTIVE);
+               /*
+@@ -4433,7 +4438,7 @@ asmlinkage void __sched preempt_schedule
+       do {
+               add_preempt_count(PREEMPT_ACTIVE);
+               local_irq_enable();
+-              schedule();
++              __schedule();
+               local_irq_disable();
+               sub_preempt_count(PREEMPT_ACTIVE);
+@@ -5558,7 +5563,7 @@ static inline int should_resched(void)
+ static void __cond_resched(void)
+ {
+       add_preempt_count(PREEMPT_ACTIVE);
+-      schedule();
++      __schedule();
+       sub_preempt_count(PREEMPT_ACTIVE);
+ }
index e72eb760e46dd53ced62ce216104e77d99503868..98e3de6523a5bd8bbb0f05d1a14702ddedc86b16 100644 (file)
@@ -54,3 +54,17 @@ mm-page-allocator-initialise-zlc-for-first-zone-eligible-for-zone_reclaim.patch
 mm-page-allocator-reconsider-zones-for-allocation-after-direct-reclaim.patch
 igb-fix-wol-on-second-port-of-i350-device.patch
 mxc-iomux-v3-correct-no_pad_ctrl-definition.patch
+alarmtimers-avoid-possible-null-pointer-traversal.patch
+alarmtimers-memset-itimerspec-passed-into-alarm_timer_get.patch
+alarmtimers-avoid-possible-denial-of-service-with-high-freq-periodic-timers.patch
+rtc-fix-rtc-pie-frequency-limit.patch
+sched-separate-the-scheduler-entry-for-preemption.patch
+sched-move-blk_schedule_flush_plug-out-of-__schedule.patch
+sched-fix-a-memory-leak-in-__sdt_free.patch
+x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch
+mmc-rename-mmc_host_clk_-ungate-gate-to-mmc_host_clk_-hold-release.patch
+mmc-core-prevent-aggressive-clock-gating-racing-with-ios.patch
+mmc-core-use-non-reentrant-workqueue-for-clock-gating.patch
+mmc-sdhci-s3c-fix-mmc-card-i-o-problem.patch
+xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch
+xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch
diff --git a/queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch b/queue-3.0/x86-perf-check-that-current-mm-is-alive-before-getting-user-callchain.patch
new file mode 100644 (file)
index 0000000..55f975e
--- /dev/null
@@ -0,0 +1,63 @@
+From 20afc60f892d285fde179ead4b24e6a7938c2f1b Mon Sep 17 00:00:00 2001
+From: Andrey Vagin <avagin@openvz.org>
+Date: Tue, 30 Aug 2011 12:32:36 +0400
+Subject: x86, perf: Check that current->mm is alive before getting user callchain
+
+From: Andrey Vagin <avagin@openvz.org>
+
+commit 20afc60f892d285fde179ead4b24e6a7938c2f1b upstream.
+
+An event may occur when an mm is already released.
+
+I added an event in dequeue_entity() and caught a panic with
+the following backtrace:
+
+[  434.421110] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050
+[  434.421258] IP: [<ffffffff810464ac>] __get_user_pages_fast+0x9c/0x120
+...
+[  434.421258] Call Trace:
+[  434.421258]  [<ffffffff8101ae81>] copy_from_user_nmi+0x51/0xf0
+[  434.421258]  [<ffffffff8109a0d5>] ? sched_clock_local+0x25/0x90
+[  434.421258]  [<ffffffff8101b048>] perf_callchain_user+0x128/0x170
+[  434.421258]  [<ffffffff811154cd>] ? __perf_event_header__init_id+0xed/0x100
+[  434.421258]  [<ffffffff81116690>] perf_prepare_sample+0x200/0x280
+[  434.421258]  [<ffffffff81118da8>] __perf_event_overflow+0x1b8/0x290
+[  434.421258]  [<ffffffff81065240>] ? tg_shares_up+0x0/0x670
+[  434.421258]  [<ffffffff8104fe1a>] ? walk_tg_tree+0x6a/0xb0
+[  434.421258]  [<ffffffff81118f44>] perf_swevent_overflow+0xc4/0xf0
+[  434.421258]  [<ffffffff81119150>] do_perf_sw_event+0x1e0/0x250
+[  434.421258]  [<ffffffff81119204>] perf_tp_event+0x44/0x70
+[  434.421258]  [<ffffffff8105701f>] ftrace_profile_sched_block+0xdf/0x110
+[  434.421258]  [<ffffffff8106121d>] dequeue_entity+0x2ad/0x2d0
+[  434.421258]  [<ffffffff810614ec>] dequeue_task_fair+0x1c/0x60
+[  434.421258]  [<ffffffff8105818a>] dequeue_task+0x9a/0xb0
+[  434.421258]  [<ffffffff810581e2>] deactivate_task+0x42/0xe0
+[  434.421258]  [<ffffffff814bc019>] thread_return+0x191/0x808
+[  434.421258]  [<ffffffff81098a44>] ? switch_task_namespaces+0x24/0x60
+[  434.421258]  [<ffffffff8106f4c4>] do_exit+0x464/0x910
+[  434.421258]  [<ffffffff8106f9c8>] do_group_exit+0x58/0xd0
+[  434.421258]  [<ffffffff8106fa57>] sys_exit_group+0x17/0x20
+[  434.421258]  [<ffffffff8100b202>] system_call_fastpath+0x16/0x1b
+
+Signed-off-by: Andrey Vagin <avagin@openvz.org>
+Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Link: http://lkml.kernel.org/r/1314693156-24131-1-git-send-email-avagin@openvz.org
+Signed-off-by: Ingo Molnar <mingo@elte.hu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/kernel/cpu/perf_event.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/x86/kernel/cpu/perf_event.c
++++ b/arch/x86/kernel/cpu/perf_event.c
+@@ -1856,6 +1856,9 @@ perf_callchain_user(struct perf_callchai
+       perf_callchain_store(entry, regs->ip);
++      if (!current->mm)
++              return;
++
+       if (perf_callchain_user32(regs, entry))
+               return;
diff --git a/queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch b/queue-3.0/xen-use-maximum-reservation-to-limit-amount-of-usable-ram.patch
new file mode 100644 (file)
index 0000000..83cf047
--- /dev/null
@@ -0,0 +1,75 @@
+From d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 Mon Sep 17 00:00:00 2001
+From: David Vrabel <david.vrabel@citrix.com>
+Date: Fri, 19 Aug 2011 15:57:16 +0100
+Subject: xen: use maximum reservation to limit amount of usable RAM
+
+From: David Vrabel <david.vrabel@citrix.com>
+
+commit d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11 upstream.
+
+Use the domain's maximum reservation to limit the amount of extra RAM
+for the memory balloon. This reduces the size of the pages tables and
+the amount of reserved low memory (which defaults to about 1/32 of the
+total RAM).
+
+On a system with 8 GiB of RAM with the domain limited to 1 GiB the
+kernel reports:
+
+Before:
+
+Memory: 627792k/4472000k available
+
+After:
+
+Memory: 549740k/11132224k available
+
+A increase of about 76 MiB (~1.5% of the unused 7 GiB).  The reserved
+low memory is also reduced from 253 MiB to 32 MiB.  The total
+additional usable RAM is 329 MiB.
+
+For dom0, this requires at patch to Xen ('x86: use 'dom0_mem' to limit
+the number of pages for dom0') (c/s 23790)
+
+Signed-off-by: David Vrabel <david.vrabel@citrix.com>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/xen/setup.c |   19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+--- a/arch/x86/xen/setup.c
++++ b/arch/x86/xen/setup.c
+@@ -185,6 +185,19 @@ static unsigned long __init xen_set_iden
+                                       PFN_UP(start_pci), PFN_DOWN(last));
+       return identity;
+ }
++
++static unsigned long __init xen_get_max_pages(void)
++{
++      unsigned long max_pages = MAX_DOMAIN_PAGES;
++      domid_t domid = DOMID_SELF;
++      int ret;
++
++      ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
++      if (ret > 0)
++              max_pages = ret;
++      return min(max_pages, MAX_DOMAIN_PAGES);
++}
++
+ /**
+  * machine_specific_memory_setup - Hook for machine specific memory setup.
+  **/
+@@ -293,6 +306,12 @@ char * __init xen_memory_setup(void)
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
++      extra_limit = xen_get_max_pages();
++      if (extra_limit >= max_pfn)
++              extra_pages = extra_limit - max_pfn;
++      else
++              extra_pages = 0;
++
+       extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
+       /*
diff --git a/queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch b/queue-3.0/xen-x86_32-do-not-enable-iterrupts-when-returning-from.patch
new file mode 100644 (file)
index 0000000..499fd89
--- /dev/null
@@ -0,0 +1,57 @@
+From d198d499148a0c64a41b3aba9e7dd43772832b91 Mon Sep 17 00:00:00 2001
+From: Igor Mammedov <imammedo@redhat.com>
+Date: Thu, 1 Sep 2011 13:46:55 +0200
+Subject: xen: x86_32: do not enable iterrupts when returning from exception in interrupt context
+
+From: Igor Mammedov <imammedo@redhat.com>
+
+commit d198d499148a0c64a41b3aba9e7dd43772832b91 upstream.
+
+If vmalloc page_fault happens inside of interrupt handler with interrupts
+disabled then on exit path from exception handler when there is no pending
+interrupts, the following code (arch/x86/xen/xen-asm_32.S:112):
+
+       cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+       sete XEN_vcpu_info_mask(%eax)
+
+will enable interrupts even if they has been previously disabled according to
+eflags from the bounce frame (arch/x86/xen/xen-asm_32.S:99)
+
+       testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
+       setz XEN_vcpu_info_mask(%eax)
+
+Solution is in setting XEN_vcpu_info_mask only when it should be set
+according to
+       cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+but not clearing it if there isn't any pending events.
+
+Reproducer for bug is attached to RHBZ 707552
+
+Signed-off-by: Igor Mammedov <imammedo@redhat.com>
+Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
+
+---
+ arch/x86/xen/xen-asm_32.S |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/arch/x86/xen/xen-asm_32.S
++++ b/arch/x86/xen/xen-asm_32.S
+@@ -113,11 +113,13 @@ xen_iret_start_crit:
+       /*
+        * If there's something pending, mask events again so we can
+-       * jump back into xen_hypervisor_callback
++       * jump back into xen_hypervisor_callback. Otherwise do not
++       * touch XEN_vcpu_info_mask.
+        */
+-      sete XEN_vcpu_info_mask(%eax)
++      jne 1f
++      movb $1, XEN_vcpu_info_mask(%eax)
+-      popl %eax
++1:    popl %eax
+       /*
+        * From this point on the registers are restored and the stack