1Subject: xen3 common
2From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd)
3Patch-mainline: obsolete
4Acked-by: jbeulich@novell.com
5
6List of files that don't require modification anymore (and hence
7removed from this patch), for reference and in case upstream wants to
8take the forward porting patches:
92.6.22/include/linux/sched.h
102.6.22/kernel/softlockup.c
112.6.22/kernel/timer.c
122.6.25/mm/highmem.c
13
14---
15 drivers/Makefile | 1
16 drivers/acpi/Makefile | 3
17 drivers/acpi/hardware/hwsleep.c | 15
18 drivers/acpi/processor_core.c | 72 +++
19 drivers/acpi/processor_extcntl.c | 241 +++++++++++
20 drivers/acpi/processor_idle.c | 24 -
21 drivers/acpi/processor_perflib.c | 21
22 drivers/acpi/sleep/main.c | 9
23 drivers/char/agp/intel-agp.c | 10
24 drivers/char/mem.c | 16
25 drivers/char/tpm/Makefile | 2
26 drivers/char/tpm/tpm.h | 15
27 drivers/char/tpm/tpm_vtpm.c | 542 +++++++++++++++++++++++++
28 drivers/char/tpm/tpm_vtpm.h | 55 ++
29 drivers/char/tpm/tpm_xen.c | 722 ++++++++++++++++++++++++++++++++++
30 drivers/ide/ide-lib.c | 8
31 drivers/oprofile/buffer_sync.c | 87 +++-
32 drivers/oprofile/cpu_buffer.c | 51 +-
33 drivers/oprofile/cpu_buffer.h | 9
34 drivers/oprofile/event_buffer.h | 3
35 drivers/oprofile/oprof.c | 30 +
36 drivers/oprofile/oprof.h | 3
37 drivers/oprofile/oprofile_files.c | 201 +++++++++
38 fs/compat_ioctl.c | 19
39 include/acpi/processor.h | 143 ++++++
40 include/asm-generic/pci.h | 2
41 include/asm-generic/pgtable.h | 4
42 include/linux/aio.h | 5
43 include/linux/highmem.h | 8
44 include/linux/interrupt.h | 6
45 include/linux/kexec.h | 13
46 include/linux/mm.h | 8
47 include/linux/oprofile.h | 12
48 include/linux/page-flags.h | 26 +
49 include/linux/pci.h | 11
50 include/linux/vermagic.h | 7
51 kernel/irq/spurious.c | 2
52 kernel/kexec.c | 71 ++-
53 kernel/sysctl.c | 2
54 mm/memory.c | 42 +
55 mm/mprotect.c | 2
56 mm/page_alloc.c | 12
57 net/core/dev.c | 62 ++
58 net/core/skbuff.c | 4
59 net/ipv4/netfilter/nf_nat_proto_tcp.c | 3
60 net/ipv4/netfilter/nf_nat_proto_udp.c | 4
61 net/ipv4/xfrm4_output.c | 2
62 scripts/Makefile.build | 14
63 scripts/Makefile.lib | 6
64 51 files changed, 2671 insertions(+), 86 deletions(-)
65
66--- a/drivers/acpi/hardware/hwsleep.c
67+++ b/drivers/acpi/hardware/hwsleep.c
68@@ -241,7 +241,11 @@ acpi_status asmlinkage acpi_enter_sleep_
69 u32 PM1Bcontrol;
70 struct acpi_bit_register_info *sleep_type_reg_info;
71 struct acpi_bit_register_info *sleep_enable_reg_info;
72+#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
73 u32 in_value;
74+#else
75+ int err;
76+#endif
77 struct acpi_object_list arg_list;
78 union acpi_object arg;
79 acpi_status status;
80@@ -351,6 +355,7 @@ acpi_status asmlinkage acpi_enter_sleep_
81
82 ACPI_FLUSH_CPU_CACHE();
83
84+#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
85 status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
86 PM1Acontrol);
87 if (ACPI_FAILURE(status)) {
88@@ -397,6 +402,16 @@ acpi_status asmlinkage acpi_enter_sleep_
89 /* Spin until we wake */
90
91 } while (!in_value);
92+#else
93+ /* PV ACPI just needs to check the hypercall return value */
94+ err = acpi_notify_hypervisor_state(sleep_state,
95+ PM1Acontrol, PM1Bcontrol);
96+ if (err) {
97+ ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
98+ "Hypervisor failure [%d]\n", err));
99+ return_ACPI_STATUS(AE_ERROR);
100+ }
101+#endif
102
103 return_ACPI_STATUS(AE_OK);
104 }
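
Taken together, the hwsleep.c hunks above make S-state entry on Xen/x86 a single hypercall instead of the native PM1 register writes and wake-status spin. The following is a condensed, non-drop-in sketch of the two resulting paths; acpi_notify_hypervisor_state() comes from the accompanying Xen arch patches, not from this file, and the elided ACPICA details are only summarized in comments.

/* Condensed paraphrase of the hunk above; ACPICA plumbing omitted. */
#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
	/* Native: program PM1A/PM1B sleep control, then spin until the
	 * wake status bit (read into in_value) becomes set. */
	status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
					PM1Acontrol);
	if (ACPI_FAILURE(status))
		return_ACPI_STATUS(status);
	/* ... same for PM1B_CONTROL, then:
	 *     do { read WAK_STS into in_value; } while (!in_value); */
#else
	/* Xen PV: ask the hypervisor to do the writes and the wait. */
	err = acpi_notify_hypervisor_state(sleep_state,
					   PM1Acontrol, PM1Bcontrol);
	if (err) {
		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
				  "Hypervisor failure [%d]\n", err));
		return_ACPI_STATUS(AE_ERROR);
	}
#endif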
105--- a/drivers/acpi/Makefile
106+++ b/drivers/acpi/Makefile
107@@ -34,6 +34,9 @@ processor-objs += processor_core.o proce
108 ifdef CONFIG_CPU_FREQ
109 processor-objs += processor_perflib.o
110 endif
111+ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
112+processor-objs += processor_perflib.o processor_extcntl.o
113+endif
114
115 obj-y += sleep/
116 obj-y += bus.o glue.o
117--- a/drivers/acpi/processor_core.c
118+++ b/drivers/acpi/processor_core.c
119@@ -620,7 +620,8 @@ static int acpi_processor_get_info(struc
120 */
121 if (pr->id == -1) {
122 if (ACPI_FAILURE
123- (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
124+ (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
125+ !processor_cntl_external()) {
126 return -ENODEV;
127 }
128 }
129@@ -662,7 +663,11 @@ static int acpi_processor_get_info(struc
130 return 0;
131 }
132
133+#ifndef CONFIG_XEN
134 static DEFINE_PER_CPU(void *, processor_device_array);
135+#else
136+static void *processor_device_array[NR_ACPI_CPUS];
137+#endif
138
139 static int __cpuinit acpi_processor_start(struct acpi_device *device)
140 {
141@@ -671,30 +676,46 @@ static int __cpuinit acpi_processor_star
142 struct acpi_processor *pr;
143 struct sys_device *sysdev;
144
145+ processor_extcntl_init();
146+
147 pr = acpi_driver_data(device);
148
149 result = acpi_processor_get_info(pr, device->flags.unique_id);
150- if (result) {
151+ if (result ||
152+ ((pr->id == -1) && !processor_cntl_external())) {
153 /* Processor is physically not present */
154 return 0;
155 }
156
157- BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
158+ BUG_ON(!processor_cntl_external() &&
159+ ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
160
161 /*
162 * Buggy BIOS check
163 * ACPI id of processors can be reported wrongly by the BIOS.
164 * Don't trust it blindly
165 */
166+#ifndef CONFIG_XEN
167 if (per_cpu(processor_device_array, pr->id) != NULL &&
168 per_cpu(processor_device_array, pr->id) != device) {
169+#else
170+ BUG_ON(pr->acpi_id >= NR_ACPI_CPUS);
171+ if (processor_device_array[pr->acpi_id] != NULL &&
172+ processor_device_array[pr->acpi_id] != device) {
173+#endif
174 printk(KERN_WARNING "BIOS reported wrong ACPI id "
175 "for the processor\n");
176 return -ENODEV;
177 }
178+#ifndef CONFIG_XEN
179 per_cpu(processor_device_array, pr->id) = device;
180
181 per_cpu(processors, pr->id) = pr;
182+#else
183+ processor_device_array[pr->acpi_id] = device;
184+ if (pr->id != -1)
185+ per_cpu(processors, pr->id) = pr;
186+#endif
187
188 result = acpi_processor_add_fs(device);
189 if (result)
190@@ -710,15 +731,28 @@ static int __cpuinit acpi_processor_star
191 /* _PDC call should be done before doing anything else (if reqd.). */
192 arch_acpi_processor_init_pdc(pr);
193 acpi_processor_set_pdc(pr);
194-#ifdef CONFIG_CPU_FREQ
195+#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
196 acpi_processor_ppc_has_changed(pr);
197 #endif
198- acpi_processor_get_throttling_info(pr);
199- acpi_processor_get_limit_info(pr);
200+
201+ /*
202+ * pr->id may be -1 while processor_cntl_external is enabled.
203+ * The throttling and thermal modules don't support this case.
204+ * For now Tx only works when the dom0 vcpu count equals the pcpu
205+ * count, as we give control to dom0.
206+ */
207+ if (pr->id != -1) {
208+ acpi_processor_get_throttling_info(pr);
209+ acpi_processor_get_limit_info(pr);
210+ }
211
212
213 acpi_processor_power_init(pr, device);
214
215+ result = processor_extcntl_prepare(pr);
216+ if (result)
217+ goto end;
218+
219 pr->cdev = thermal_cooling_device_register("Processor", device,
220 &processor_cooling_ops);
221 if (IS_ERR(pr->cdev)) {
222@@ -846,7 +880,7 @@ static int acpi_processor_remove(struct
223
224 pr = acpi_driver_data(device);
225
226- if (pr->id >= nr_cpu_ids) {
227+ if (!processor_cntl_external() && pr->id >= nr_cpu_ids) {
228 kfree(pr);
229 return 0;
230 }
231@@ -872,8 +906,14 @@ static int acpi_processor_remove(struct
232 pr->cdev = NULL;
233 }
234
235+#ifndef CONFIG_XEN
236 per_cpu(processors, pr->id) = NULL;
237 per_cpu(processor_device_array, pr->id) = NULL;
238+#else
239+ if (pr->id != -1)
240+ per_cpu(processors, pr->id) = NULL;
241+ processor_device_array[pr->acpi_id] = NULL;
242+#endif
243 kfree(pr);
244
245 return 0;
246@@ -933,6 +973,10 @@ int acpi_processor_device_add(acpi_handl
247 if (!pr)
248 return -ENODEV;
249
250+ if (processor_cntl_external())
251+ processor_notify_external(pr,
252+ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
253+
254 if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) {
255 kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE);
256 }
257@@ -972,6 +1016,10 @@ static void __ref acpi_processor_hotplug
258 break;
259 }
260
261+ if (processor_cntl_external())
262+ processor_notify_external(pr,
263+ PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
264+
265 if (pr->id >= 0 && (pr->id < nr_cpu_ids)) {
266 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
267 break;
268@@ -1003,6 +1051,11 @@ static void __ref acpi_processor_hotplug
269
270 if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id)))
271 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
272+
273+ if (processor_cntl_external())
274+ processor_notify_external(pr, PROCESSOR_HOTPLUG,
275+ HOTPLUG_TYPE_REMOVE);
276+
277 break;
278 default:
279 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
280@@ -1067,6 +1120,11 @@ static acpi_status acpi_processor_hotadd
281
282 static int acpi_processor_handle_eject(struct acpi_processor *pr)
283 {
284+#ifdef CONFIG_XEN
285+ if (pr->id == -1)
286+ return (0);
287+#endif
288+
289 if (cpu_online(pr->id))
290 cpu_down(pr->id);
291
292--- /dev/null
293+++ b/drivers/acpi/processor_extcntl.c
294@@ -0,0 +1,241 @@
295+/*
296+ * processor_extcntl.c - channel to external control logic
297+ *
298+ * Copyright (C) 2008, Intel corporation
299+ *
300+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
301+ *
302+ * This program is free software; you can redistribute it and/or modify
303+ * it under the terms of the GNU General Public License as published by
304+ * the Free Software Foundation; either version 2 of the License, or (at
305+ * your option) any later version.
306+ *
307+ * This program is distributed in the hope that it will be useful, but
308+ * WITHOUT ANY WARRANTY; without even the implied warranty of
309+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
310+ * General Public License for more details.
311+ *
312+ * You should have received a copy of the GNU General Public License along
313+ * with this program; if not, write to the Free Software Foundation, Inc.,
314+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
315+ *
316+ */
317+
318+#include <linux/kernel.h>
319+#include <linux/init.h>
320+#include <linux/types.h>
321+#include <linux/acpi.h>
322+#include <linux/pm.h>
323+#include <linux/cpu.h>
324+
325+#include <acpi/processor.h>
326+
327+#define ACPI_PROCESSOR_COMPONENT 0x01000000
328+#define ACPI_PROCESSOR_CLASS "processor"
329+#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
330+#define _COMPONENT ACPI_PROCESSOR_COMPONENT
331+ACPI_MODULE_NAME("acpi_processor")
332+
333+static int processor_extcntl_parse_csd(struct acpi_processor *pr);
334+static int processor_extcntl_get_performance(struct acpi_processor *pr);
335+/*
336+ * External processor control logic may register with its own set of
337+ * ops to get ACPI-related notifications. One example is a VMM.
338+ */
339+const struct processor_extcntl_ops *processor_extcntl_ops;
340+EXPORT_SYMBOL(processor_extcntl_ops);
341+
342+static int processor_notify_smm(void)
343+{
344+ acpi_status status;
345+ static int is_done = 0;
346+
347+ /* only need to notify the BIOS successfully once */
348+ /* avoid double notification, which may lead to unexpected results */
349+ if (is_done)
350+ return 0;
351+
352+ /* Can't write pstate_cnt to smi_cmd if either value is zero */
353+ if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
354+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
355+ return 0;
356+ }
357+
358+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
359+ "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
360+ acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
361+
362+ /* FADT v1 doesn't support pstate_cnt, but many BIOS vendors use
363+ * it anyway, so we need to support it... */
364+ if (acpi_fadt_is_v1) {
365+ ACPI_DEBUG_PRINT((ACPI_DB_INFO,
366+ "Using v1.0 FADT reserved value for pstate_cnt\n"));
367+ }
368+
369+ status = acpi_os_write_port(acpi_fadt.smi_cmd,
370+ (u32) acpi_fadt.pstate_cnt, 8);
371+ if (ACPI_FAILURE(status))
372+ return status;
373+
374+ is_done = 1;
375+
376+ return 0;
377+}
378+
379+int processor_notify_external(struct acpi_processor *pr, int event, int type)
380+{
381+ int ret = -EINVAL;
382+
383+ if (!processor_cntl_external())
384+ return -EINVAL;
385+
386+ switch (event) {
387+ case PROCESSOR_PM_INIT:
388+ case PROCESSOR_PM_CHANGE:
389+ if ((type >= PM_TYPE_MAX) ||
390+ !processor_extcntl_ops->pm_ops[type])
391+ break;
392+
393+ ret = processor_extcntl_ops->pm_ops[type](pr, event);
394+ break;
395+ case PROCESSOR_HOTPLUG:
396+ if (processor_extcntl_ops->hotplug)
397+ ret = processor_extcntl_ops->hotplug(pr, type);
398+ break;
399+ default:
400+ printk(KERN_ERR "Unsupported processor event %d.\n", event);
401+ break;
402+ }
403+
404+ return ret;
405+}
406+
407+/*
408+ * External control logic can decide to take over all or part of the
409+ * physical processor control bits. Take a VMM for example: physical
410+ * processors are owned by the VMM, so existence information such as
411+ * hotplug events must always be reported to it, and processor idle
412+ * states are likewise controlled by the VMM. For other control bits
413+ * like performance/throttle states, the VMM may choose whether to
414+ * take control according to its own policy.
415+ */
416+void processor_extcntl_init(void)
417+{
418+ if (!processor_extcntl_ops)
419+ arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
420+}
421+
422+/*
423+ * This is called from ACPI processor init and is meant to hold
424+ * some tricky housekeeping jobs required by the external control model.
425+ * For example, we put the dependency-parsing stubs for idle and
426+ * performance states here. That information may not be available
427+ * once control logic such as the cpufreq driver is split out of dom0.
428+ */
429+int processor_extcntl_prepare(struct acpi_processor *pr)
430+{
431+ /* parse cstate dependency information */
432+ if (processor_pm_external())
433+ processor_extcntl_parse_csd(pr);
434+
435+ /* Initialize performance states */
436+ if (processor_pmperf_external())
437+ processor_extcntl_get_performance(pr);
438+
439+ return 0;
440+}
441+
442+/*
443+ * Currently no _CSD is implemented, which is why the existing ACPI code
444+ * doesn't parse _CSD at all. But to keep the interface to the external
445+ * control logic complete, we put a placeholder here for future
446+ * compatibility.
447+ */
448+static int processor_extcntl_parse_csd(struct acpi_processor *pr)
449+{
450+ int i;
451+
452+ for (i = 0; i < pr->power.count; i++) {
453+ if (!pr->power.states[i].valid)
454+ continue;
455+
456+ /* No dependency by default */
457+ pr->power.states[i].domain_info = NULL;
458+ pr->power.states[i].csd_count = 0;
459+ }
460+
461+ return 0;
462+}
463+
464+/*
465+ * The existing ACPI module parses performance states at some point,
466+ * namely when the acpi-cpufreq driver is loaded, which is something
467+ * we'd like to disable to avoid conflicts with the external control
468+ * logic. So we have to collect the raw performance information here
469+ * when the ACPI processor object is found and started.
470+ */
471+static int processor_extcntl_get_performance(struct acpi_processor *pr)
472+{
473+ int ret;
474+ struct acpi_processor_performance *perf;
475+ struct acpi_psd_package *pdomain;
476+
477+ if (pr->performance)
478+ return -EBUSY;
479+
480+ perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
481+ if (!perf)
482+ return -ENOMEM;
483+
484+ pr->performance = perf;
485+ /* Get basic performance state information */
486+ ret = acpi_processor_get_performance_info(pr);
487+ if (ret < 0)
488+ goto err_out;
489+
490+ /*
491+ * Here we need to retrieve performance dependency information
492+ * from the _PSD object. The existing interface is not used because
493+ * it sticks to Linux cpu ids to construct bitmaps, whereas we want
494+ * to decouple ACPI processor objects from the Linux cpu id logic.
495+ * For example, even when Linux is configured as UP we still want to
496+ * report all ACPI processor objects to the external logic. In this
497+ * case it is preferable to use the ACPI ID instead.
499+ */
500+ pdomain = &pr->performance->domain_info;
501+ pdomain->num_processors = 0;
502+ ret = acpi_processor_get_psd(pr);
503+ if (ret < 0) {
504+ /*
505+ * _PSD is optional - assume no coordination if absent (or
506+ * broken), matching native kernels' behavior.
507+ */
508+ pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
509+ pdomain->revision = ACPI_PSD_REV0_REVISION;
510+ pdomain->domain = pr->acpi_id;
511+ pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
512+ pdomain->num_processors = 1;
513+ }
514+
515+ /* Some sanity check */
516+ if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
517+ (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
518+ ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
519+ (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
520+ (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
521+ ret = -EINVAL;
522+ goto err_out;
523+ }
524+
525+ /* Last step is to notify BIOS that external logic exists */
526+ processor_notify_smm();
527+
528+ processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
529+
530+ return 0;
531+err_out:
532+ pr->performance = NULL;
533+ kfree(perf);
534+ return ret;
535+}
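
processor_extcntl.c above only dereferences processor_extcntl_ops; the actual callbacks are supplied elsewhere, when arch_acpi_processor_init_extcntl() fills the pointer in from the Xen arch patches. A hypothetical provider would look roughly like the sketch below. The xen_*_notifier names are made up for the example, and the struct layout is inferred from the ->hotplug and ->pm_ops[type] uses in this file.

/* Illustrative only; callback names are invented for the example. */
static int xen_hotplug_notifier(struct acpi_processor *pr, int type)
{
	/* forward HOTPLUG_TYPE_ADD/REMOVE events to the hypervisor */
	return 0;
}

static int xen_pm_perf_notifier(struct acpi_processor *pr, int event)
{
	/* push the collected _PSS/_PSD data to the hypervisor */
	return 0;
}

static const struct processor_extcntl_ops xen_extcntl_ops = {
	.hotplug		= xen_hotplug_notifier,
	.pm_ops[PM_TYPE_PERF]	= xen_pm_perf_notifier,
};

void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **ops)
{
	*ops = &xen_extcntl_ops;	/* consumed by processor_extcntl_init() */
}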
536--- a/drivers/acpi/processor_idle.c
537+++ b/drivers/acpi/processor_idle.c
538@@ -905,7 +905,8 @@ static int acpi_processor_get_power_info
539 */
540 cx.entry_method = ACPI_CSTATE_HALT;
541 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
542- } else {
543+ /* This doesn't apply to external control case */
544+ } else if (!processor_pm_external()) {
545 continue;
546 }
547 if (cx.type == ACPI_STATE_C1 &&
548@@ -944,6 +945,12 @@ static int acpi_processor_get_power_info
549
550 cx.power = obj->integer.value;
551
552+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
553+ /* cache control methods to notify external logic */
554+ if (processor_pm_external())
555+ memcpy(&cx.reg, reg, sizeof(*reg));
556+#endif
557+
558 current_count++;
559 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
560
561@@ -1285,14 +1292,18 @@ int acpi_processor_cst_has_changed(struc
562 * been initialized.
563 */
564 if (pm_idle_save) {
565- pm_idle = pm_idle_save;
566+ if (!processor_pm_external())
567+ pm_idle = pm_idle_save;
568 /* Relies on interrupts forcing exit from idle. */
569 synchronize_sched();
570 }
571
572 pr->flags.power = 0;
573 result = acpi_processor_get_power_info(pr);
574- if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
575+ if (processor_pm_external())
576+ processor_notify_external(pr,
577+ PROCESSOR_PM_CHANGE, PM_TYPE_IDLE);
578+ else if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
579 pm_idle = acpi_processor_idle;
580
581 return result;
582@@ -1814,7 +1825,7 @@ int __cpuinit acpi_processor_power_init(
583 printk(")\n");
584
585 #ifndef CONFIG_CPU_IDLE
586- if (pr->id == 0) {
587+ if (!processor_pm_external() && (pr->id == 0)) {
588 pm_idle_save = pm_idle;
589 pm_idle = acpi_processor_idle;
590 }
591@@ -1828,6 +1839,11 @@ int __cpuinit acpi_processor_power_init(
592 acpi_driver_data(device));
593 if (!entry)
594 return -EIO;
595+
596+ if (processor_pm_external())
597+ processor_notify_external(pr,
598+ PROCESSOR_PM_INIT, PM_TYPE_IDLE);
599+
600 return 0;
601 }
602
603--- a/drivers/acpi/processor_perflib.c
604+++ b/drivers/acpi/processor_perflib.c
605@@ -80,6 +80,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
606
607 static int acpi_processor_ppc_status;
608
609+#ifdef CONFIG_CPU_FREQ
610 static int acpi_processor_ppc_notifier(struct notifier_block *nb,
611 unsigned long event, void *data)
612 {
613@@ -122,6 +123,7 @@ static int acpi_processor_ppc_notifier(s
614 static struct notifier_block acpi_ppc_notifier_block = {
615 .notifier_call = acpi_processor_ppc_notifier,
616 };
617+#endif /* CONFIG_CPU_FREQ */
618
619 static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
620 {
621@@ -166,9 +168,15 @@ int acpi_processor_ppc_has_changed(struc
622 if (ret < 0)
623 return (ret);
624 else
625+#ifdef CONFIG_CPU_FREQ
626 return cpufreq_update_policy(pr->id);
627+#elif CONFIG_PROCESSOR_EXTERNAL_CONTROL
628+ return processor_notify_external(pr,
629+ PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
630+#endif
631 }
632
633+#ifdef CONFIG_CPU_FREQ
634 void acpi_processor_ppc_init(void)
635 {
636 if (!cpufreq_register_notifier
637@@ -187,6 +195,7 @@ void acpi_processor_ppc_exit(void)
638
639 acpi_processor_ppc_status &= ~PPC_REGISTERED;
640 }
641+#endif /* CONFIG_CPU_FREQ */
642
643 static int acpi_processor_get_performance_control(struct acpi_processor *pr)
644 {
645@@ -328,7 +337,10 @@ static int acpi_processor_get_performanc
646 return result;
647 }
648
649-static int acpi_processor_get_performance_info(struct acpi_processor *pr)
650+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
651+static
652+#endif
653+int acpi_processor_get_performance_info(struct acpi_processor *pr)
654 {
655 int result = 0;
656 acpi_status status = AE_OK;
657@@ -356,6 +368,7 @@ static int acpi_processor_get_performanc
658 return 0;
659 }
660
661+#ifdef CONFIG_CPU_FREQ
662 int acpi_processor_notify_smm(struct module *calling_module)
663 {
664 acpi_status status;
665@@ -416,6 +429,7 @@ int acpi_processor_notify_smm(struct mod
666 }
667
668 EXPORT_SYMBOL(acpi_processor_notify_smm);
669+#endif /* CONFIG_CPU_FREQ */
670
671 #ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF
672 /* /proc/acpi/processor/../performance interface (DEPRECATED) */
673@@ -507,7 +521,10 @@ static void acpi_cpufreq_remove_file(str
674 }
675 #endif /* CONFIG_X86_ACPI_CPUFREQ_PROC_INTF */
676
677-static int acpi_processor_get_psd(struct acpi_processor *pr)
678+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
679+static
680+#endif
681+int acpi_processor_get_psd(struct acpi_processor *pr)
682 {
683 int result = 0;
684 acpi_status status = AE_OK;
685--- a/drivers/acpi/sleep/main.c
686+++ b/drivers/acpi/sleep/main.c
687@@ -27,6 +27,7 @@ u8 sleep_states[ACPI_S_STATE_COUNT];
688 static int acpi_sleep_prepare(u32 acpi_state)
689 {
690 #ifdef CONFIG_ACPI_SLEEP
691+#ifndef CONFIG_ACPI_PV_SLEEP
692 /* do we have a wakeup address for S2 and S3? */
693 if (acpi_state == ACPI_STATE_S3) {
694 if (!acpi_wakeup_address) {
695@@ -36,6 +37,7 @@ static int acpi_sleep_prepare(u32 acpi_s
696 (acpi_physical_address)acpi_wakeup_address);
697
698 }
699+#endif
700 ACPI_FLUSH_CPU_CACHE();
701 acpi_enable_wakeup_device_prep(acpi_state);
702 #endif
703@@ -208,7 +210,14 @@ static int acpi_suspend_enter(suspend_st
704 break;
705
706 case ACPI_STATE_S3:
707+#ifdef CONFIG_ACPI_PV_SLEEP
708+ /* The hypervisor will save and restore the CPU context,
709+ * so we can skip the low-level housekeeping here.
710+ */
711+ acpi_enter_sleep_state(acpi_state);
712+#else
713 do_suspend_lowlevel();
714+#endif
715 break;
716 }
717
718--- a/drivers/char/agp/intel-agp.c
719+++ b/drivers/char/agp/intel-agp.c
720@@ -247,6 +247,13 @@ static void *i8xx_alloc_pages(void)
721 if (page == NULL)
722 return NULL;
723
724+#ifdef CONFIG_XEN
725+ if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
726+ __free_pages(page, 2);
727+ return NULL;
728+ }
729+#endif
730+
731 if (set_pages_uc(page, 4) < 0) {
732 set_pages_wb(page, 4);
733 __free_pages(page, 2);
734@@ -266,6 +273,9 @@ static void i8xx_destroy_pages(void *add
735
736 page = virt_to_page(addr);
737 set_pages_wb(page, 4);
738+#ifdef CONFIG_XEN
739+ xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
740+#endif
741 put_page(page);
742 __free_pages(page, 2);
743 atomic_dec(&agp_bridge->current_memory_agp);
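
The intel-agp hunks above follow a general pattern for Xen guests: memory a device will DMA into must be machine-contiguous, not just pseudo-physically contiguous, so the order-2 allocation is passed through xen_create_contiguous_region() (with a 32-bit address limit) right after allocation and through xen_destroy_contiguous_region() right before it is freed. A hedged sketch of that pairing in isolation; the helper names are invented for illustration.

/* Sketch of the allocate/exchange/free pattern used in the hunks above. */
static void *alloc_dma_pages_xen(unsigned int order)
{
	struct page *page = alloc_pages(GFP_KERNEL, order);

	if (!page)
		return NULL;
#ifdef CONFIG_XEN
	/* Swap the backing machine frames for a contiguous run below
	 * 4 GiB (address_bits = 32) so the GART can address it. */
	if (xen_create_contiguous_region((unsigned long)page_address(page),
					 order, 32)) {
		__free_pages(page, order);
		return NULL;
	}
#endif
	return page_address(page);
}

static void free_dma_pages_xen(void *addr, unsigned int order)
{
#ifdef CONFIG_XEN
	/* Give the contiguous machine frames back before freeing. */
	xen_destroy_contiguous_region((unsigned long)addr, order);
#endif
	free_pages((unsigned long)addr, order);
}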
744--- a/drivers/char/mem.c
745+++ b/drivers/char/mem.c
746@@ -110,6 +110,7 @@ void __attribute__((weak)) unxlate_dev_m
747 {
748 }
749
750+#ifndef ARCH_HAS_DEV_MEM
751 /*
752 * This function reads the *physical* memory. The f_pos points directly to the
753 * memory location.
754@@ -254,6 +255,7 @@ static ssize_t write_mem(struct file * f
755 *ppos += written;
756 return written;
757 }
758+#endif
759
760 int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file,
761 unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
762@@ -372,6 +374,9 @@ static int mmap_mem(struct file * file,
763 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
764 {
765 unsigned long pfn;
766+#ifdef CONFIG_XEN
767+ unsigned long i, count;
768+#endif
769
770 /* Turn a kernel-virtual address into a physical page frame */
771 pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
772@@ -386,6 +391,13 @@ static int mmap_kmem(struct file * file,
773 if (!pfn_valid(pfn))
774 return -EIO;
775
776+#ifdef CONFIG_XEN
777+ count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
778+ for (i = 0; i < count; i++)
779+ if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
780+ return -EIO;
781+#endif
782+
783 vma->vm_pgoff = pfn;
784 return mmap_mem(file, vma);
785 }
786@@ -802,6 +814,7 @@ static int open_port(struct inode * inod
787 #define open_kmem open_mem
788 #define open_oldmem open_mem
789
790+#ifndef ARCH_HAS_DEV_MEM
791 static const struct file_operations mem_fops = {
792 .llseek = memory_lseek,
793 .read = read_mem,
794@@ -810,6 +823,9 @@ static const struct file_operations mem_
795 .open = open_mem,
796 .get_unmapped_area = get_unmapped_area_mem,
797 };
798+#else
799+extern const struct file_operations mem_fops;
800+#endif
801
802 #ifdef CONFIG_DEVKMEM
803 static const struct file_operations kmem_fops = {
804--- a/drivers/char/tpm/Makefile
805+++ b/drivers/char/tpm/Makefile
806@@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
807 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
808 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
809 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
810+obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
811+tpm_xenu-y = tpm_xen.o tpm_vtpm.o
812--- a/drivers/char/tpm/tpm.h
813+++ b/drivers/char/tpm/tpm.h
814@@ -107,6 +107,9 @@ struct tpm_chip {
815 struct dentry **bios_dir;
816
817 struct list_head list;
818+#ifdef CONFIG_XEN
819+ void *priv;
820+#endif
821 void (*release) (struct device *);
822 };
823
824@@ -124,6 +127,18 @@ static inline void tpm_write_index(int b
825 outb(value & 0xFF, base+1);
826 }
827
828+#ifdef CONFIG_XEN
829+static inline void *chip_get_private(const struct tpm_chip *chip)
830+{
831+ return chip->priv;
832+}
833+
834+static inline void chip_set_private(struct tpm_chip *chip, void *priv)
835+{
836+ chip->priv = priv;
837+}
838+#endif
839+
840 extern void tpm_get_timeouts(struct tpm_chip *);
841 extern void tpm_gen_interrupt(struct tpm_chip *);
842 extern void tpm_continue_selftest(struct tpm_chip *);
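
The tpm.h hunk above adds an opaque priv pointer plus accessors so a virtual TPM front-end can attach its own state to a struct tpm_chip without touching the core driver. A trimmed illustration of how the pair is meant to be used; vtpm_state and the real attach path are in the tpm_vtpm.c/tpm_xen.c files added later in this patch, and the example_* functions are invented for the sketch.

#ifdef CONFIG_XEN
static void example_attach_state(struct tpm_chip *chip,
				 struct vtpm_state *vtpms)
{
	chip_set_private(chip, vtpms);	/* stash front-end state */
}

static u8 example_read_link_state(struct tpm_chip *chip)
{
	struct vtpm_state *vtpms = chip_get_private(chip);

	return vtpms->vd_status;	/* recover it in any callback */
}
#endif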
843--- /dev/null
844+++ b/drivers/char/tpm/tpm_vtpm.c
845@@ -0,0 +1,542 @@
846+/*
847+ * Copyright (C) 2006 IBM Corporation
848+ *
849+ * Authors:
850+ * Stefan Berger <stefanb@us.ibm.com>
851+ *
852+ * Generic device driver part for device drivers in a virtualized
853+ * environment.
854+ *
855+ * This program is free software; you can redistribute it and/or
856+ * modify it under the terms of the GNU General Public License as
857+ * published by the Free Software Foundation, version 2 of the
858+ * License.
859+ *
860+ */
861+
862+#include <asm/uaccess.h>
863+#include <linux/list.h>
864+#include <linux/device.h>
865+#include <linux/interrupt.h>
866+#include <linux/platform_device.h>
867+#include "tpm.h"
868+#include "tpm_vtpm.h"
869+
870+/* read status bits */
871+enum {
872+ STATUS_BUSY = 0x01,
873+ STATUS_DATA_AVAIL = 0x02,
874+ STATUS_READY = 0x04
875+};
876+
877+struct transmission {
878+ struct list_head next;
879+
880+ unsigned char *request;
881+ size_t request_len;
882+ size_t request_buflen;
883+
884+ unsigned char *response;
885+ size_t response_len;
886+ size_t response_buflen;
887+
888+ unsigned int flags;
889+};
890+
891+enum {
892+ TRANSMISSION_FLAG_WAS_QUEUED = 0x1
893+};
894+
895+
896+enum {
897+ DATAEX_FLAG_QUEUED_ONLY = 0x1
898+};
899+
900+
901+/* local variables */
902+
903+/* local function prototypes */
904+static int _vtpm_send_queued(struct tpm_chip *chip);
905+
906+
907+/* =============================================================
908+ * Some utility functions
909+ * =============================================================
910+ */
911+static void vtpm_state_init(struct vtpm_state *vtpms)
912+{
913+ vtpms->current_request = NULL;
914+ spin_lock_init(&vtpms->req_list_lock);
915+ init_waitqueue_head(&vtpms->req_wait_queue);
916+ INIT_LIST_HEAD(&vtpms->queued_requests);
917+
918+ vtpms->current_response = NULL;
919+ spin_lock_init(&vtpms->resp_list_lock);
920+ init_waitqueue_head(&vtpms->resp_wait_queue);
921+
922+ vtpms->disconnect_time = jiffies;
923+}
924+
925+
926+static inline struct transmission *transmission_alloc(void)
927+{
928+ return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
929+}
930+
931+static unsigned char *
932+transmission_set_req_buffer(struct transmission *t,
933+ unsigned char *buffer, size_t len)
934+{
935+ if (t->request_buflen < len) {
936+ kfree(t->request);
937+ t->request = kmalloc(len, GFP_KERNEL);
938+ if (!t->request) {
939+ t->request_buflen = 0;
940+ return NULL;
941+ }
942+ t->request_buflen = len;
943+ }
944+
945+ memcpy(t->request, buffer, len);
946+ t->request_len = len;
947+
948+ return t->request;
949+}
950+
951+static unsigned char *
952+transmission_set_res_buffer(struct transmission *t,
953+ const unsigned char *buffer, size_t len)
954+{
955+ if (t->response_buflen < len) {
956+ kfree(t->response);
957+ t->response = kmalloc(len, GFP_ATOMIC);
958+ if (!t->response) {
959+ t->response_buflen = 0;
960+ return NULL;
961+ }
962+ t->response_buflen = len;
963+ }
964+
965+ memcpy(t->response, buffer, len);
966+ t->response_len = len;
967+
968+ return t->response;
969+}
970+
971+static inline void transmission_free(struct transmission *t)
972+{
973+ kfree(t->request);
974+ kfree(t->response);
975+ kfree(t);
976+}
977+
978+/* =============================================================
979+ * Interface with the lower layer driver
980+ * =============================================================
981+ */
982+/*
983+ * Lower layer uses this function to make a response available.
984+ */
985+int vtpm_vd_recv(const struct tpm_chip *chip,
986+ const unsigned char *buffer, size_t count,
987+ void *ptr)
988+{
989+ unsigned long flags;
990+ int ret_size = 0;
991+ struct transmission *t;
992+ struct vtpm_state *vtpms;
993+
994+ vtpms = (struct vtpm_state *)chip_get_private(chip);
995+
996+ /*
997+ * The request list must contain only one request, and
998+ * that element must be the one that was passed in
999+ * from the front-end.
1000+ */
1001+ spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1002+ if (vtpms->current_request != ptr) {
1003+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1004+ return 0;
1005+ }
1006+
1007+ if ((t = vtpms->current_request)) {
1008+ transmission_free(t);
1009+ vtpms->current_request = NULL;
1010+ }
1011+
1012+ t = transmission_alloc();
1013+ if (t) {
1014+ if (!transmission_set_res_buffer(t, buffer, count)) {
1015+ transmission_free(t);
1016+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1017+ return -ENOMEM;
1018+ }
1019+ ret_size = count;
1020+ vtpms->current_response = t;
1021+ wake_up_interruptible(&vtpms->resp_wait_queue);
1022+ }
1023+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1024+
1025+ return ret_size;
1026+}
1027+
1028+
1029+/*
1030+ * Lower layer indicates its status (connected/disconnected)
1031+ */
1032+void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
1033+{
1034+ struct vtpm_state *vtpms;
1035+
1036+ vtpms = (struct vtpm_state *)chip_get_private(chip);
1037+
1038+ vtpms->vd_status = vd_status;
1039+ if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1040+ vtpms->disconnect_time = jiffies;
1041+ }
1042+}
1043+
1044+/* =============================================================
1045+ * Interface with the generic TPM driver
1046+ * =============================================================
1047+ */
1048+static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
1049+{
1050+ int rc = 0;
1051+ unsigned long flags;
1052+ struct vtpm_state *vtpms;
1053+
1054+ vtpms = (struct vtpm_state *)chip_get_private(chip);
1055+
1056+ /*
1057+ * Check if the previous operation only queued the command.
1058+ * In that case there won't be a response, so just return
1059+ * from here and reset the flag. In any other case a
1060+ * response should arrive from the back-end.
1061+ */
1062+ spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1063+ if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
1064+ vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
1065+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1066+ /*
1067+ * The first few commands (measurements) must be
1068+ * queued since it might not be possible to talk to the
1069+ * TPM, yet.
1070+ * Return a response of up to 30 '0's.
1071+ */
1072+
1073+ count = min_t(size_t, count, 30);
1074+ memset(buf, 0x0, count);
1075+ return count;
1076+ }
1077+ /*
1078+ * Check whether something is in the response list and, if
1079+ * there's nothing in the list, wait for something to appear.
1080+ */
1081+
1082+ if (!vtpms->current_response) {
1083+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1084+ interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1085+ 1000);
1086+ spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
1087+ }
1088+
1089+ if (vtpms->current_response) {
1090+ struct transmission *t = vtpms->current_response;
1091+ vtpms->current_response = NULL;
1092+ rc = min(count, t->response_len);
1093+ memcpy(buf, t->response, rc);
1094+ transmission_free(t);
1095+ }
1096+
1097+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1098+ return rc;
1099+}
1100+
1101+static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1102+{
1103+ int rc = 0;
1104+ unsigned long flags;
1105+ struct transmission *t = transmission_alloc();
1106+ struct vtpm_state *vtpms;
1107+
1108+ vtpms = (struct vtpm_state *)chip_get_private(chip);
1109+
1110+ if (!t)
1111+ return -ENOMEM;
1112+ /*
1113+ * If there's a current request, it must be the
1114+ * previous request that has timed out.
1115+ */
1116+ spin_lock_irqsave(&vtpms->req_list_lock, flags);
1117+ if (vtpms->current_request != NULL) {
1118+ printk("WARNING: Sending although there is a request outstanding.\n"
1119+ " Previous request must have timed out.\n");
1120+ transmission_free(vtpms->current_request);
1121+ vtpms->current_request = NULL;
1122+ }
1123+ spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1124+
1125+ /*
1126+ * Queue the packet if the driver below is not
1127+ * ready, yet, or there is any packet already
1128+ * in the queue.
1129+ * If the driver below is ready, unqueue all
1130+ * packets first before sending our current
1131+ * packet.
1132+ * For each unqueued packet, except for the
1133+ * last (=current) packet, call the function
1134+ * tpm_xen_recv to wait for the response to come
1135+ * back.
1136+ */
1137+ if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1138+ if (time_after(jiffies,
1139+ vtpms->disconnect_time + HZ * 10)) {
1140+ rc = -ENOENT;
1141+ } else {
1142+ goto queue_it;
1143+ }
1144+ } else {
1145+ /*
1146+ * Send all queued packets.
1147+ */
1148+ if (_vtpm_send_queued(chip) == 0) {
1149+
1150+ vtpms->current_request = t;
1151+
1152+ rc = vtpm_vd_send(vtpms->tpm_private,
1153+ buf,
1154+ count,
1155+ t);
1156+ /*
1157+ * The generic TPM driver will call
1158+ * the function to receive the response.
1159+ */
1160+ if (rc < 0) {
1161+ vtpms->current_request = NULL;
1162+ goto queue_it;
1163+ }
1164+ } else {
1165+queue_it:
1166+ if (!transmission_set_req_buffer(t, buf, count)) {
1167+ transmission_free(t);
1168+ rc = -ENOMEM;
1169+ goto exit;
1170+ }
1171+ /*
1172+ * An error occurred. Don't even try
1173+ * to send the current request. Just
1174+ * queue it.
1175+ */
1176+ spin_lock_irqsave(&vtpms->req_list_lock, flags);
1177+ vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1178+ list_add_tail(&t->next, &vtpms->queued_requests);
1179+ spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1180+ }
1181+ }
1182+
1183+exit:
1184+ return rc;
1185+}
1186+
1187+
1188+/*
1189+ * Send all queued requests.
1190+ */
1191+static int _vtpm_send_queued(struct tpm_chip *chip)
1192+{
1193+ int rc;
1194+ int error = 0;
1195+ long flags;
1196+ unsigned char buffer[1];
1197+ struct vtpm_state *vtpms;
1198+ vtpms = (struct vtpm_state *)chip_get_private(chip);
1199+
1200+ spin_lock_irqsave(&vtpms->req_list_lock, flags);
1201+
1202+ while (!list_empty(&vtpms->queued_requests)) {
1203+ /*
1204+ * Need to dequeue them.
1205+ * Read the result into a dummy buffer.
1206+ */
1207+ struct transmission *qt = (struct transmission *)
1208+ vtpms->queued_requests.next;
1209+ list_del(&qt->next);
1210+ vtpms->current_request = qt;
1211+ spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1212+
1213+ rc = vtpm_vd_send(vtpms->tpm_private,
1214+ qt->request,
1215+ qt->request_len,
1216+ qt);
1217+
1218+ if (rc < 0) {
1219+ spin_lock_irqsave(&vtpms->req_list_lock, flags);
1220+ if ((qt = vtpms->current_request) != NULL) {
1221+ /*
1222+ * requeue it at the beginning
1223+ * of the list
1224+ */
1225+ list_add(&qt->next,
1226+ &vtpms->queued_requests);
1227+ }
1228+ vtpms->current_request = NULL;
1229+ error = 1;
1230+ break;
1231+ }
1232+ /*
1233+ * After this point qt is not valid anymore!
1234+ * It is freed when the front-end is delivering
1235+ * the data by calling tpm_recv
1236+ */
1237+ /*
1238+ * Receive response into provided dummy buffer
1239+ */
1240+ rc = vtpm_recv(chip, buffer, sizeof(buffer));
1241+ spin_lock_irqsave(&vtpms->req_list_lock, flags);
1242+ }
1243+
1244+ spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1245+
1246+ return error;
1247+}
1248+
1249+static void vtpm_cancel(struct tpm_chip *chip)
1250+{
1251+ unsigned long flags;
1252+ struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1253+
1254+ spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1255+
1256+ if (!vtpms->current_response && vtpms->current_request) {
1257+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1258+ interruptible_sleep_on(&vtpms->resp_wait_queue);
1259+ spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1260+ }
1261+
1262+ if (vtpms->current_response) {
1263+ struct transmission *t = vtpms->current_response;
1264+ vtpms->current_response = NULL;
1265+ transmission_free(t);
1266+ }
1267+
1268+ spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
1269+}
1270+
1271+static u8 vtpm_status(struct tpm_chip *chip)
1272+{
1273+ u8 rc = 0;
1274+ unsigned long flags;
1275+ struct vtpm_state *vtpms;
1276+
1277+ vtpms = (struct vtpm_state *)chip_get_private(chip);
1278+
1279+ spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1280+ /*
1281+ * Data are available if:
1282+ * - there's a current response
1283+ * - the last packet was queued only (this is fake, but necessary to
1284+ * get the generic TPM layer to call the receive function.)
1285+ */
1286+ if (vtpms->current_response ||
1287+ 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1288+ rc = STATUS_DATA_AVAIL;
1289+ } else if (!vtpms->current_response && !vtpms->current_request) {
1290+ rc = STATUS_READY;
1291+ }
1292+
1293+ spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1294+ return rc;
1295+}
1296+
1297+static struct file_operations vtpm_ops = {
1298+ .owner = THIS_MODULE,
1299+ .llseek = no_llseek,
1300+ .open = tpm_open,
1301+ .read = tpm_read,
1302+ .write = tpm_write,
1303+ .release = tpm_release,
1304+};
1305+
1306+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1307+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1308+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1309+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1310+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1311+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1312+ NULL);
1313+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1314+static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1315+
1316+static struct attribute *vtpm_attrs[] = {
1317+ &dev_attr_pubek.attr,
1318+ &dev_attr_pcrs.attr,
1319+ &dev_attr_enabled.attr,
1320+ &dev_attr_active.attr,
1321+ &dev_attr_owned.attr,
1322+ &dev_attr_temp_deactivated.attr,
1323+ &dev_attr_caps.attr,
1324+ &dev_attr_cancel.attr,
1325+ NULL,
1326+};
1327+
1328+static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1329+
1330+#define TPM_LONG_TIMEOUT (10 * 60 * HZ)
1331+
1332+static struct tpm_vendor_specific tpm_vtpm = {
1333+ .recv = vtpm_recv,
1334+ .send = vtpm_send,
1335+ .cancel = vtpm_cancel,
1336+ .status = vtpm_status,
1337+ .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1338+ .req_complete_val = STATUS_DATA_AVAIL,
1339+ .req_canceled = STATUS_READY,
1340+ .attr_group = &vtpm_attr_grp,
1341+ .miscdev = {
1342+ .fops = &vtpm_ops,
1343+ },
1344+ .duration = {
1345+ TPM_LONG_TIMEOUT,
1346+ TPM_LONG_TIMEOUT,
1347+ TPM_LONG_TIMEOUT,
1348+ },
1349+};
1350+
1351+struct tpm_chip *init_vtpm(struct device *dev,
1352+ struct tpm_private *tp)
1353+{
1354+ long rc;
1355+ struct tpm_chip *chip;
1356+ struct vtpm_state *vtpms;
1357+
1358+ vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1359+ if (!vtpms)
1360+ return ERR_PTR(-ENOMEM);
1361+
1362+ vtpm_state_init(vtpms);
1363+ vtpms->tpm_private = tp;
1364+
1365+ chip = tpm_register_hardware(dev, &tpm_vtpm);
1366+ if (!chip) {
1367+ rc = -ENODEV;
1368+ goto err_free_mem;
1369+ }
1370+
1371+ chip_set_private(chip, vtpms);
1372+
1373+ return chip;
1374+
1375+err_free_mem:
1376+ kfree(vtpms);
1377+
1378+ return ERR_PTR(rc);
1379+}
1380+
1381+void cleanup_vtpm(struct device *dev)
1382+{
1383+ struct tpm_chip *chip = dev_get_drvdata(dev);
1384+ struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1385+ tpm_remove_hardware(dev);
1386+ kfree(vtpms);
1387+}
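
tpm_vtpm.c above is deliberately transport-agnostic: it exposes init_vtpm()/cleanup_vtpm() plus vtpm_vd_recv()/vtpm_vd_status() to whatever lower layer carries the bytes, and in return expects that layer to provide vtpm_vd_send() (the prototypes live in tpm_vtpm.h, added next). A compressed sketch of that contract follows; the real transport is tpm_xen.c, and the example_transport_* helpers are invented stand-ins for its xenbus plumbing.

/* Provided BY the transport, called by tpm_vtpm.c to push a request out.
 * 'ptr' identifies the request and is handed back via vtpm_vd_recv(). */
int vtpm_vd_send(struct tpm_private *tp, const u8 *buf, size_t count,
		 void *ptr)
{
	/* hand buf/count to the backend (grant table + event channel in
	 * the Xen case) and remember ptr for the completion path */
	return count;
}

/* Called BY the transport when the backend answers... */
static void example_transport_complete(struct tpm_private *tp,
				       const u8 *resp, size_t len, void *ptr)
{
	vtpm_vd_recv(tp->chip, resp, len, ptr);
}

/* ...and when the link comes up or goes down. */
static void example_transport_link_change(struct tpm_private *tp, int up)
{
	vtpm_vd_status(tp->chip, up ? TPM_VD_STATUS_CONNECTED
				    : TPM_VD_STATUS_DISCONNECTED);
}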
1388--- /dev/null
1389+++ b/drivers/char/tpm/tpm_vtpm.h
1390@@ -0,0 +1,55 @@
1391+#ifndef TPM_VTPM_H
1392+#define TPM_VTPM_H
1393+
1394+struct tpm_chip;
1395+struct tpm_private;
1396+
1397+struct vtpm_state {
1398+ struct transmission *current_request;
1399+ spinlock_t req_list_lock;
1400+ wait_queue_head_t req_wait_queue;
1401+
1402+ struct list_head queued_requests;
1403+
1404+ struct transmission *current_response;
1405+ spinlock_t resp_list_lock;
1406+ wait_queue_head_t resp_wait_queue; // processes waiting for responses
1407+
1408+ u8 vd_status;
1409+ u8 flags;
1410+
1411+ unsigned long disconnect_time;
1412+
1413+ /*
1414+ * The following is a private structure of the underlying
1415+ * driver. It is passed as parameter in the send function.
1416+ */
1417+ struct tpm_private *tpm_private;
1418+};
1419+
1420+
1421+enum vdev_status {
1422+ TPM_VD_STATUS_DISCONNECTED = 0x0,
1423+ TPM_VD_STATUS_CONNECTED = 0x1
1424+};
1425+
1426+/* this function is called from tpm_vtpm.c */
1427+int vtpm_vd_send(struct tpm_private * tp,
1428+ const u8 * buf, size_t count, void *ptr);
1429+
1430+/* these functions are offered by tpm_vtpm.c */
1431+struct tpm_chip *init_vtpm(struct device *,
1432+ struct tpm_private *);
1433+void cleanup_vtpm(struct device *);
1434+int vtpm_vd_recv(const struct tpm_chip* chip,
1435+ const unsigned char *buffer, size_t count, void *ptr);
1436+void vtpm_vd_status(const struct tpm_chip *, u8 status);
1437+
1438+static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1439+{
1440+ struct tpm_chip *chip = dev_get_drvdata(dev);
1441+ struct vtpm_state *vtpms = chip_get_private(chip);
1442+ return vtpms->tpm_private;
1443+}
1444+
1445+#endif
1446--- /dev/null
1447+++ b/drivers/char/tpm/tpm_xen.c
1448@@ -0,0 +1,722 @@
1449+/*
1450+ * Copyright (c) 2005, IBM Corporation
1451+ *
1452+ * Author: Stefan Berger, stefanb@us.ibm.com
1453+ * Grant table support: Mahadevan Gomathisankaran
1454+ *
1455+ * This code has been derived from drivers/xen/netfront/netfront.c
1456+ *
1457+ * Copyright (c) 2002-2004, K A Fraser
1458+ *
1459+ * This program is free software; you can redistribute it and/or
1460+ * modify it under the terms of the GNU General Public License version 2
1461+ * as published by the Free Software Foundation; or, when distributed
1462+ * separately from the Linux kernel or incorporated into other
1463+ * software packages, subject to the following license:
1464+ *
1465+ * Permission is hereby granted, free of charge, to any person obtaining a copy
1466+ * of this source file (the "Software"), to deal in the Software without
1467+ * restriction, including without limitation the rights to use, copy, modify,
1468+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1469+ * and to permit persons to whom the Software is furnished to do so, subject to
1470+ * the following conditions:
1471+ *
1472+ * The above copyright notice and this permission notice shall be included in
1473+ * all copies or substantial portions of the Software.
1474+ *
1475+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1476+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1477+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1478+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1479+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1480+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1481+ * IN THE SOFTWARE.
1482+ */
1483+
1484+#include <linux/errno.h>
1485+#include <linux/err.h>
1486+#include <linux/interrupt.h>
1487+#include <linux/mutex.h>
1488+#include <asm/uaccess.h>
1489+#include <xen/evtchn.h>
1490+#include <xen/interface/grant_table.h>
1491+#include <xen/interface/io/tpmif.h>
1492+#include <xen/gnttab.h>
1493+#include <xen/xenbus.h>
1494+#include "tpm.h"
1495+#include "tpm_vtpm.h"
1496+
1497+#undef DEBUG
1498+
1499+/* local structures */
1500+struct tpm_private {
1501+ struct tpm_chip *chip;
1502+
1503+ tpmif_tx_interface_t *tx;
1504+ atomic_t refcnt;
1505+ unsigned int irq;
1506+ u8 is_connected;
1507+ u8 is_suspended;
1508+
1509+ spinlock_t tx_lock;
1510+
1511+ struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1512+
1513+ atomic_t tx_busy;
1514+ void *tx_remember;
1515+
1516+ domid_t backend_id;
1517+ wait_queue_head_t wait_q;
1518+
1519+ struct xenbus_device *dev;
1520+ int ring_ref;
1521+};
1522+
1523+struct tx_buffer {
1524+ unsigned int size; // available space in data
1525+ unsigned int len; // used space in data
1526+ unsigned char *data; // pointer to a page
1527+};
1528+
1529+
1530+/* locally visible variables */
1531+static grant_ref_t gref_head;
1532+static struct tpm_private *my_priv;
1533+
1534+/* local function prototypes */
1535+static irqreturn_t tpmif_int(int irq,
1536+ void *tpm_priv,
1537+ struct pt_regs *ptregs);
1538+static void tpmif_rx_action(unsigned long unused);
1539+static int tpmif_connect(struct xenbus_device *dev,
1540+ struct tpm_private *tp,
1541+ domid_t domid);
1542+static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1543+static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1544+static void tpmif_free_tx_buffers(struct tpm_private *tp);
1545+static void tpmif_set_connected_state(struct tpm_private *tp,
1546+ u8 newstate);
1547+static int tpm_xmit(struct tpm_private *tp,
1548+ const u8 * buf, size_t count, int userbuffer,
1549+ void *remember);
1550+static void destroy_tpmring(struct tpm_private *tp);
1551+void __exit tpmif_exit(void);
1552+
1553+#define DPRINTK(fmt, args...) \
1554+ pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1555+#define IPRINTK(fmt, args...) \
1556+ printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1557+#define WPRINTK(fmt, args...) \
1558+ printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1559+
1560+#define GRANT_INVALID_REF 0
1561+
1562+
1563+static inline int
1564+tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1565+ int isuserbuffer)
1566+{
1567+ int copied = len;
1568+
1569+ if (len > txb->size)
1570+ copied = txb->size;
1571+ if (isuserbuffer) {
1572+ if (copy_from_user(txb->data, src, copied))
1573+ return -EFAULT;
1574+ } else {
1575+ memcpy(txb->data, src, copied);
1576+ }
1577+ txb->len = len;
1578+ return copied;
1579+}
1580+
1581+static inline struct tx_buffer *tx_buffer_alloc(void)
1582+{
1583+ struct tx_buffer *txb;
1584+
1585+ txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1586+ if (!txb)
1587+ return NULL;
1588+
1589+ txb->len = 0;
1590+ txb->size = PAGE_SIZE;
1591+ txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1592+ if (txb->data == NULL) {
1593+ kfree(txb);
1594+ txb = NULL;
1595+ }
1596+
1597+ return txb;
1598+}
1599+
1600+
1601+static inline void tx_buffer_free(struct tx_buffer *txb)
1602+{
1603+ if (txb) {
1604+ free_page((long)txb->data);
1605+ kfree(txb);
1606+ }
1607+}
1608+
1609+/**************************************************************
1610+ Utility function for the tpm_private structure
1611+**************************************************************/
1612+static void tpm_private_init(struct tpm_private *tp)
1613+{
1614+ spin_lock_init(&tp->tx_lock);
1615+ init_waitqueue_head(&tp->wait_q);
1616+ atomic_set(&tp->refcnt, 1);
1617+}
1618+
1619+static void tpm_private_put(void)
1620+{
1621+ if (!atomic_dec_and_test(&my_priv->refcnt))
1622+ return;
1623+
1624+ tpmif_free_tx_buffers(my_priv);
1625+ kfree(my_priv);
1626+ my_priv = NULL;
1627+}
1628+
1629+static struct tpm_private *tpm_private_get(void)
1630+{
1631+ int err;
1632+
1633+ if (my_priv) {
1634+ atomic_inc(&my_priv->refcnt);
1635+ return my_priv;
1636+ }
1637+
1638+ my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1639+ if (!my_priv)
1640+ return NULL;
1641+
1642+ tpm_private_init(my_priv);
1643+ err = tpmif_allocate_tx_buffers(my_priv);
1644+ if (err < 0)
1645+ tpm_private_put();
1646+
1647+ return my_priv;
1648+}
1649+
1650+/**************************************************************
1651+
1652+ The interface to let the tpm plugin register its callback
1653+ function and send data to another partition using this module
1654+
1655+**************************************************************/
1656+
1657+static DEFINE_MUTEX(suspend_lock);
1658+/*
1659+ * Send data via this module by calling this function
1660+ */
1661+int vtpm_vd_send(struct tpm_private *tp,
1662+ const u8 * buf, size_t count, void *ptr)
1663+{
1664+ int sent;
1665+
1666+ mutex_lock(&suspend_lock);
1667+ sent = tpm_xmit(tp, buf, count, 0, ptr);
1668+ mutex_unlock(&suspend_lock);
1669+
1670+ return sent;
1671+}
1672+
1673+/**************************************************************
1674+ XENBUS support code
1675+**************************************************************/
1676+
1677+static int setup_tpmring(struct xenbus_device *dev,
1678+ struct tpm_private *tp)
1679+{
1680+ tpmif_tx_interface_t *sring;
1681+ int err;
1682+
1683+ tp->ring_ref = GRANT_INVALID_REF;
1684+
1685+ sring = (void *)__get_free_page(GFP_KERNEL);
1686+ if (!sring) {
1687+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1688+ return -ENOMEM;
1689+ }
1690+ tp->tx = sring;
1691+
1692+ err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1693+ if (err < 0) {
1694+ free_page((unsigned long)sring);
1695+ tp->tx = NULL;
1696+ xenbus_dev_fatal(dev, err, "allocating grant reference");
1697+ goto fail;
1698+ }
1699+ tp->ring_ref = err;
1700+
1701+ err = tpmif_connect(dev, tp, dev->otherend_id);
1702+ if (err)
1703+ goto fail;
1704+
1705+ return 0;
1706+fail:
1707+ destroy_tpmring(tp);
1708+ return err;
1709+}
1710+
1711+
1712+static void destroy_tpmring(struct tpm_private *tp)
1713+{
1714+ tpmif_set_connected_state(tp, 0);
1715+
1716+ if (tp->ring_ref != GRANT_INVALID_REF) {
1717+ gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1718+ tp->ring_ref = GRANT_INVALID_REF;
1719+ tp->tx = NULL;
1720+ }
1721+
1722+ if (tp->irq)
1723+ unbind_from_irqhandler(tp->irq, tp);
1724+
1725+ tp->irq = 0;
1726+}
1727+
1728+
1729+static int talk_to_backend(struct xenbus_device *dev,
1730+ struct tpm_private *tp)
1731+{
1732+ const char *message = NULL;
1733+ int err;
1734+ struct xenbus_transaction xbt;
1735+
1736+ err = setup_tpmring(dev, tp);
1737+ if (err) {
1738+ xenbus_dev_fatal(dev, err, "setting up ring");
1739+ goto out;
1740+ }
1741+
1742+again:
1743+ err = xenbus_transaction_start(&xbt);
1744+ if (err) {
1745+ xenbus_dev_fatal(dev, err, "starting transaction");
1746+ goto destroy_tpmring;
1747+ }
1748+
1749+ err = xenbus_printf(xbt, dev->nodename,
1750+ "ring-ref","%u", tp->ring_ref);
1751+ if (err) {
1752+ message = "writing ring-ref";
1753+ goto abort_transaction;
1754+ }
1755+
1756+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1757+ irq_to_evtchn_port(tp->irq));
1758+ if (err) {
1759+ message = "writing event-channel";
1760+ goto abort_transaction;
1761+ }
1762+
1763+ err = xenbus_transaction_end(xbt, 0);
1764+ if (err == -EAGAIN)
1765+ goto again;
1766+ if (err) {
1767+ xenbus_dev_fatal(dev, err, "completing transaction");
1768+ goto destroy_tpmring;
1769+ }
1770+
1771+ xenbus_switch_state(dev, XenbusStateConnected);
1772+
1773+ return 0;
1774+
1775+abort_transaction:
1776+ xenbus_transaction_end(xbt, 1);
1777+ if (message)
1778+ xenbus_dev_error(dev, err, "%s", message);
1779+destroy_tpmring:
1780+ destroy_tpmring(tp);
1781+out:
1782+ return err;
1783+}
1784+
1785+/**
1786+ * Callback received when the backend's state changes.
1787+ */
1788+static void backend_changed(struct xenbus_device *dev,
1789+ enum xenbus_state backend_state)
1790+{
1791+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1792+ DPRINTK("\n");
1793+
1794+ switch (backend_state) {
1795+ case XenbusStateInitialising:
1796+ case XenbusStateInitWait:
1797+ case XenbusStateInitialised:
1798+ case XenbusStateReconfiguring:
1799+ case XenbusStateReconfigured:
1800+ case XenbusStateUnknown:
1801+ break;
1802+
1803+ case XenbusStateConnected:
1804+ tpmif_set_connected_state(tp, 1);
1805+ break;
1806+
1807+ case XenbusStateClosing:
1808+ tpmif_set_connected_state(tp, 0);
1809+ xenbus_frontend_closed(dev);
1810+ break;
1811+
1812+ case XenbusStateClosed:
1813+ tpmif_set_connected_state(tp, 0);
1814+ if (tp->is_suspended == 0)
1815+ device_unregister(&dev->dev);
1816+ xenbus_frontend_closed(dev);
1817+ break;
1818+ }
1819+}
1820+
1821+static int tpmfront_probe(struct xenbus_device *dev,
1822+ const struct xenbus_device_id *id)
1823+{
1824+ int err;
1825+ int handle;
1826+ struct tpm_private *tp = tpm_private_get();
1827+
1828+ if (!tp)
1829+ return -ENOMEM;
1830+
1831+ tp->chip = init_vtpm(&dev->dev, tp);
1832+ if (IS_ERR(tp->chip))
1833+ return PTR_ERR(tp->chip);
1834+
1835+ err = xenbus_scanf(XBT_NIL, dev->nodename,
1836+ "handle", "%i", &handle);
1837+ if (XENBUS_EXIST_ERR(err))
1838+ return err;
1839+
1840+ if (err < 0) {
1841+ xenbus_dev_fatal(dev,err,"reading virtual-device");
1842+ return err;
1843+ }
1844+
1845+ tp->dev = dev;
1846+
1847+ err = talk_to_backend(dev, tp);
1848+ if (err) {
1849+ tpm_private_put();
1850+ return err;
1851+ }
1852+
1853+ return 0;
1854+}
1855+
1856+
1857+static int tpmfront_remove(struct xenbus_device *dev)
1858+{
1859+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1860+ destroy_tpmring(tp);
1861+ cleanup_vtpm(&dev->dev);
1862+ return 0;
1863+}
1864+
1865+static int tpmfront_suspend(struct xenbus_device *dev)
1866+{
1867+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1868+ u32 ctr;
1869+
1870+ /* Take the lock, preventing any application from sending. */
1871+ mutex_lock(&suspend_lock);
1872+ tp->is_suspended = 1;
1873+
1874+ for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1875+ if ((ctr % 10) == 0)
1876+ printk("TPM-FE [INFO]: Waiting for outstanding "
1877+ "request.\n");
1878+ /* Wait for a request to be responded to. */
1879+ interruptible_sleep_on_timeout(&tp->wait_q, 100);
1880+ }
1881+
1882+ return 0;
1883+}
1884+
1885+static int tpmfront_suspend_finish(struct tpm_private *tp)
1886+{
1887+ tp->is_suspended = 0;
1888+ /* Allow applications to send again. */
1889+ mutex_unlock(&suspend_lock);
1890+ return 0;
1891+}
1892+
1893+static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1894+{
1895+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1896+ return tpmfront_suspend_finish(tp);
1897+}
1898+
1899+static int tpmfront_resume(struct xenbus_device *dev)
1900+{
1901+ struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1902+ destroy_tpmring(tp);
1903+ return talk_to_backend(dev, tp);
1904+}
1905+
1906+static int tpmif_connect(struct xenbus_device *dev,
1907+ struct tpm_private *tp,
1908+ domid_t domid)
1909+{
1910+ int err;
1911+
1912+ tp->backend_id = domid;
1913+
1914+ err = bind_listening_port_to_irqhandler(
1915+ domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1916+ if (err <= 0) {
1917+ WPRINTK("bind_listening_port_to_irqhandler failed "
1918+ "(err=%d)\n", err);
1919+ return err;
1920+ }
1921+ tp->irq = err;
1922+
1923+ return 0;
1924+}
1925+
1926+static struct xenbus_device_id tpmfront_ids[] = {
1927+ { "vtpm" },
1928+ { "" }
1929+};
1930+
1931+static struct xenbus_driver tpmfront = {
1932+ .name = "vtpm",
1933+ .owner = THIS_MODULE,
1934+ .ids = tpmfront_ids,
1935+ .probe = tpmfront_probe,
1936+ .remove = tpmfront_remove,
1937+ .resume = tpmfront_resume,
1938+ .otherend_changed = backend_changed,
1939+ .suspend = tpmfront_suspend,
1940+ .suspend_cancel = tpmfront_suspend_cancel,
1941+};
1942+
1943+static void __init init_tpm_xenbus(void)
1944+{
1945+ xenbus_register_frontend(&tpmfront);
1946+}
1947+
1948+static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1949+{
1950+ unsigned int i;
1951+
1952+ for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1953+ tp->tx_buffers[i] = tx_buffer_alloc();
1954+ if (!tp->tx_buffers[i]) {
1955+ tpmif_free_tx_buffers(tp);
1956+ return -ENOMEM;
1957+ }
1958+ }
1959+ return 0;
1960+}
1961+
1962+static void tpmif_free_tx_buffers(struct tpm_private *tp)
1963+{
1964+ unsigned int i;
1965+
1966+ for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1967+ tx_buffer_free(tp->tx_buffers[i]);
1968+}
1969+
1970+static void tpmif_rx_action(unsigned long priv)
1971+{
1972+ struct tpm_private *tp = (struct tpm_private *)priv;
1973+ int i = 0;
1974+ unsigned int received;
1975+ unsigned int offset = 0;
1976+ u8 *buffer;
1977+ tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1978+
1979+ atomic_set(&tp->tx_busy, 0);
1980+ wake_up_interruptible(&tp->wait_q);
1981+
1982+ received = tx->size;
1983+
1984+ buffer = kmalloc(received, GFP_ATOMIC);
1985+ if (!buffer)
1986+ return;
1987+
1988+ for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1989+ struct tx_buffer *txb = tp->tx_buffers[i];
1990+ tpmif_tx_request_t *tx;
1991+ unsigned int tocopy;
1992+
1993+ tx = &tp->tx->ring[i].req;
1994+ tocopy = tx->size;
1995+ if (tocopy > PAGE_SIZE)
1996+ tocopy = PAGE_SIZE;
1997+
1998+ memcpy(&buffer[offset], txb->data, tocopy);
1999+
2000+ gnttab_release_grant_reference(&gref_head, tx->ref);
2001+
2002+ offset += tocopy;
2003+ }
2004+
2005+ vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
2006+ kfree(buffer);
2007+}
2008+
2009+
2010+static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
2011+{
2012+ struct tpm_private *tp = tpm_priv;
2013+ unsigned long flags;
2014+
2015+ spin_lock_irqsave(&tp->tx_lock, flags);
2016+ tpmif_rx_tasklet.data = (unsigned long)tp;
2017+ tasklet_schedule(&tpmif_rx_tasklet);
2018+ spin_unlock_irqrestore(&tp->tx_lock, flags);
2019+
2020+ return IRQ_HANDLED;
2021+}
2022+
2023+
2024+static int tpm_xmit(struct tpm_private *tp,
2025+ const u8 * buf, size_t count, int isuserbuffer,
2026+ void *remember)
2027+{
2028+ tpmif_tx_request_t *tx;
2029+ TPMIF_RING_IDX i;
2030+ unsigned int offset = 0;
2031+
2032+ spin_lock_irq(&tp->tx_lock);
2033+
2034+ if (unlikely(atomic_read(&tp->tx_busy))) {
2035+ printk("tpm_xmit: There's an outstanding request/response "
2036+ "on the way!\n");
2037+ spin_unlock_irq(&tp->tx_lock);
2038+ return -EBUSY;
2039+ }
2040+
2041+ if (tp->is_connected != 1) {
2042+ spin_unlock_irq(&tp->tx_lock);
2043+ return -EIO;
2044+ }
2045+
2046+ for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
2047+ struct tx_buffer *txb = tp->tx_buffers[i];
2048+ int copied;
2049+
2050+ if (!txb) {
2051+			DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
2052+ "Not transmitting anything!\n", i);
2053+ spin_unlock_irq(&tp->tx_lock);
2054+ return -EFAULT;
2055+ }
2056+
2057+ copied = tx_buffer_copy(txb, &buf[offset], count,
2058+ isuserbuffer);
2059+ if (copied < 0) {
2060+ /* An error occurred */
2061+ spin_unlock_irq(&tp->tx_lock);
2062+ return copied;
2063+ }
2064+ count -= copied;
2065+ offset += copied;
2066+
2067+ tx = &tp->tx->ring[i].req;
2068+ tx->addr = virt_to_machine(txb->data);
2069+ tx->size = txb->len;
2070+ tx->unused = 0;
2071+
2072+ DPRINTK("First 4 characters sent by TPM-FE are "
2073+ "0x%02x 0x%02x 0x%02x 0x%02x\n",
2074+ txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2075+
2076+		/* Get the grant table reference for this page. */
2077+ tx->ref = gnttab_claim_grant_reference(&gref_head);
2078+ if (tx->ref == -ENOSPC) {
2079+ spin_unlock_irq(&tp->tx_lock);
2080+ DPRINTK("Grant table claim reference failed in "
2081+ "func:%s line:%d file:%s\n",
2082+ __FUNCTION__, __LINE__, __FILE__);
2083+ return -ENOSPC;
2084+ }
2085+ gnttab_grant_foreign_access_ref(tx->ref,
2086+ tp->backend_id,
2087+ virt_to_mfn(txb->data),
2088+ 0 /*RW*/);
2089+ wmb();
2090+ }
2091+
2092+ atomic_set(&tp->tx_busy, 1);
2093+ tp->tx_remember = remember;
2094+
2095+ mb();
2096+
2097+ notify_remote_via_irq(tp->irq);
2098+
2099+ spin_unlock_irq(&tp->tx_lock);
2100+ return offset;
2101+}
2102+
2103+
2104+static void tpmif_notify_upperlayer(struct tpm_private *tp)
2105+{
2106+ /* Notify upper layer about the state of the connection to the BE. */
2107+ vtpm_vd_status(tp->chip, (tp->is_connected
2108+ ? TPM_VD_STATUS_CONNECTED
2109+ : TPM_VD_STATUS_DISCONNECTED));
2110+}
2111+
2112+
2113+static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2114+{
2115+ /*
2116+ * Don't notify upper layer if we are in suspend mode and
2117+ * should disconnect - assumption is that we will resume
2118+ * The mutex keeps apps from sending.
2119+ */
2120+ if (is_connected == 0 && tp->is_suspended == 1)
2121+ return;
2122+
2123+ /*
2124+ * Unlock the mutex if we are connected again
2125+ * after being suspended - now resuming.
2126+ * This also removes the suspend state.
2127+ */
2128+ if (is_connected == 1 && tp->is_suspended == 1)
2129+ tpmfront_suspend_finish(tp);
2130+
2131+ if (is_connected != tp->is_connected) {
2132+ tp->is_connected = is_connected;
2133+ tpmif_notify_upperlayer(tp);
2134+ }
2135+}
2136+
2137+
2138+
2139+/* =================================================================
2140+ * Initialization function.
2141+ * =================================================================
2142+ */
2143+
2144+
2145+static int __init tpmif_init(void)
2146+{
2147+ struct tpm_private *tp;
2148+
2149+ if (is_initial_xendomain())
2150+ return -EPERM;
2151+
2152+ tp = tpm_private_get();
2153+ if (!tp)
2154+ return -ENOMEM;
2155+
2156+ IPRINTK("Initialising the vTPM driver.\n");
2157+ if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2158+ &gref_head) < 0) {
2159+ tpm_private_put();
2160+ return -EFAULT;
2161+ }
2162+
2163+ init_tpm_xenbus();
2164+ return 0;
2165+}
2166+
2167+
2168+module_init(tpmif_init);
2169+
2170+MODULE_LICENSE("Dual BSD/GPL");
2171--- a/drivers/ide/ide-lib.c
2172+++ b/drivers/ide/ide-lib.c
2173@@ -177,12 +177,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2174 {
2175 u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
2176
2177- if (!PCI_DMA_BUS_IS_PHYS) {
2178- addr = BLK_BOUNCE_ANY;
2179- } else if (on && drive->media == ide_disk) {
2180+ if (on && drive->media == ide_disk) {
2181 struct device *dev = drive->hwif->dev;
2182
2183- if (dev && dev->dma_mask)
2184+ if (!PCI_DMA_BUS_IS_PHYS)
2185+ addr = BLK_BOUNCE_ANY;
2186+ else if (dev && dev->dma_mask)
2187 addr = *dev->dma_mask;
2188 }
2189
2190--- a/drivers/Makefile
2191+++ b/drivers/Makefile
2192@@ -37,6 +37,7 @@ obj-y += base/ block/ misc/ mfd/ net/
2193 obj-$(CONFIG_NUBUS) += nubus/
2194 obj-$(CONFIG_ATM) += atm/
2195 obj-y += macintosh/
2196+obj-$(CONFIG_XEN) += xen/
2197 obj-$(CONFIG_IDE) += ide/
2198 obj-$(CONFIG_SCSI) += scsi/
2199 obj-$(CONFIG_ATA) += ata/
2200--- a/drivers/oprofile/buffer_sync.c
2201+++ b/drivers/oprofile/buffer_sync.c
2202@@ -6,6 +6,10 @@
2203 *
2204 * @author John Levon <levon@movementarian.org>
2205 *
2206+ * Modified by Aravind Menon for Xen
2207+ * These modifications are:
2208+ * Copyright (C) 2005 Hewlett-Packard Co.
2209+ *
2210 * This is the core of the buffer management. Each
2211 * CPU buffer is processed and entered into the
2212 * global event buffer. Such processing is necessary
2213@@ -40,6 +44,7 @@ static cpumask_t marked_cpus = CPU_MASK_
2214 static DEFINE_SPINLOCK(task_mortuary);
2215 static void process_task_mortuary(void);
2216
2217+static int cpu_current_domain[NR_CPUS];
2218
2219 /* Take ownership of the task struct and place it on the
2220 * list for processing. Only after two full buffer syncs
2221@@ -148,6 +153,11 @@ static void end_sync(void)
2222 int sync_start(void)
2223 {
2224 int err;
2225+ int i;
2226+
2227+ for (i = 0; i < NR_CPUS; i++) {
2228+ cpu_current_domain[i] = COORDINATOR_DOMAIN;
2229+ }
2230
2231 start_cpu_work();
2232
2233@@ -274,15 +284,31 @@ static void add_cpu_switch(int i)
2234 last_cookie = INVALID_COOKIE;
2235 }
2236
2237-static void add_kernel_ctx_switch(unsigned int in_kernel)
2238+static void add_cpu_mode_switch(unsigned int cpu_mode)
2239 {
2240 add_event_entry(ESCAPE_CODE);
2241- if (in_kernel)
2242- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2243- else
2244- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2245+ switch (cpu_mode) {
2246+ case CPU_MODE_USER:
2247+ add_event_entry(USER_ENTER_SWITCH_CODE);
2248+ break;
2249+ case CPU_MODE_KERNEL:
2250+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2251+ break;
2252+ case CPU_MODE_XEN:
2253+ add_event_entry(XEN_ENTER_SWITCH_CODE);
2254+ break;
2255+ default:
2256+ break;
2257+ }
2258 }
2259-
2260+
2261+static void add_domain_switch(unsigned long domain_id)
2262+{
2263+ add_event_entry(ESCAPE_CODE);
2264+ add_event_entry(DOMAIN_SWITCH_CODE);
2265+ add_event_entry(domain_id);
2266+}
2267+
2268 static void
2269 add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
2270 {
2271@@ -347,9 +373,9 @@ static int add_us_sample(struct mm_struc
2272 * for later lookup from userspace.
2273 */
2274 static int
2275-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
2276+add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
2277 {
2278- if (in_kernel) {
2279+ if (cpu_mode >= CPU_MODE_KERNEL) {
2280 add_sample_entry(s->eip, s->event);
2281 return 1;
2282 } else if (mm) {
2283@@ -495,15 +521,21 @@ void sync_buffer(int cpu)
2284 struct mm_struct *mm = NULL;
2285 struct task_struct * new;
2286 unsigned long cookie = 0;
2287- int in_kernel = 1;
2288+ int cpu_mode = 1;
2289 unsigned int i;
2290 sync_buffer_state state = sb_buffer_start;
2291 unsigned long available;
2292+ int domain_switch = 0;
2293
2294 mutex_lock(&buffer_mutex);
2295
2296 add_cpu_switch(cpu);
2297
2298+ /* We need to assign the first samples in this CPU buffer to the
2299+ same domain that we were processing at the last sync_buffer */
2300+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2301+ add_domain_switch(cpu_current_domain[cpu]);
2302+ }
2303 /* Remember, only we can modify tail_pos */
2304
2305 available = get_slots(cpu_buf);
2306@@ -511,16 +543,18 @@ void sync_buffer(int cpu)
2307 for (i = 0; i < available; ++i) {
2308 struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
2309
2310- if (is_code(s->eip)) {
2311- if (s->event <= CPU_IS_KERNEL) {
2312- /* kernel/userspace switch */
2313- in_kernel = s->event;
2314+ if (is_code(s->eip) && !domain_switch) {
2315+ if (s->event <= CPU_MODE_XEN) {
2316+ /* xen/kernel/userspace switch */
2317+ cpu_mode = s->event;
2318 if (state == sb_buffer_start)
2319 state = sb_sample_start;
2320- add_kernel_ctx_switch(s->event);
2321+ add_cpu_mode_switch(s->event);
2322 } else if (s->event == CPU_TRACE_BEGIN) {
2323 state = sb_bt_start;
2324 add_trace_begin();
2325+ } else if (s->event == CPU_DOMAIN_SWITCH) {
2326+ domain_switch = 1;
2327 } else {
2328 struct mm_struct * oldmm = mm;
2329
2330@@ -534,11 +568,21 @@ void sync_buffer(int cpu)
2331 add_user_ctx_switch(new, cookie);
2332 }
2333 } else {
2334- if (state >= sb_bt_start &&
2335- !add_sample(mm, s, in_kernel)) {
2336- if (state == sb_bt_start) {
2337- state = sb_bt_ignore;
2338- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
2339+ if (domain_switch) {
2340+ cpu_current_domain[cpu] = s->eip;
2341+ add_domain_switch(s->eip);
2342+ domain_switch = 0;
2343+ } else {
2344+ if (cpu_current_domain[cpu] !=
2345+ COORDINATOR_DOMAIN) {
2346+ add_sample_entry(s->eip, s->event);
2347+ }
2348+ else if (state >= sb_bt_start &&
2349+ !add_sample(mm, s, cpu_mode)) {
2350+ if (state == sb_bt_start) {
2351+ state = sb_bt_ignore;
2352+ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
2353+ }
2354 }
2355 }
2356 }
2357@@ -547,6 +591,11 @@ void sync_buffer(int cpu)
2358 }
2359 release_mm(mm);
2360
2361+ /* We reset domain to COORDINATOR at each CPU switch */
2362+ if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2363+ add_domain_switch(COORDINATOR_DOMAIN);
2364+ }
2365+
2366 mark_done(cpu);
2367
2368 mutex_unlock(&buffer_mutex);
2369--- a/drivers/oprofile/cpu_buffer.c
2370+++ b/drivers/oprofile/cpu_buffer.c
2371@@ -6,6 +6,10 @@
2372 *
2373 * @author John Levon <levon@movementarian.org>
2374 *
2375+ * Modified by Aravind Menon for Xen
2376+ * These modifications are:
2377+ * Copyright (C) 2005 Hewlett-Packard Co.
2378+ *
2379 * Each CPU has a local buffer that stores PC value/event
2380 * pairs. We also log context switches when we notice them.
2381 * Eventually each CPU's buffer is processed into the global
2382@@ -34,6 +38,8 @@ static void wq_sync_buffer(struct work_s
2383 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2384 static int work_enabled;
2385
2386+static int32_t current_domain = COORDINATOR_DOMAIN;
2387+
2388 void free_cpu_buffers(void)
2389 {
2390 int i;
2391@@ -72,7 +78,7 @@ int alloc_cpu_buffers(void)
2392 goto fail;
2393
2394 b->last_task = NULL;
2395- b->last_is_kernel = -1;
2396+ b->last_cpu_mode = -1;
2397 b->tracing = 0;
2398 b->buffer_size = buffer_size;
2399 b->tail_pos = 0;
2400@@ -130,7 +136,7 @@ void cpu_buffer_reset(struct oprofile_cp
2401 * collected will populate the buffer with proper
2402 * values to initialize the buffer
2403 */
2404- cpu_buf->last_is_kernel = -1;
2405+ cpu_buf->last_cpu_mode = -1;
2406 cpu_buf->last_task = NULL;
2407 }
2408
2409@@ -180,13 +186,13 @@ add_code(struct oprofile_cpu_buffer * bu
2410 * because of the head/tail separation of the writer and reader
2411 * of the CPU buffer.
2412 *
2413- * is_kernel is needed because on some architectures you cannot
2414+ * cpu_mode is needed because on some architectures you cannot
2415 * tell if you are in kernel or user space simply by looking at
2416- * pc. We tag this in the buffer by generating kernel enter/exit
2417- * events whenever is_kernel changes
2418+ * pc. We tag this in the buffer by generating kernel/user (and xen)
2419+ * enter events whenever cpu_mode changes
2420 */
2421 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
2422- int is_kernel, unsigned long event)
2423+ int cpu_mode, unsigned long event)
2424 {
2425 struct task_struct * task;
2426
2427@@ -202,18 +208,18 @@ static int log_sample(struct oprofile_cp
2428 return 0;
2429 }
2430
2431- is_kernel = !!is_kernel;
2432-
2433 task = current;
2434
2435 /* notice a switch from user->kernel or vice versa */
2436- if (cpu_buf->last_is_kernel != is_kernel) {
2437- cpu_buf->last_is_kernel = is_kernel;
2438- add_code(cpu_buf, is_kernel);
2439+ if (cpu_buf->last_cpu_mode != cpu_mode) {
2440+ cpu_buf->last_cpu_mode = cpu_mode;
2441+ add_code(cpu_buf, cpu_mode);
2442 }
2443-
2444+
2445 /* notice a task switch */
2446- if (cpu_buf->last_task != task) {
2447+ /* if not processing other domain samples */
2448+ if ((cpu_buf->last_task != task) &&
2449+ (current_domain == COORDINATOR_DOMAIN)) {
2450 cpu_buf->last_task = task;
2451 add_code(cpu_buf, (unsigned long)task);
2452 }
2453@@ -297,6 +303,25 @@ void oprofile_add_trace(unsigned long pc
2454 add_sample(cpu_buf, pc, 0);
2455 }
2456
2457+int oprofile_add_domain_switch(int32_t domain_id)
2458+{
2459+ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2460+
2461+ /* should have space for switching into and out of domain
2462+ (2 slots each) plus one sample and one cpu mode switch */
2463+ if (((nr_available_slots(cpu_buf) < 6) &&
2464+ (domain_id != COORDINATOR_DOMAIN)) ||
2465+ (nr_available_slots(cpu_buf) < 2))
2466+ return 0;
2467+
2468+ add_code(cpu_buf, CPU_DOMAIN_SWITCH);
2469+ add_sample(cpu_buf, domain_id, 0);
2470+
2471+ current_domain = domain_id;
2472+
2473+ return 1;
2474+}
2475+
2476 /*
2477 * This serves to avoid cpu buffer overflow, and makes sure
2478 * the task mortuary progresses
2479--- a/drivers/oprofile/cpu_buffer.h
2480+++ b/drivers/oprofile/cpu_buffer.h
2481@@ -37,7 +37,7 @@ struct oprofile_cpu_buffer {
2482 volatile unsigned long tail_pos;
2483 unsigned long buffer_size;
2484 struct task_struct * last_task;
2485- int last_is_kernel;
2486+ int last_cpu_mode;
2487 int tracing;
2488 struct op_sample * buffer;
2489 unsigned long sample_received;
2490@@ -53,7 +53,10 @@ DECLARE_PER_CPU(struct oprofile_cpu_buff
2491 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
2492
2493 /* transient events for the CPU buffer -> event buffer */
2494-#define CPU_IS_KERNEL 1
2495-#define CPU_TRACE_BEGIN 2
2496+#define CPU_MODE_USER 0
2497+#define CPU_MODE_KERNEL 1
2498+#define CPU_MODE_XEN 2
2499+#define CPU_TRACE_BEGIN 3
2500+#define CPU_DOMAIN_SWITCH 4
2501
2502 #endif /* OPROFILE_CPU_BUFFER_H */
2503--- a/drivers/oprofile/event_buffer.h
2504+++ b/drivers/oprofile/event_buffer.h
2505@@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2506 #define INVALID_COOKIE ~0UL
2507 #define NO_COOKIE 0UL
2508
2509+/* Constant used to refer to coordinator domain (Xen) */
2510+#define COORDINATOR_DOMAIN -1
2511+
2512 extern const struct file_operations event_buffer_fops;
2513
2514 /* mutex between sync_cpu_buffers() and the
2515--- a/drivers/oprofile/oprof.c
2516+++ b/drivers/oprofile/oprof.c
2517@@ -5,6 +5,10 @@
2518 * @remark Read the file COPYING
2519 *
2520 * @author John Levon <levon@movementarian.org>
2521+ *
2522+ * Modified by Aravind Menon for Xen
2523+ * These modifications are:
2524+ * Copyright (C) 2005 Hewlett-Packard Co.
2525 */
2526
2527 #include <linux/kernel.h>
2528@@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex);
2529 */
2530 static int timer = 0;
2531
2532+int oprofile_set_active(int active_domains[], unsigned int adomains)
2533+{
2534+ int err;
2535+
2536+ if (!oprofile_ops.set_active)
2537+ return -EINVAL;
2538+
2539+ mutex_lock(&start_mutex);
2540+ err = oprofile_ops.set_active(active_domains, adomains);
2541+ mutex_unlock(&start_mutex);
2542+ return err;
2543+}
2544+
2545+int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2546+{
2547+ int err;
2548+
2549+ if (!oprofile_ops.set_passive)
2550+ return -EINVAL;
2551+
2552+ mutex_lock(&start_mutex);
2553+ err = oprofile_ops.set_passive(passive_domains, pdomains);
2554+ mutex_unlock(&start_mutex);
2555+ return err;
2556+}
2557+
2558 int oprofile_setup(void)
2559 {
2560 int err;
2561--- a/drivers/oprofile/oprof.h
2562+++ b/drivers/oprofile/oprof.h
2563@@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
2564 void oprofile_timer_init(struct oprofile_operations * ops);
2565
2566 int oprofile_set_backtrace(unsigned long depth);
2567+
2568+int oprofile_set_active(int active_domains[], unsigned int adomains);
2569+int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2570
2571 #endif /* OPROF_H */
2572--- a/drivers/oprofile/oprofile_files.c
2573+++ b/drivers/oprofile/oprofile_files.c
2574@@ -5,15 +5,21 @@
2575 * @remark Read the file COPYING
2576 *
2577 * @author John Levon <levon@movementarian.org>
2578+ *
2579+ * Modified by Aravind Menon for Xen
2580+ * These modifications are:
2581+ * Copyright (C) 2005 Hewlett-Packard Co.
2582 */
2583
2584 #include <linux/fs.h>
2585 #include <linux/oprofile.h>
2586+#include <asm/uaccess.h>
2587+#include <linux/ctype.h>
2588
2589 #include "event_buffer.h"
2590 #include "oprofile_stats.h"
2591 #include "oprof.h"
2592-
2593+
2594 unsigned long fs_buffer_size = 131072;
2595 unsigned long fs_cpu_buffer_size = 8192;
2596 unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
2597@@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
2598 static const struct file_operations dump_fops = {
2599 .write = dump_write,
2600 };
2601-
2602+
2603+#define TMPBUFSIZE 512
2604+
2605+static unsigned int adomains = 0;
2606+static int active_domains[MAX_OPROF_DOMAINS + 1];
2607+static DEFINE_MUTEX(adom_mutex);
2608+
2609+static ssize_t adomain_write(struct file * file, char const __user * buf,
2610+ size_t count, loff_t * offset)
2611+{
2612+ char *tmpbuf;
2613+ char *startp, *endp;
2614+ int i;
2615+ unsigned long val;
2616+ ssize_t retval = count;
2617+
2618+ if (*offset)
2619+ return -EINVAL;
2620+ if (count > TMPBUFSIZE - 1)
2621+ return -EINVAL;
2622+
2623+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2624+ return -ENOMEM;
2625+
2626+ if (copy_from_user(tmpbuf, buf, count)) {
2627+ kfree(tmpbuf);
2628+ return -EFAULT;
2629+ }
2630+ tmpbuf[count] = 0;
2631+
2632+ mutex_lock(&adom_mutex);
2633+
2634+ startp = tmpbuf;
2635+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2636+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2637+ val = simple_strtoul(startp, &endp, 0);
2638+ if (endp == startp)
2639+ break;
2640+ while (ispunct(*endp) || isspace(*endp))
2641+ endp++;
2642+ active_domains[i] = val;
2643+ if (active_domains[i] != val)
2644+ /* Overflow, force error below */
2645+ i = MAX_OPROF_DOMAINS + 1;
2646+ startp = endp;
2647+ }
2648+ /* Force error on trailing junk */
2649+ adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2650+
2651+ kfree(tmpbuf);
2652+
2653+ if (adomains > MAX_OPROF_DOMAINS
2654+ || oprofile_set_active(active_domains, adomains)) {
2655+ adomains = 0;
2656+ retval = -EINVAL;
2657+ }
2658+
2659+ mutex_unlock(&adom_mutex);
2660+ return retval;
2661+}
2662+
2663+static ssize_t adomain_read(struct file * file, char __user * buf,
2664+ size_t count, loff_t * offset)
2665+{
2666+ char * tmpbuf;
2667+ size_t len;
2668+ int i;
2669+ ssize_t retval;
2670+
2671+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2672+ return -ENOMEM;
2673+
2674+ mutex_lock(&adom_mutex);
2675+
2676+ len = 0;
2677+ for (i = 0; i < adomains; i++)
2678+ len += snprintf(tmpbuf + len,
2679+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2680+ "%u ", active_domains[i]);
2681+ WARN_ON(len > TMPBUFSIZE);
2682+ if (len != 0 && len <= TMPBUFSIZE)
2683+ tmpbuf[len-1] = '\n';
2684+
2685+ mutex_unlock(&adom_mutex);
2686+
2687+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2688+
2689+ kfree(tmpbuf);
2690+ return retval;
2691+}
2692+
2693+
2694+static struct file_operations active_domain_ops = {
2695+ .read = adomain_read,
2696+ .write = adomain_write,
2697+};
2698+
2699+static unsigned int pdomains = 0;
2700+static int passive_domains[MAX_OPROF_DOMAINS];
2701+static DEFINE_MUTEX(pdom_mutex);
2702+
2703+static ssize_t pdomain_write(struct file * file, char const __user * buf,
2704+ size_t count, loff_t * offset)
2705+{
2706+ char *tmpbuf;
2707+ char *startp, *endp;
2708+ int i;
2709+ unsigned long val;
2710+ ssize_t retval = count;
2711+
2712+ if (*offset)
2713+ return -EINVAL;
2714+ if (count > TMPBUFSIZE - 1)
2715+ return -EINVAL;
2716+
2717+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2718+ return -ENOMEM;
2719+
2720+ if (copy_from_user(tmpbuf, buf, count)) {
2721+ kfree(tmpbuf);
2722+ return -EFAULT;
2723+ }
2724+ tmpbuf[count] = 0;
2725+
2726+ mutex_lock(&pdom_mutex);
2727+
2728+ startp = tmpbuf;
2729+ /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2730+ for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2731+ val = simple_strtoul(startp, &endp, 0);
2732+ if (endp == startp)
2733+ break;
2734+ while (ispunct(*endp) || isspace(*endp))
2735+ endp++;
2736+ passive_domains[i] = val;
2737+ if (passive_domains[i] != val)
2738+ /* Overflow, force error below */
2739+ i = MAX_OPROF_DOMAINS + 1;
2740+ startp = endp;
2741+ }
2742+ /* Force error on trailing junk */
2743+ pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2744+
2745+ kfree(tmpbuf);
2746+
2747+ if (pdomains > MAX_OPROF_DOMAINS
2748+ || oprofile_set_passive(passive_domains, pdomains)) {
2749+ pdomains = 0;
2750+ retval = -EINVAL;
2751+ }
2752+
2753+ mutex_unlock(&pdom_mutex);
2754+ return retval;
2755+}
2756+
2757+static ssize_t pdomain_read(struct file * file, char __user * buf,
2758+ size_t count, loff_t * offset)
2759+{
2760+ char * tmpbuf;
2761+ size_t len;
2762+ int i;
2763+ ssize_t retval;
2764+
2765+ if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2766+ return -ENOMEM;
2767+
2768+ mutex_lock(&pdom_mutex);
2769+
2770+ len = 0;
2771+ for (i = 0; i < pdomains; i++)
2772+ len += snprintf(tmpbuf + len,
2773+ len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2774+ "%u ", passive_domains[i]);
2775+ WARN_ON(len > TMPBUFSIZE);
2776+ if (len != 0 && len <= TMPBUFSIZE)
2777+ tmpbuf[len-1] = '\n';
2778+
2779+ mutex_unlock(&pdom_mutex);
2780+
2781+ retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2782+
2783+ kfree(tmpbuf);
2784+ return retval;
2785+}
2786+
2787+static struct file_operations passive_domain_ops = {
2788+ .read = pdomain_read,
2789+ .write = pdomain_write,
2790+};
2791+
2792 void oprofile_create_files(struct super_block * sb, struct dentry * root)
2793 {
2794 oprofilefs_create_file(sb, root, "enable", &enable_fops);
2795 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2796+ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2797+ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2798 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2799 oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
2800 oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
2801#--- a/fs/aio.c
2802#+++ b/fs/aio.c
2803#@@ -36,6 +36,11 @@
2804# #include <asm/uaccess.h>
2805# #include <asm/mmu_context.h>
2806
2807#+#ifdef CONFIG_EPOLL
2808#+#include <linux/poll.h>
2809#+#include <linux/eventpoll.h>
2810#+#endif
2811#+
2812# #if DEBUG > 1
2813# #define dprintk printk
2814# #else
2815#@@ -1010,6 +1015,11 @@ put_rq:
2816# if (waitqueue_active(&ctx->wait))
2817# wake_up(&ctx->wait);
2818#
2819#+#ifdef CONFIG_EPOLL
2820#+ if (ctx->file && waitqueue_active(&ctx->poll_wait))
2821#+ wake_up(&ctx->poll_wait);
2822#+#endif
2823#+
2824# spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2825# return ret;
2826# }
2827#@@ -1017,6 +1027,8 @@ put_rq:
2828# /* aio_read_evt
2829# * Pull an event off of the ioctx's event ring. Returns the number of
2830# * events fetched (0 or 1 ;-)
2831#+ * If ent parameter is 0, just returns the number of events that would
2832#+ * be fetched.
2833# * FIXME: make this use cmpxchg.
2834# * TODO: make the ringbuffer user mmap()able (requires FIXME).
2835# */
2836#@@ -1039,13 +1051,18 @@ static int aio_read_evt(struct kioctx *i
2837#
2838# head = ring->head % info->nr;
2839# if (head != ring->tail) {
2840#- struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2841#- *ent = *evp;
2842#- head = (head + 1) % info->nr;
2843#- smp_mb(); /* finish reading the event before updatng the head */
2844#- ring->head = head;
2845#- ret = 1;
2846#- put_aio_ring_event(evp, KM_USER1);
2847#+ if (ent) { /* event requested */
2848#+ struct io_event *evp =
2849#+ aio_ring_event(info, head, KM_USER1);
2850#+ *ent = *evp;
2851#+ head = (head + 1) % info->nr;
2852#+			/* finish reading the event before updating the head */
2853#+ smp_mb();
2854#+ ring->head = head;
2855#+ ret = 1;
2856#+ put_aio_ring_event(evp, KM_USER1);
2857#+ } else /* only need to know availability */
2858#+ ret = 1;
2859# }
2860# spin_unlock(&info->ring_lock);
2861#
2862#@@ -1235,6 +1252,13 @@ static void io_destroy(struct kioctx *io
2863#
2864# aio_cancel_all(ioctx);
2865# wait_for_all_aios(ioctx);
2866#+#ifdef CONFIG_EPOLL
2867#+ /* forget the poll file, but it's up to the user to close it */
2868#+ if (ioctx->file) {
2869#+ ioctx->file->private_data = 0;
2870#+ ioctx->file = 0;
2871#+ }
2872#+#endif
2873#
2874# /*
2875# * Wake up any waiters. The setting of ctx->dead must be seen
2876#@@ -1245,6 +1269,67 @@ static void io_destroy(struct kioctx *io
2877# put_ioctx(ioctx); /* once for the lookup */
2878# }
2879#
2880#+#ifdef CONFIG_EPOLL
2881#+
2882#+static int aio_queue_fd_close(struct inode *inode, struct file *file)
2883#+{
2884#+ struct kioctx *ioctx = file->private_data;
2885#+ if (ioctx) {
2886#+ file->private_data = 0;
2887#+ spin_lock_irq(&ioctx->ctx_lock);
2888#+ ioctx->file = 0;
2889#+ spin_unlock_irq(&ioctx->ctx_lock);
2890#+ }
2891#+ return 0;
2892#+}
2893#+
2894#+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2895#+{ unsigned int pollflags = 0;
2896#+ struct kioctx *ioctx = file->private_data;
2897#+
2898#+ if (ioctx) {
2899#+
2900#+ spin_lock_irq(&ioctx->ctx_lock);
2901#+ /* Insert inside our poll wait queue */
2902#+ poll_wait(file, &ioctx->poll_wait, wait);
2903#+
2904#+ /* Check our condition */
2905#+ if (aio_read_evt(ioctx, 0))
2906#+ pollflags = POLLIN | POLLRDNORM;
2907#+ spin_unlock_irq(&ioctx->ctx_lock);
2908#+ }
2909#+
2910#+ return pollflags;
2911#+}
2912#+
2913#+static const struct file_operations aioq_fops = {
2914#+ .release = aio_queue_fd_close,
2915#+ .poll = aio_queue_fd_poll
2916#+};
2917#+
2918#+/* make_aio_fd:
2919#+ * Create a file descriptor that can be used to poll the event queue.
2920#+ * Based and piggybacked on the excellent epoll code.
2921#+ */
2922#+
2923#+static int make_aio_fd(struct kioctx *ioctx)
2924#+{
2925#+ int error, fd;
2926#+ struct inode *inode;
2927#+ struct file *file;
2928#+
2929#+ error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2930#+ if (error)
2931#+ return error;
2932#+
2933#+ /* associate the file with the IO context */
2934#+ file->private_data = ioctx;
2935#+ ioctx->file = file;
2936#+ init_waitqueue_head(&ioctx->poll_wait);
2937#+ return fd;
2938#+}
2939#+#endif
2940#+
2941# /* sys_io_setup:
2942# * Create an aio_context capable of receiving at least nr_events.
2943# * ctxp must not point to an aio_context that already exists, and
2944#@@ -1257,18 +1342,30 @@ static void io_destroy(struct kioctx *io
2945# * resources are available. May fail with -EFAULT if an invalid
2946# * pointer is passed for ctxp. Will fail with -ENOSYS if not
2947# * implemented.
2948#+ *
2949#+ * To request a selectable fd, the user context has to be initialized
2950#+ * to 1, instead of 0, and the return value is the fd.
2951#+ * This keeps the system call compatible, since a non-zero value
2952#+ * was not allowed so far.
2953# */
2954# asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
2955# {
2956# struct kioctx *ioctx = NULL;
2957# unsigned long ctx;
2958# long ret;
2959#+ int make_fd = 0;
2960#
2961# ret = get_user(ctx, ctxp);
2962# if (unlikely(ret))
2963# goto out;
2964#
2965# ret = -EINVAL;
2966#+#ifdef CONFIG_EPOLL
2967#+ if (ctx == 1) {
2968#+ make_fd = 1;
2969#+ ctx = 0;
2970#+ }
2971#+#endif
2972# if (unlikely(ctx || nr_events == 0)) {
2973# pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
2974# ctx, nr_events);
2975#@@ -1279,8 +1376,12 @@ asmlinkage long sys_io_setup(unsigned nr
2976# ret = PTR_ERR(ioctx);
2977# if (!IS_ERR(ioctx)) {
2978# ret = put_user(ioctx->user_id, ctxp);
2979#- if (!ret)
2980#- return 0;
2981#+#ifdef CONFIG_EPOLL
2982#+ if (make_fd && ret >= 0)
2983#+ ret = make_aio_fd(ioctx);
2984#+#endif
2985#+ if (ret >= 0)
2986#+ return ret;
2987#
2988# get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
2989# io_destroy(ioctx);
2990--- a/fs/compat_ioctl.c
2991+++ b/fs/compat_ioctl.c
2992@@ -114,6 +114,13 @@
2993 #include <asm/fbio.h>
2994 #endif
2995
2996+#ifdef CONFIG_XEN
2997+#include <xen/interface/xen.h>
2998+#include <xen/public/evtchn.h>
2999+#include <xen/public/privcmd.h>
3000+#include <xen/compat_ioctl.h>
3001+#endif
3002+
3003 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
3004 unsigned long arg, struct file *f)
3005 {
3006@@ -2727,6 +2734,18 @@ IGNORE_IOCTL(FBIOGETCMAP32)
3007 IGNORE_IOCTL(FBIOSCURSOR32)
3008 IGNORE_IOCTL(FBIOGCURSOR32)
3009 #endif
3010+
3011+#ifdef CONFIG_XEN
3012+HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
3013+HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
3014+COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
3015+COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
3016+COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
3017+COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT)
3018+COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND)
3019+COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY)
3020+COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET)
3021+#endif
3022 };
3023
3024 #define IOCTL_HASHSIZE 256
3025--- a/include/acpi/processor.h
3026+++ b/include/acpi/processor.h
3027@@ -17,6 +17,12 @@
3028 #define ACPI_PROCESSOR_MAX_THROTTLE 250 /* 25% */
3029 #define ACPI_PROCESSOR_MAX_DUTY_WIDTH 4
3030
3031+#ifdef CONFIG_XEN
3032+#define NR_ACPI_CPUS (NR_CPUS < 256 ? 256 : NR_CPUS)
3033+#else
3034+#define NR_ACPI_CPUS NR_CPUS
3035+#endif /* CONFIG_XEN */
3036+
3037 #define ACPI_PDC_REVISION_ID 0x1
3038
3039 #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */
3040@@ -42,6 +48,17 @@
3041
3042 struct acpi_processor_cx;
3043
3044+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3045+struct acpi_csd_package {
3046+ acpi_integer num_entries;
3047+ acpi_integer revision;
3048+ acpi_integer domain;
3049+ acpi_integer coord_type;
3050+ acpi_integer num_processors;
3051+ acpi_integer index;
3052+} __attribute__ ((packed));
3053+#endif
3054+
3055 struct acpi_power_register {
3056 u8 descriptor;
3057 u16 length;
3058@@ -74,6 +91,12 @@ struct acpi_processor_cx {
3059 u32 power;
3060 u32 usage;
3061 u64 time;
3062+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3063+ /* Require raw information for external control logic */
3064+ struct acpi_power_register reg;
3065+ u32 csd_count;
3066+ struct acpi_csd_package *domain_info;
3067+#endif
3068 struct acpi_processor_cx_policy promotion;
3069 struct acpi_processor_cx_policy demotion;
3070 char desc[ACPI_CX_DESC_LEN];
3071@@ -304,6 +327,9 @@ static inline void acpi_processor_ppc_ex
3072 {
3073 return;
3074 }
3075+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3076+int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
3077+#else
3078 static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr)
3079 {
3080 static unsigned int printout = 1;
3081@@ -316,6 +342,7 @@ static inline int acpi_processor_ppc_has
3082 }
3083 return 0;
3084 }
3085+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
3086 #endif /* CONFIG_CPU_FREQ */
3087
3088 /* in processor_throttling.c */
3089@@ -352,4 +379,120 @@ static inline void acpi_thermal_cpufreq_
3090 }
3091 #endif
3092
3093+/*
3094+ * Following are interfaces geared to external processor PM control
3095+ * logic like a VMM
3096+ */
3097+/* Events notified to external control logic */
3098+#define PROCESSOR_PM_INIT 1
3099+#define PROCESSOR_PM_CHANGE 2
3100+#define PROCESSOR_HOTPLUG 3
3101+
3102+/* Objects for the PM events */
3103+#define PM_TYPE_IDLE 0
3104+#define PM_TYPE_PERF 1
3105+#define PM_TYPE_THR 2
3106+#define PM_TYPE_MAX 3
3107+
3108+/* Processor hotplug events */
3109+#define HOTPLUG_TYPE_ADD 0
3110+#define HOTPLUG_TYPE_REMOVE 1
3111+
3112+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3113+struct processor_extcntl_ops {
3114+ /* Transfer processor PM events to external control logic */
3115+ int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event);
3116+ /* Notify physical processor status to external control logic */
3117+ int (*hotplug)(struct acpi_processor *pr, int type);
3118+};
3119+extern const struct processor_extcntl_ops *processor_extcntl_ops;
3120+
3121+static inline int processor_cntl_external(void)
3122+{
3123+ return (processor_extcntl_ops != NULL);
3124+}
3125+
3126+static inline int processor_pm_external(void)
3127+{
3128+ return processor_cntl_external() &&
3129+ (processor_extcntl_ops->pm_ops[PM_TYPE_IDLE] != NULL);
3130+}
3131+
3132+static inline int processor_pmperf_external(void)
3133+{
3134+ return processor_cntl_external() &&
3135+ (processor_extcntl_ops->pm_ops[PM_TYPE_PERF] != NULL);
3136+}
3137+
3138+static inline int processor_pmthr_external(void)
3139+{
3140+ return processor_cntl_external() &&
3141+ (processor_extcntl_ops->pm_ops[PM_TYPE_THR] != NULL);
3142+}
3143+
3144+extern int processor_notify_external(struct acpi_processor *pr,
3145+ int event, int type);
3146+extern void processor_extcntl_init(void);
3147+extern int processor_extcntl_prepare(struct acpi_processor *pr);
3148+extern int acpi_processor_get_performance_info(struct acpi_processor *pr);
3149+extern int acpi_processor_get_psd(struct acpi_processor *pr);
3150+void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **);
3151+#else
3152+static inline int processor_cntl_external(void) {return 0;}
3153+static inline int processor_pm_external(void) {return 0;}
3154+static inline int processor_pmperf_external(void) {return 0;}
3155+static inline int processor_pmthr_external(void) {return 0;}
3156+static inline int processor_notify_external(struct acpi_processor *pr,
3157+ int event, int type)
3158+{
3159+ return 0;
3160+}
3161+static inline void processor_extcntl_init(void) {}
3162+static inline int processor_extcntl_prepare(struct acpi_processor *pr)
3163+{
3164+ return 0;
3165+}
3166+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
3167+
3168+#ifdef CONFIG_XEN
3169+static inline void xen_convert_pct_reg(struct xen_pct_register *xpct,
3170+ struct acpi_pct_register *apct)
3171+{
3172+ xpct->descriptor = apct->descriptor;
3173+ xpct->length = apct->length;
3174+ xpct->space_id = apct->space_id;
3175+ xpct->bit_width = apct->bit_width;
3176+ xpct->bit_offset = apct->bit_offset;
3177+ xpct->reserved = apct->reserved;
3178+ xpct->address = apct->address;
3179+}
3180+
3181+static inline void xen_convert_pss_states(struct xen_processor_px *xpss,
3182+ struct acpi_processor_px *apss, int state_count)
3183+{
3184+ int i;
3185+ for(i=0; i<state_count; i++) {
3186+ xpss->core_frequency = apss->core_frequency;
3187+ xpss->power = apss->power;
3188+ xpss->transition_latency = apss->transition_latency;
3189+ xpss->bus_master_latency = apss->bus_master_latency;
3190+ xpss->control = apss->control;
3191+ xpss->status = apss->status;
3192+ xpss++;
3193+ apss++;
3194+ }
3195+}
3196+
3197+static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd,
3198+ struct acpi_psd_package *apsd)
3199+{
3200+ xpsd->num_entries = apsd->num_entries;
3201+ xpsd->revision = apsd->revision;
3202+ xpsd->domain = apsd->domain;
3203+ xpsd->coord_type = apsd->coord_type;
3204+ xpsd->num_processors = apsd->num_processors;
3205+}
3206+
3207+#endif /* CONFIG_XEN */
3208+
3209 #endif
3210--- a/include/asm-generic/pci.h
3211+++ b/include/asm-generic/pci.h
3212@@ -43,7 +43,9 @@ pcibios_select_root(struct pci_dev *pdev
3213 return root;
3214 }
3215
3216+#ifndef pcibios_scan_all_fns
3217 #define pcibios_scan_all_fns(a, b) 0
3218+#endif
3219
3220 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
3221 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
3222--- a/include/asm-generic/pgtable.h
3223+++ b/include/asm-generic/pgtable.h
3224@@ -99,6 +99,10 @@ static inline void ptep_set_wrprotect(st
3225 }
3226 #endif
3227
3228+#ifndef arch_change_pte_range
3229+#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
3230+#endif
3231+
3232 #ifndef __HAVE_ARCH_PTE_SAME
3233 #define pte_same(A,B) (pte_val(A) == pte_val(B))
3234 #endif
3235--- a/include/linux/aio.h
3236+++ b/include/linux/aio.h
3237@@ -199,6 +199,11 @@ struct kioctx {
3238 struct aio_ring_info ring_info;
3239
3240 struct delayed_work wq;
3241+#ifdef CONFIG_EPOLL
3242+ // poll integration
3243+ wait_queue_head_t poll_wait;
3244+ struct file *file;
3245+#endif
3246 };
3247
3248 /* prototypes */
3249--- a/include/linux/highmem.h
3250+++ b/include/linux/highmem.h
3251@@ -62,6 +62,7 @@ static inline void *kmap_atomic(struct p
3252
3253 #endif /* CONFIG_HIGHMEM */
3254
3255+#ifndef __HAVE_ARCH_CLEAR_USER_HIGHPAGE
3256 /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
3257 static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
3258 {
3259@@ -69,6 +70,7 @@ static inline void clear_user_highpage(s
3260 clear_user_page(addr, vaddr, page);
3261 kunmap_atomic(addr, KM_USER0);
3262 }
3263+#endif
3264
3265 #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
3266 /**
3267@@ -115,12 +117,14 @@ alloc_zeroed_user_highpage_movable(struc
3268 return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
3269 }
3270
3271+#ifndef __HAVE_ARCH_CLEAR_HIGHPAGE
3272 static inline void clear_highpage(struct page *page)
3273 {
3274 void *kaddr = kmap_atomic(page, KM_USER0);
3275 clear_page(kaddr);
3276 kunmap_atomic(kaddr, KM_USER0);
3277 }
3278+#endif
3279
3280 static inline void zero_user_segments(struct page *page,
3281 unsigned start1, unsigned end1,
3282@@ -174,6 +178,8 @@ static inline void copy_user_highpage(st
3283
3284 #endif
3285
3286+#ifndef __HAVE_ARCH_COPY_HIGHPAGE
3287+
3288 static inline void copy_highpage(struct page *to, struct page *from)
3289 {
3290 char *vfrom, *vto;
3291@@ -185,4 +191,6 @@ static inline void copy_highpage(struct
3292 kunmap_atomic(vto, KM_USER1);
3293 }
3294
3295+#endif
3296+
3297 #endif /* _LINUX_HIGHMEM_H */
3298--- a/include/linux/interrupt.h
3299+++ b/include/linux/interrupt.h
3300@@ -218,6 +218,12 @@ static inline int disable_irq_wake(unsig
3301 }
3302 #endif /* CONFIG_GENERIC_HARDIRQS */
3303
3304+#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED
3305+int irq_ignore_unhandled(unsigned int irq);
3306+#else
3307+#define irq_ignore_unhandled(irq) 0
3308+#endif
3309+
3310 #ifndef __ARCH_SET_SOFTIRQ_PENDING
3311 #define set_softirq_pending(x) (local_softirq_pending() = (x))
3312 #define or_softirq_pending(x) (local_softirq_pending() |= (x))
3313--- a/include/linux/kexec.h
3314+++ b/include/linux/kexec.h
3315@@ -46,6 +46,13 @@
3316 KEXEC_CORE_NOTE_NAME_BYTES + \
3317 KEXEC_CORE_NOTE_DESC_BYTES )
3318
3319+#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
3320+#define kexec_page_to_pfn(page) page_to_pfn(page)
3321+#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
3322+#define kexec_virt_to_phys(addr) virt_to_phys(addr)
3323+#define kexec_phys_to_virt(addr) phys_to_virt(addr)
3324+#endif
3325+
3326 /*
3327 * This structure is used to hold the arguments that are used when loading
3328 * kernel binaries.
3329@@ -108,6 +115,12 @@ struct kimage {
3330 extern void machine_kexec(struct kimage *image);
3331 extern int machine_kexec_prepare(struct kimage *image);
3332 extern void machine_kexec_cleanup(struct kimage *image);
3333+#ifdef CONFIG_XEN
3334+extern int xen_machine_kexec_load(struct kimage *image);
3335+extern void xen_machine_kexec_unload(struct kimage *image);
3336+extern void xen_machine_kexec_setup_resources(void);
3337+extern void xen_machine_kexec_register_resources(struct resource *res);
3338+#endif
3339 extern asmlinkage long sys_kexec_load(unsigned long entry,
3340 unsigned long nr_segments,
3341 struct kexec_segment __user *segments,
3342--- a/include/linux/mm.h
3343+++ b/include/linux/mm.h
3344@@ -113,6 +113,9 @@ extern unsigned int kobjsize(const void
3345 #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
3346 #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
3347 #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
3348+#ifdef CONFIG_XEN
3349+#define VM_FOREIGN 0x40000000 /* Has pages belonging to another VM */
3350+#endif
3351
3352 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
3353 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
3354@@ -183,6 +186,11 @@ struct vm_operations_struct {
3355 */
3356 int (*access)(struct vm_area_struct *vma, unsigned long addr,
3357 void *buf, int len, int write);
3358+
3359+ /* Area-specific function for clearing the PTE at @ptep. Returns the
3360+ * original value of @ptep. */
3361+ pte_t (*zap_pte)(struct vm_area_struct *vma,
3362+ unsigned long addr, pte_t *ptep, int is_fullmm);
3363 #ifdef CONFIG_NUMA
3364 /*
3365 * set_policy() op must add a reference to any non-NULL @new mempolicy
3366--- a/include/linux/oprofile.h
3367+++ b/include/linux/oprofile.h
3368@@ -16,6 +16,8 @@
3369 #include <linux/types.h>
3370 #include <linux/spinlock.h>
3371 #include <asm/atomic.h>
3372+
3373+#include <xen/interface/xenoprof.h>
3374
3375 /* Each escaped entry is prefixed by ESCAPE_CODE
3376 * then one of the following codes, then the
3377@@ -28,7 +30,7 @@
3378 #define CPU_SWITCH_CODE 2
3379 #define COOKIE_SWITCH_CODE 3
3380 #define KERNEL_ENTER_SWITCH_CODE 4
3381-#define KERNEL_EXIT_SWITCH_CODE 5
3382+#define USER_ENTER_SWITCH_CODE 5
3383 #define MODULE_LOADED_CODE 6
3384 #define CTX_TGID_CODE 7
3385 #define TRACE_BEGIN_CODE 8
3386@@ -36,6 +38,7 @@
3387 #define XEN_ENTER_SWITCH_CODE 10
3388 #define SPU_PROFILING_CODE 11
3389 #define SPU_CTX_SWITCH_CODE 12
3390+#define DOMAIN_SWITCH_CODE 13
3391
3392 struct super_block;
3393 struct dentry;
3394@@ -47,6 +50,11 @@ struct oprofile_operations {
3395 /* create any necessary configuration files in the oprofile fs.
3396 * Optional. */
3397 int (*create_files)(struct super_block * sb, struct dentry * root);
3398+ /* setup active domains with Xen */
3399+ int (*set_active)(int *active_domains, unsigned int adomains);
3400+ /* setup passive domains with Xen */
3401+ int (*set_passive)(int *passive_domains, unsigned int pdomains);
3402+
3403 /* Do any necessary interrupt setup. Optional. */
3404 int (*setup)(void);
3405 /* Do any necessary interrupt shutdown. Optional. */
3406@@ -106,6 +114,8 @@ void oprofile_add_pc(unsigned long pc, i
3407 /* add a backtrace entry, to be called from the ->backtrace callback */
3408 void oprofile_add_trace(unsigned long eip);
3409
3410+/* add a domain switch entry */
3411+int oprofile_add_domain_switch(int32_t domain_id);
3412
3413 /**
3414 * Create a file of the given name as a child of the given root, with
3415--- a/include/linux/page-flags.h
3416+++ b/include/linux/page-flags.h
3417@@ -97,6 +97,9 @@ enum pageflags {
3418 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
3419 PG_uncached, /* Page has been mapped as uncached */
3420 #endif
3421+#ifdef CONFIG_XEN
3422+ PG_foreign, /* Page is owned by foreign allocator. */
3423+#endif
3424 __NR_PAGEFLAGS,
3425
3426 /* Filesystems */
3427@@ -269,6 +272,19 @@ static inline void SetPageUptodate(struc
3428
3429 CLEARPAGEFLAG(Uptodate, uptodate)
3430
3431+#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
3432+#define SetPageForeign(_page, dtor) do { \
3433+ set_bit(PG_foreign, &(_page)->flags); \
3434+ BUG_ON((dtor) == (void (*)(struct page *))0); \
3435+ (_page)->index = (long)(dtor); \
3436+} while (0)
3437+#define ClearPageForeign(page) do { \
3438+ clear_bit(PG_foreign, &(page)->flags); \
3439+ (page)->index = 0; \
3440+} while (0)
3441+#define PageForeignDestructor(_page) \
3442+ ((void (*)(struct page *))(_page)->index)(_page)
3443+
3444 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
3445
3446 int test_clear_page_writeback(struct page *page);
3447@@ -339,8 +355,16 @@ PAGEFLAG(MemError, memerror)
3448 PAGEFLAG_FALSE(MemError)
3449 #endif
3450
3451+#if !defined(CONFIG_XEN)
3452+# define PAGE_FLAGS_XEN 0
3453+#elif defined(CONFIG_X86)
3454+# define PAGE_FLAGS_XEN ((1 << PG_pinned) | (1 << PG_foreign))
3455+#else
3456+# define PAGE_FLAGS_XEN (1 << PG_foreign)
3457+#endif
3458+
3459 #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
3460- 1 << PG_buddy | 1 << PG_writeback | \
3461+ 1 << PG_buddy | 1 << PG_writeback | PAGE_FLAGS_XEN | \
3462 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
3463
3464 /*
3465--- a/include/linux/pci.h
3466+++ b/include/linux/pci.h
3467@@ -211,6 +211,9 @@ struct pci_dev {
3468 * directly, use the values stored here. They might be different!
3469 */
3470 unsigned int irq;
3471+#ifdef CONFIG_XEN
3472+ unsigned int irq_old;
3473+#endif
3474 struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
3475
3476 /* These fields are used by common fixups */
3477@@ -767,6 +770,10 @@ static inline void msi_remove_pci_irq_ve
3478
3479 static inline void pci_restore_msi_state(struct pci_dev *dev)
3480 { }
3481+#ifdef CONFIG_XEN
3482+#define register_msi_get_owner(func) 0
3483+#define unregister_msi_get_owner(func) 0
3484+#endif
3485 #else
3486 extern int pci_enable_msi(struct pci_dev *dev);
3487 extern void pci_msi_shutdown(struct pci_dev *dev);
3488@@ -777,6 +784,10 @@ extern void pci_msix_shutdown(struct pci
3489 extern void pci_disable_msix(struct pci_dev *dev);
3490 extern void msi_remove_pci_irq_vectors(struct pci_dev *dev);
3491 extern void pci_restore_msi_state(struct pci_dev *dev);
3492+#ifdef CONFIG_XEN
3493+extern int register_msi_get_owner(int (*func)(struct pci_dev *dev));
3494+extern int unregister_msi_get_owner(int (*func)(struct pci_dev *dev));
3495+#endif
3496 #endif
3497
3498 #ifdef CONFIG_HT_IRQ
3499#--- a/include/linux/skbuff.h
3500#+++ b/include/linux/skbuff.h
3501#@@ -217,6 +217,8 @@ typedef unsigned char *sk_buff_data_t;
3502# * @local_df: allow local fragmentation
3503# * @cloned: Head may be cloned (check refcnt to be sure)
3504# * @nohdr: Payload reference only, must not modify header
3505#+ * @proto_data_valid: Protocol data validated since arriving at localhost
3506#+ * @proto_csum_blank: Protocol csum must be added before leaving localhost
3507# * @pkt_type: Packet class
3508# * @fclone: skbuff clone status
3509# * @ip_summed: Driver fed us an IP checksum
3510#@@ -323,7 +325,11 @@ struct sk_buff {
3511# #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
3512# __u8 do_not_encrypt:1;
3513# #endif
3514#- /* 0/13/14 bit hole */
3515#+#ifdef CONFIG_XEN
3516#+ __u8 proto_data_valid:1,
3517#+ proto_csum_blank:1;
3518#+#endif
3519#+ /* 10-16 bit hole */
3520
3521 #ifdef CONFIG_NET_DMA
3522# dma_cookie_t dma_cookie;
3523--- a/include/linux/vermagic.h
3524+++ b/include/linux/vermagic.h
3525@@ -22,6 +22,11 @@
3526 #else
3527 #define MODULE_VERMAGIC_MODVERSIONS ""
3528 #endif
3529+#ifdef CONFIG_XEN
3530+#define MODULE_VERMAGIC_XEN "Xen "
3531+#else
3532+#define MODULE_VERMAGIC_XEN
3533+#endif
3534 #ifndef MODULE_ARCH_VERMAGIC
3535 #define MODULE_ARCH_VERMAGIC ""
3536 #endif
3537@@ -30,5 +35,5 @@
3538 UTS_RELEASE " " \
3539 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
3540 MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
3541- MODULE_ARCH_VERMAGIC
3542+ MODULE_VERMAGIC_XEN MODULE_ARCH_VERMAGIC
3543
3544--- a/kernel/irq/spurious.c
3545+++ b/kernel/irq/spurious.c
3546@@ -182,7 +182,7 @@ void note_interrupt(unsigned int irq, st
3547 */
3548 if (time_after(jiffies, desc->last_unhandled + HZ/10))
3549 desc->irqs_unhandled = 1;
3550- else
3551+ else if (!irq_ignore_unhandled(irq))
3552 desc->irqs_unhandled++;
3553 desc->last_unhandled = jiffies;
3554 if (unlikely(action_ret != IRQ_NONE))
3555--- a/kernel/kexec.c
3556+++ b/kernel/kexec.c
3557@@ -359,13 +359,26 @@ static int kimage_is_destination_range(s
3558 return 0;
3559 }
3560
3561-static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
3562+static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit)
3563 {
3564 struct page *pages;
3565
3566 pages = alloc_pages(gfp_mask, order);
3567 if (pages) {
3568 unsigned int count, i;
3569+#ifdef CONFIG_XEN
3570+ int address_bits;
3571+
3572+ if (limit == ~0UL)
3573+ address_bits = BITS_PER_LONG;
3574+ else
3575+ address_bits = long_log2(limit);
3576+
3577+ if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
3578+ __free_pages(pages, order);
3579+ return NULL;
3580+ }
3581+#endif
3582 pages->mapping = NULL;
3583 set_page_private(pages, order);
3584 count = 1 << order;
3585@@ -384,6 +397,9 @@ static void kimage_free_pages(struct pag
3586 count = 1 << order;
3587 for (i = 0; i < count; i++)
3588 ClearPageReserved(page + i);
3589+#ifdef CONFIG_XEN
3590+ xen_destroy_contiguous_region((unsigned long)page_address(page), order);
3591+#endif
3592 __free_pages(page, order);
3593 }
3594
3595@@ -429,10 +445,10 @@ static struct page *kimage_alloc_normal_
3596 do {
3597 unsigned long pfn, epfn, addr, eaddr;
3598
3599- pages = kimage_alloc_pages(GFP_KERNEL, order);
3600+ pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT);
3601 if (!pages)
3602 break;
3603- pfn = page_to_pfn(pages);
3604+ pfn = kexec_page_to_pfn(pages);
3605 epfn = pfn + count;
3606 addr = pfn << PAGE_SHIFT;
3607 eaddr = epfn << PAGE_SHIFT;
3608@@ -466,6 +482,7 @@ static struct page *kimage_alloc_normal_
3609 return pages;
3610 }
3611
3612+#ifndef CONFIG_XEN
3613 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
3614 unsigned int order)
3615 {
3616@@ -519,7 +536,7 @@ static struct page *kimage_alloc_crash_c
3617 }
3618 /* If I don't overlap any segments I have found my hole! */
3619 if (i == image->nr_segments) {
3620- pages = pfn_to_page(hole_start >> PAGE_SHIFT);
3621+ pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
3622 break;
3623 }
3624 }
3625@@ -546,6 +563,13 @@ struct page *kimage_alloc_control_pages(
3626
3627 return pages;
3628 }
3629+#else /* !CONFIG_XEN */
3630+struct page *kimage_alloc_control_pages(struct kimage *image,
3631+ unsigned int order)
3632+{
3633+ return kimage_alloc_normal_control_pages(image, order);
3634+}
3635+#endif
3636
3637 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
3638 {
3639@@ -561,7 +585,7 @@ static int kimage_add_entry(struct kimag
3640 return -ENOMEM;
3641
3642 ind_page = page_address(page);
3643- *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
3644+ *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
3645 image->entry = ind_page;
3646 image->last_entry = ind_page +
3647 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
3648@@ -620,13 +644,13 @@ static void kimage_terminate(struct kima
3649 #define for_each_kimage_entry(image, ptr, entry) \
3650 for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
3651 ptr = (entry & IND_INDIRECTION)? \
3652- phys_to_virt((entry & PAGE_MASK)): ptr +1)
3653+ kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
3654
3655 static void kimage_free_entry(kimage_entry_t entry)
3656 {
3657 struct page *page;
3658
3659- page = pfn_to_page(entry >> PAGE_SHIFT);
3660+ page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
3661 kimage_free_pages(page);
3662 }
3663
3664@@ -638,6 +662,10 @@ static void kimage_free(struct kimage *i
3665 if (!image)
3666 return;
3667
3668+#ifdef CONFIG_XEN
3669+ xen_machine_kexec_unload(image);
3670+#endif
3671+
3672 kimage_free_extra_pages(image);
3673 for_each_kimage_entry(image, ptr, entry) {
3674 if (entry & IND_INDIRECTION) {
3675@@ -713,7 +741,7 @@ static struct page *kimage_alloc_page(st
3676 * have a match.
3677 */
3678 list_for_each_entry(page, &image->dest_pages, lru) {
3679- addr = page_to_pfn(page) << PAGE_SHIFT;
3680+ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
3681 if (addr == destination) {
3682 list_del(&page->lru);
3683 return page;
3684@@ -724,16 +752,16 @@ static struct page *kimage_alloc_page(st
3685 kimage_entry_t *old;
3686
3687 /* Allocate a page, if we run out of memory give up */
3688- page = kimage_alloc_pages(gfp_mask, 0);
3689+ page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT);
3690 if (!page)
3691 return NULL;
3692 /* If the page cannot be used file it away */
3693- if (page_to_pfn(page) >
3694+ if (kexec_page_to_pfn(page) >
3695 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
3696 list_add(&page->lru, &image->unuseable_pages);
3697 continue;
3698 }
3699- addr = page_to_pfn(page) << PAGE_SHIFT;
3700+ addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
3701
3702 /* If it is the destination page we want use it */
3703 if (addr == destination)
3704@@ -756,7 +784,7 @@ static struct page *kimage_alloc_page(st
3705 struct page *old_page;
3706
3707 old_addr = *old & PAGE_MASK;
3708- old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
3709+ old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
3710 copy_highpage(page, old_page);
3711 *old = addr | (*old & ~PAGE_MASK);
3712
3713@@ -812,7 +840,7 @@ static int kimage_load_normal_segment(st
3714 result = -ENOMEM;
3715 goto out;
3716 }
3717- result = kimage_add_page(image, page_to_pfn(page)
3718+ result = kimage_add_page(image, kexec_page_to_pfn(page)
3719 << PAGE_SHIFT);
3720 if (result < 0)
3721 goto out;
3722@@ -844,6 +872,7 @@ out:
3723 return result;
3724 }
3725
3726+#ifndef CONFIG_XEN
3727 static int kimage_load_crash_segment(struct kimage *image,
3728 struct kexec_segment *segment)
3729 {
3730@@ -866,7 +895,7 @@ static int kimage_load_crash_segment(str
3731 char *ptr;
3732 size_t uchunk, mchunk;
3733
3734- page = pfn_to_page(maddr >> PAGE_SHIFT);
3735+ page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
3736 if (!page) {
3737 result = -ENOMEM;
3738 goto out;
3739@@ -915,6 +944,13 @@ static int kimage_load_segment(struct ki
3740
3741 return result;
3742 }
3743+#else /* CONFIG_XEN */
3744+static int kimage_load_segment(struct kimage *image,
3745+ struct kexec_segment *segment)
3746+{
3747+ return kimage_load_normal_segment(image, segment);
3748+}
3749+#endif
3750
3751 /*
3752 * Exec Kernel system call: for obvious reasons only root may call it.
3753@@ -1019,6 +1055,13 @@ asmlinkage long sys_kexec_load(unsigned
3754 }
3755 kimage_terminate(image);
3756 }
3757+#ifdef CONFIG_XEN
3758+ if (image) {
3759+ result = xen_machine_kexec_load(image);
3760+ if (result)
3761+ goto out;
3762+ }
3763+#endif
3764 /* Install the new kernel, and Uninstall the old */
3765 image = xchg(dest_image, image);
3766
3767--- a/kernel/sysctl.c
3768+++ b/kernel/sysctl.c
3769@@ -751,7 +751,7 @@ static struct ctl_table kern_table[] = {
3770 .proc_handler = &proc_dointvec,
3771 },
3772 #endif
3773-#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
3774+#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
3775 {
3776 .procname = "acpi_video_flags",
3777 .data = &acpi_realmode_flags,
3778--- a/mm/memory.c
3779+++ b/mm/memory.c
3780@@ -446,6 +446,12 @@ struct page *vm_normal_page(struct vm_ar
3781 {
3782 unsigned long pfn;
3783
3784+#if defined(CONFIG_XEN) && defined(CONFIG_X86)
3785+ /* XEN: Covers user-space grant mappings (even of local pages). */
3786+ if (unlikely(vma->vm_flags & VM_FOREIGN))
3787+ return NULL;
3788+#endif
3789+
3790 if (HAVE_PTE_SPECIAL) {
3791 if (likely(!pte_special(pte))) {
3792 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
3793@@ -474,7 +480,14 @@ struct page *vm_normal_page(struct vm_ar
3794 }
3795 }
3796
3797+#ifndef CONFIG_XEN
3798 VM_BUG_ON(!pfn_valid(pfn));
3799+#else
3800+ if (unlikely(!pfn_valid(pfn))) {
3801+ VM_BUG_ON(!(vma->vm_flags & VM_RESERVED));
3802+ return NULL;
3803+ }
3804+#endif
3805
3806 /*
3807 * NOTE! We still have PageReserved() pages in the page tables.
3808@@ -745,8 +758,12 @@ static unsigned long zap_pte_range(struc
3809 page->index > details->last_index))
3810 continue;
3811 }
3812- ptent = ptep_get_and_clear_full(mm, addr, pte,
3813- tlb->fullmm);
3814+ if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
3815+ ptent = vma->vm_ops->zap_pte(vma, addr, pte,
3816+ tlb->fullmm);
3817+ else
3818+ ptent = ptep_get_and_clear_full(mm, addr, pte,
3819+ tlb->fullmm);
3820 tlb_remove_tlb_entry(tlb, pte, addr);
3821 if (unlikely(!page))
3822 continue;
3823@@ -996,6 +1013,7 @@ unsigned long zap_page_range(struct vm_a
3824 tlb_finish_mmu(tlb, address, end);
3825 return end;
3826 }
3827+EXPORT_SYMBOL(zap_page_range);
3828
3829 /**
3830 * zap_vma_ptes - remove ptes mapping the vma
3831@@ -1193,6 +1211,26 @@ int get_user_pages(struct task_struct *t
3832 continue;
3833 }
3834
3835+#ifdef CONFIG_XEN
3836+ if (vma && (vma->vm_flags & VM_FOREIGN)) {
3837+ struct page **map = vma->vm_private_data;
3838+ int offset = (start - vma->vm_start) >> PAGE_SHIFT;
3839+ if (map[offset] != NULL) {
3840+ if (pages) {
3841+ struct page *page = map[offset];
3842+
3843+ pages[i] = page;
3844+ get_page(page);
3845+ }
3846+ if (vmas)
3847+ vmas[i] = vma;
3848+ i++;
3849+ start += PAGE_SIZE;
3850+ len--;
3851+ continue;
3852+ }
3853+ }
3854+#endif
3855 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
3856 || !(vm_flags & vma->vm_flags))
3857 return i ? : -EFAULT;
3858--- a/mm/mprotect.c
3859+++ b/mm/mprotect.c
3860@@ -92,6 +92,8 @@ static inline void change_pmd_range(stru
3861 next = pmd_addr_end(addr, end);
3862 if (pmd_none_or_clear_bad(pmd))
3863 continue;
3864+ if (arch_change_pte_range(mm, pmd, addr, next, newprot))
3865+ continue;
3866 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
3867 } while (pmd++, addr = next, addr != end);
3868 }
3869--- a/mm/page_alloc.c
3870+++ b/mm/page_alloc.c
3871@@ -533,7 +533,14 @@ static void __free_pages_ok(struct page
3872 unsigned long flags;
3873 int i;
3874 int reserved = 0;
3875
3876+#ifdef CONFIG_XEN
3877+ if (PageForeign(page)) {
3878+ PageForeignDestructor(page);
3879+ return;
3880+ }
3881+#endif
3882+
3883 for (i = 0 ; i < (1 << order) ; ++i)
3884 reserved += free_pages_check(page + i);
3885 if (reserved)
3886@@ -995,6 +1001,12 @@ static void free_hot_cold_page(struct pa
3887 struct per_cpu_pages *pcp;
3888 unsigned long flags;
3889
3890+#ifdef CONFIG_XEN
3891+ if (PageForeign(page)) {
3892+ PageForeignDestructor(page);
3893+ return;
3894+ }
3895+#endif
3896 if (PageAnon(page))
3897 page->mapping = NULL;
3898 if (free_pages_check(page))
3899--- a/net/core/dev.c
3900+++ b/net/core/dev.c
3901@@ -131,6 +131,12 @@
3902
3903 #include "net-sysfs.h"
3904
3905+#ifdef CONFIG_XEN
3906+#include <net/ip.h>
3907+#include <linux/tcp.h>
3908+#include <linux/udp.h>
3909+#endif
3910+
3911 /*
3912 * The list of packet types we will receive (as opposed to discard)
3913 * and the routines to invoke.
3914@@ -1734,6 +1740,42 @@ static struct netdev_queue *dev_pick_tx(
3915 return netdev_get_tx_queue(dev, queue_index);
3916 }
3917
3918+#ifdef CONFIG_XEN
3919+inline int skb_checksum_setup(struct sk_buff *skb)
3920+{
3921+ if (skb->proto_csum_blank) {
3922+ if (skb->protocol != htons(ETH_P_IP))
3923+ goto out;
3924+ skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
3925+ if (skb->h.raw >= skb->tail)
3926+ goto out;
3927+ switch (skb->nh.iph->protocol) {
3928+ case IPPROTO_TCP:
3929+ skb->csum = offsetof(struct tcphdr, check);
3930+ break;
3931+ case IPPROTO_UDP:
3932+ skb->csum = offsetof(struct udphdr, check);
3933+ break;
3934+ default:
3935+ if (net_ratelimit())
3936+ printk(KERN_ERR "Attempting to checksum a non-"
3937+ "TCP/UDP packet, dropping a protocol"
3938+ " %d packet", skb->nh.iph->protocol);
3939+ goto out;
3940+ }
3941+ if ((skb->h.raw + skb->csum + 2) > skb->tail)
3942+ goto out;
3943+ skb->ip_summed = CHECKSUM_HW;
3944+ skb->proto_csum_blank = 0;
3945+ }
3946+ return 0;
3947+out:
3948+ return -EPROTO;
3949+}
3950+#else
3951+inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
3952+#endif
3953+
3954 /**
3955 * dev_queue_xmit - transmit a buffer
3956 * @skb: buffer to transmit
3957@@ -1766,6 +1808,12 @@ int dev_queue_xmit(struct sk_buff *skb)
3958 struct Qdisc *q;
3959 int rc = -ENOMEM;
3960
3961+ /* If a checksum-deferred packet is forwarded to a device that needs a
3962+ * checksum, correct the pointers and force checksumming.
3963+ */
3964+ if (skb_checksum_setup(skb))
3965+ goto out_kfree_skb;
3966+
3967 /* GSO will handle the following emulations directly. */
3968 if (netif_needs_gso(dev, skb))
3969 goto gso;
3970@@ -2271,6 +2319,19 @@ int netif_receive_skb(struct sk_buff *sk
3971 }
3972 #endif
3973
3974+#ifdef CONFIG_XEN
3975+ switch (skb->ip_summed) {
3976+ case CHECKSUM_UNNECESSARY:
3977+ skb->proto_data_valid = 1;
3978+ break;
3979+ case CHECKSUM_HW:
3980+ /* XXX Implement me. */
3981+ default:
3982+ skb->proto_data_valid = 0;
3983+ break;
3984+ }
3985+#endif
3986+
3987 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3988 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
3989 ptype->dev == orig_dev) {
3990@@ -4925,6 +4986,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif
3991 EXPORT_SYMBOL(net_enable_timestamp);
3992 EXPORT_SYMBOL(net_disable_timestamp);
3993 EXPORT_SYMBOL(dev_get_flags);
3994+EXPORT_SYMBOL(skb_checksum_setup);
3995
3996 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3997 EXPORT_SYMBOL(br_handle_frame_hook);
3998--- a/net/core/skbuff.c
3999+++ b/net/core/skbuff.c
4000@@ -559,6 +559,10 @@ static struct sk_buff *__skb_clone(struc
4001 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
4002 n->cloned = 1;
4003 n->nohdr = 0;
4004+#ifdef CONFIG_XEN
4005+ C(proto_data_valid);
4006+ C(proto_csum_blank);
4007+#endif
4008 n->destructor = NULL;
4009 C(iif);
4010 C(tail);
4011--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
4012+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
4013@@ -75,6 +75,9 @@ tcp_manip_pkt(struct sk_buff *skb,
4014 if (hdrsize < sizeof(*hdr))
4015 return true;
4016
4017+ if (skb_checksum_setup(skb))
4018+ return false;
4019+
4020 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
4021 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
4022 return true;
4023--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
4024+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
4025@@ -60,6 +60,10 @@ udp_manip_pkt(struct sk_buff *skb,
4026 newport = tuple->dst.u.udp.port;
4027 portptr = &hdr->dest;
4028 }
4029+
4030+ if (skb_checksum_setup(skb))
4031+ return false;
4032+
4033 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4034 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
4035 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
4036--- a/net/ipv4/xfrm4_output.c
4037+++ b/net/ipv4/xfrm4_output.c
4038@@ -81,7 +81,7 @@ static int xfrm4_output_finish(struct sk
4039 #endif
4040
4041 skb->protocol = htons(ETH_P_IP);
4042- return xfrm_output(skb);
4043+ return skb_checksum_setup(skb) ?: xfrm_output(skb);
4044 }
4045
4046 int xfrm4_output(struct sk_buff *skb)
4047--- a/scripts/Makefile.build
4048+++ b/scripts/Makefile.build
4049@@ -73,6 +73,20 @@ ifndef obj
4050 $(warning kbuild: Makefile.build is included improperly)
4051 endif
4052
4053+ifeq ($(CONFIG_XEN),y)
4054+$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build
4055+ @echo ' Updating $@'
4056+ $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\
4057+ ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub'))
4058+ @$(AWK) -f $< $(filter-out $<,$^) >$@
4059+
4060+xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c))))
4061+xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o)
4062+single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m))
4063+
4064+-include $(objtree)/scripts/Makefile.xen
4065+endif
4066+
4067 # ===========================================================================
4068
4069 ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
4070--- a/scripts/Makefile.lib
4071+++ b/scripts/Makefile.lib
4072@@ -17,6 +17,12 @@ obj-m := $(filter-out $(obj-y),$(obj-m))
4073
4074 lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m)))
4075
4076+# Remove objects forcibly disabled
4077+
4078+obj-y := $(filter-out $(disabled-obj-y),$(obj-y))
4079+obj-m := $(filter-out $(disabled-obj-y),$(obj-m))
4080+lib-y := $(filter-out $(disabled-obj-y),$(lib-y))
4081+
4082
4083 # Handle objects in subdirs
4084 # ---------------------------------------------------------------------------