2 From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 728:832aac894efd)
3 Patch-mainline: obsolete
4 Acked-by: jbeulich@novell.com
6 List of files that don't require modification anymore (and hence
7 removed from this patch), for reference and in case upstream wants to
8 take the forward porting patches:
9 2.6.22/include/linux/sched.h
10 2.6.22/kernel/softlockup.c
14 --- sle11-2009-10-16.orig/drivers/Makefile 2009-10-16 14:48:16.000000000 +0200
15 +++ sle11-2009-10-16/drivers/Makefile 2009-08-26 11:52:33.000000000 +0200
16 @@ -37,6 +37,7 @@ obj-y += base/ block/ misc/ mfd/ net/
17 obj-$(CONFIG_NUBUS) += nubus/
18 obj-$(CONFIG_ATM) += atm/
20 +obj-$(CONFIG_XEN) += xen/
21 obj-$(CONFIG_SCSI) += scsi/
22 obj-$(CONFIG_ATA) += ata/
23 obj-$(CONFIG_IDE) += ide/
24 --- sle11-2009-10-16.orig/drivers/acpi/Makefile 2009-10-16 14:48:16.000000000 +0200
25 +++ sle11-2009-10-16/drivers/acpi/Makefile 2009-08-26 11:52:33.000000000 +0200
26 @@ -34,6 +34,9 @@ processor-objs += processor_core.o proce
28 processor-objs += processor_perflib.o
30 +ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
31 +processor-objs += processor_perflib.o processor_extcntl.o
36 --- sle11-2009-10-16.orig/drivers/acpi/hardware/hwsleep.c 2009-10-16 14:48:16.000000000 +0200
37 +++ sle11-2009-10-16/drivers/acpi/hardware/hwsleep.c 2009-08-26 11:52:33.000000000 +0200
38 @@ -241,7 +241,11 @@ acpi_status asmlinkage acpi_enter_sleep_
40 struct acpi_bit_register_info *sleep_type_reg_info;
41 struct acpi_bit_register_info *sleep_enable_reg_info;
42 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
47 struct acpi_object_list arg_list;
48 union acpi_object arg;
50 @@ -351,6 +355,7 @@ acpi_status asmlinkage acpi_enter_sleep_
52 ACPI_FLUSH_CPU_CACHE();
54 +#if !(defined(CONFIG_XEN) && defined(CONFIG_X86))
55 status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
57 if (ACPI_FAILURE(status)) {
58 @@ -397,6 +402,16 @@ acpi_status asmlinkage acpi_enter_sleep_
59 /* Spin until we wake */
63 + /* PV ACPI just need check hypercall return value */
64 + err = acpi_notify_hypervisor_state(sleep_state,
65 + PM1Acontrol, PM1Bcontrol);
67 + ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
68 + "Hypervisor failure [%d]\n", err));
69 + return_ACPI_STATUS(AE_ERROR);
73 return_ACPI_STATUS(AE_OK);
75 --- sle11-2009-10-16.orig/drivers/acpi/processor_core.c 2009-10-16 14:48:16.000000000 +0200
76 +++ sle11-2009-10-16/drivers/acpi/processor_core.c 2009-08-26 11:52:33.000000000 +0200
77 @@ -620,7 +620,8 @@ static int acpi_processor_get_info(struc
81 - (acpi_processor_hotadd_init(pr->handle, &pr->id))) {
82 + (acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
83 + !processor_cntl_external()) {
87 @@ -671,7 +672,11 @@ static int acpi_processor_get_info(struc
92 static DEFINE_PER_CPU(void *, processor_device_array);
94 +static void *processor_device_array[NR_ACPI_CPUS];
97 static int __cpuinit acpi_processor_start(struct acpi_device *device)
99 @@ -680,30 +685,46 @@ static int __cpuinit acpi_processor_star
100 struct acpi_processor *pr;
101 struct sys_device *sysdev;
103 + processor_extcntl_init();
105 pr = acpi_driver_data(device);
107 result = acpi_processor_get_info(device);
110 + ((pr->id == -1) && !processor_cntl_external())) {
111 /* Processor is physically not present */
115 - BUG_ON((pr->id >= nr_cpu_ids) || (pr->id < 0));
116 + BUG_ON(!processor_cntl_external() &&
117 + ((pr->id >= nr_cpu_ids) || (pr->id < 0)));
121 * ACPI id of processors can be reported wrongly by the BIOS.
122 * Don't trust it blindly
125 if (per_cpu(processor_device_array, pr->id) != NULL &&
126 per_cpu(processor_device_array, pr->id) != device) {
128 + BUG_ON(pr->acpi_id >= NR_ACPI_CPUS);
129 + if (processor_device_array[pr->acpi_id] != NULL &&
130 + processor_device_array[pr->acpi_id] != device) {
132 printk(KERN_WARNING "BIOS reported wrong ACPI id "
133 "for the processor\n");
137 per_cpu(processor_device_array, pr->id) = device;
139 per_cpu(processors, pr->id) = pr;
141 + processor_device_array[pr->acpi_id] = device;
143 + per_cpu(processors, pr->id) = pr;
146 result = acpi_processor_add_fs(device);
148 @@ -719,15 +740,28 @@ static int __cpuinit acpi_processor_star
149 /* _PDC call should be done before doing anything else (if reqd.). */
150 arch_acpi_processor_init_pdc(pr);
151 acpi_processor_set_pdc(pr);
152 -#ifdef CONFIG_CPU_FREQ
153 +#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
154 acpi_processor_ppc_has_changed(pr);
156 - acpi_processor_get_throttling_info(pr);
157 - acpi_processor_get_limit_info(pr);
160 +	 * pr->id may be -1 while processor_cntl_external is enabled.
161 + * throttle and thermal module don't support this case.
162 + * Tx only works when dom0 vcpu == pcpu num by far, as we give
165 + if (pr->id != -1) {
166 + acpi_processor_get_throttling_info(pr);
167 + acpi_processor_get_limit_info(pr);
171 acpi_processor_power_init(pr, device);
173 + result = processor_extcntl_prepare(pr);
177 pr->cdev = thermal_cooling_device_register("Processor", device,
178 &processor_cooling_ops);
179 if (IS_ERR(pr->cdev)) {
180 @@ -855,7 +889,7 @@ static int acpi_processor_remove(struct
182 pr = acpi_driver_data(device);
184 - if (pr->id >= nr_cpu_ids) {
185 + if (!processor_cntl_external() && pr->id >= nr_cpu_ids) {
189 @@ -881,8 +915,14 @@ static int acpi_processor_remove(struct
194 per_cpu(processors, pr->id) = NULL;
195 per_cpu(processor_device_array, pr->id) = NULL;
198 + per_cpu(processors, pr->id) = NULL;
199 + processor_device_array[pr->acpi_id] = NULL;
204 @@ -942,6 +982,10 @@ int acpi_processor_device_add(acpi_handl
208 + if (processor_cntl_external())
209 + processor_notify_external(pr,
210 + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
212 if ((pr->id >= 0) && (pr->id < nr_cpu_ids)) {
213 kobject_uevent(&(*device)->dev.kobj, KOBJ_ONLINE);
215 @@ -981,6 +1025,10 @@ static void __ref acpi_processor_hotplug
219 + if (processor_cntl_external())
220 + processor_notify_external(pr,
221 + PROCESSOR_HOTPLUG, HOTPLUG_TYPE_ADD);
223 if (pr->id >= 0 && (pr->id < nr_cpu_ids)) {
224 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
226 @@ -1012,6 +1060,11 @@ static void __ref acpi_processor_hotplug
228 if ((pr->id < nr_cpu_ids) && (cpu_present(pr->id)))
229 kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
231 + if (processor_cntl_external())
232 + processor_notify_external(pr, PROCESSOR_HOTPLUG,
233 + HOTPLUG_TYPE_REMOVE);
237 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
238 @@ -1076,6 +1129,11 @@ static acpi_status acpi_processor_hotadd
240 static int acpi_processor_handle_eject(struct acpi_processor *pr)
247 if (cpu_online(pr->id))
250 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
251 +++ sle11-2009-10-16/drivers/acpi/processor_extcntl.c 2009-08-26 11:52:33.000000000 +0200
254 + * processor_extcntl.c - channel to external control logic
256 + * Copyright (C) 2008, Intel corporation
258 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260 + * This program is free software; you can redistribute it and/or modify
261 + * it under the terms of the GNU General Public License as published by
262 + * the Free Software Foundation; either version 2 of the License, or (at
263 + * your option) any later version.
265 + * This program is distributed in the hope that it will be useful, but
266 + * WITHOUT ANY WARRANTY; without even the implied warranty of
267 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
268 + * General Public License for more details.
270 + * You should have received a copy of the GNU General Public License along
271 + * with this program; if not, write to the Free Software Foundation, Inc.,
272 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
276 +#include <linux/kernel.h>
277 +#include <linux/init.h>
278 +#include <linux/types.h>
279 +#include <linux/acpi.h>
280 +#include <linux/pm.h>
281 +#include <linux/cpu.h>
283 +#include <acpi/processor.h>
285 +#define ACPI_PROCESSOR_COMPONENT 0x01000000
286 +#define ACPI_PROCESSOR_CLASS "processor"
287 +#define ACPI_PROCESSOR_DRIVER_NAME "ACPI Processor Driver"
288 +#define _COMPONENT ACPI_PROCESSOR_COMPONENT
289 +ACPI_MODULE_NAME("acpi_processor")
291 +static int processor_extcntl_parse_csd(struct acpi_processor *pr);
292 +static int processor_extcntl_get_performance(struct acpi_processor *pr);
294 + * External processor control logic may register with its own set of
295 + * ops to get ACPI related notification. One example is like VMM.
297 +const struct processor_extcntl_ops *processor_extcntl_ops;
298 +EXPORT_SYMBOL(processor_extcntl_ops);
300 +static int processor_notify_smm(void)
302 + acpi_status status;
303 + static int is_done = 0;
305 + /* only need successfully notify BIOS once */
306 + /* avoid double notification which may lead to unexpected result */
310 + /* Can't write pstate_cnt to smi_cmd if either value is zero */
311 + if ((!acpi_fadt.smi_cmd) || (!acpi_fadt.pstate_cnt)) {
312 + ACPI_DEBUG_PRINT((ACPI_DB_INFO,"No SMI port or pstate_cnt\n"));
316 + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
317 + "Writing pstate_cnt [0x%x] to smi_cmd [0x%x]\n",
318 + acpi_fadt.pstate_cnt, acpi_fadt.smi_cmd));
320 + /* FADT v1 doesn't support pstate_cnt, many BIOS vendors use
321 + * it anyway, so we need to support it... */
322 + if (acpi_fadt_is_v1) {
323 + ACPI_DEBUG_PRINT((ACPI_DB_INFO,
324 + "Using v1.0 FADT reserved value for pstate_cnt\n"));
327 + status = acpi_os_write_port(acpi_fadt.smi_cmd,
328 + (u32) acpi_fadt.pstate_cnt, 8);
329 + if (ACPI_FAILURE(status))
337 +int processor_notify_external(struct acpi_processor *pr, int event, int type)
341 + if (!processor_cntl_external())
345 + case PROCESSOR_PM_INIT:
346 + case PROCESSOR_PM_CHANGE:
347 + if ((type >= PM_TYPE_MAX) ||
348 + !processor_extcntl_ops->pm_ops[type])
351 + ret = processor_extcntl_ops->pm_ops[type](pr, event);
353 + case PROCESSOR_HOTPLUG:
354 + if (processor_extcntl_ops->hotplug)
355 + ret = processor_extcntl_ops->hotplug(pr, type);
358 + printk(KERN_ERR "Unsupport processor events %d.\n", event);
366 + * External control logic can decide to grab full or part of physical
367 + * processor control bits. Take a VMM for example, physical processors
368 + * are owned by VMM and thus existence information like hotplug is
369 + * always required to be notified to VMM. Similar is processor idle
370 + * state which is also necessarily controlled by VMM. But for other
371 + * control bits like performance/throttle states, VMM may choose to
372 + * control or not upon its own policy.
374 +void processor_extcntl_init(void)
376 + if (!processor_extcntl_ops)
377 + arch_acpi_processor_init_extcntl(&processor_extcntl_ops);
381 + * This is called from ACPI processor init, and targeted to hold
382 + * some tricky housekeeping jobs to satisfy external control model.
383 + * For example, we may put dependency parse stub here for idle
384 + * and performance state. Those information may be not available
385 + * if splitting from dom0 control logic like cpufreq driver.
387 +int processor_extcntl_prepare(struct acpi_processor *pr)
389 + /* parse cstate dependency information */
390 + if (processor_pm_external())
391 + processor_extcntl_parse_csd(pr);
393 + /* Initialize performance states */
394 + if (processor_pmperf_external())
395 + processor_extcntl_get_performance(pr);
401 + * Currently no _CSD is implemented which is why existing ACPI code
402 + * doesn't parse _CSD at all. But to keep interface complete with
403 + * external control logic, we put a placeholder here for future
406 +static int processor_extcntl_parse_csd(struct acpi_processor *pr)
410 + for (i = 0; i < pr->power.count; i++) {
411 + if (!pr->power.states[i].valid)
414 + /* No dependency by default */
415 + pr->power.states[i].domain_info = NULL;
416 + pr->power.states[i].csd_count = 0;
423 + * Existing ACPI module does parse performance states at some point,
424 + * when acpi-cpufreq driver is loaded which however is something
425 + * we'd like to disable to avoid conflict with external control
426 + * logic. So we have to collect raw performance information here
427 + * when ACPI processor object is found and started.
429 +static int processor_extcntl_get_performance(struct acpi_processor *pr)
432 + struct acpi_processor_performance *perf;
433 + struct acpi_psd_package *pdomain;
435 + if (pr->performance)
438 + perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL);
442 + pr->performance = perf;
443 + /* Get basic performance state information */
444 + ret = acpi_processor_get_performance_info(pr);
449 +	 * Well, here we need to retrieve performance dependency information
450 + * from _PSD object. The reason why existing interface is not used
451 + * is due to the reason that existing interface sticks to Linux cpu
452 + * id to construct some bitmap, however we want to split ACPI
453 + * processor objects from Linux cpu id logic. For example, even
454 + * when Linux is configured as UP, we still want to parse all ACPI
455 + * processor objects to external logic. In this case, it's preferred
456 + * to use ACPI ID instead.
458 + pdomain = &pr->performance->domain_info;
459 + pdomain->num_processors = 0;
460 + ret = acpi_processor_get_psd(pr);
463 + * _PSD is optional - assume no coordination if absent (or
464 + * broken), matching native kernels' behavior.
466 + pdomain->num_entries = ACPI_PSD_REV0_ENTRIES;
467 + pdomain->revision = ACPI_PSD_REV0_REVISION;
468 + pdomain->domain = pr->acpi_id;
469 + pdomain->coord_type = DOMAIN_COORD_TYPE_SW_ALL;
470 + pdomain->num_processors = 1;
473 + /* Some sanity check */
474 + if ((pdomain->revision != ACPI_PSD_REV0_REVISION) ||
475 + (pdomain->num_entries != ACPI_PSD_REV0_ENTRIES) ||
476 + ((pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ALL) &&
477 + (pdomain->coord_type != DOMAIN_COORD_TYPE_SW_ANY) &&
478 + (pdomain->coord_type != DOMAIN_COORD_TYPE_HW_ALL))) {
483 + /* Last step is to notify BIOS that external logic exists */
484 + processor_notify_smm();
486 + processor_notify_external(pr, PROCESSOR_PM_INIT, PM_TYPE_PERF);
490 + pr->performance = NULL;
494 --- sle11-2009-10-16.orig/drivers/acpi/processor_idle.c 2009-10-16 14:48:16.000000000 +0200
495 +++ sle11-2009-10-16/drivers/acpi/processor_idle.c 2009-08-26 11:52:33.000000000 +0200
496 @@ -908,7 +908,8 @@ static int acpi_processor_get_power_info
498 cx.entry_method = ACPI_CSTATE_HALT;
499 snprintf(cx.desc, ACPI_CX_DESC_LEN, "ACPI HLT");
501 + /* This doesn't apply to external control case */
502 + } else if (!processor_pm_external()) {
505 if (cx.type == ACPI_STATE_C1 &&
506 @@ -947,6 +948,12 @@ static int acpi_processor_get_power_info
508 cx.power = obj->integer.value;
510 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
511 + /* cache control methods to notify external logic */
512 + if (processor_pm_external())
513 + memcpy(&cx.reg, reg, sizeof(*reg));
517 memcpy(&(pr->power.states[current_count]), &cx, sizeof(cx));
519 @@ -1289,14 +1296,18 @@ int acpi_processor_cst_has_changed(struc
523 - pm_idle = pm_idle_save;
524 + if (!processor_pm_external())
525 + pm_idle = pm_idle_save;
526 /* Relies on interrupts forcing exit from idle. */
531 result = acpi_processor_get_power_info(pr);
532 - if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
533 + if (processor_pm_external())
534 + processor_notify_external(pr,
535 + PROCESSOR_PM_CHANGE, PM_TYPE_IDLE);
536 + else if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
537 pm_idle = acpi_processor_idle;
540 @@ -1821,7 +1832,7 @@ int __cpuinit acpi_processor_power_init(
543 #ifndef CONFIG_CPU_IDLE
545 + if (!processor_pm_external() && (pr->id == 0)) {
546 pm_idle_save = pm_idle;
547 pm_idle = acpi_processor_idle;
549 @@ -1835,6 +1846,11 @@ int __cpuinit acpi_processor_power_init(
550 acpi_driver_data(device));
554 + if (processor_pm_external())
555 + processor_notify_external(pr,
556 + PROCESSOR_PM_INIT, PM_TYPE_IDLE);
561 --- sle11-2009-10-16.orig/drivers/acpi/processor_perflib.c 2009-10-16 14:48:16.000000000 +0200
562 +++ sle11-2009-10-16/drivers/acpi/processor_perflib.c 2009-08-26 11:52:33.000000000 +0200
563 @@ -80,6 +80,7 @@ MODULE_PARM_DESC(ignore_ppc, "If the fre
565 static int acpi_processor_ppc_status;
567 +#ifdef CONFIG_CPU_FREQ
568 static int acpi_processor_ppc_notifier(struct notifier_block *nb,
569 unsigned long event, void *data)
571 @@ -122,6 +123,7 @@ static int acpi_processor_ppc_notifier(s
572 static struct notifier_block acpi_ppc_notifier_block = {
573 .notifier_call = acpi_processor_ppc_notifier,
575 +#endif /* CONFIG_CPU_FREQ */
577 static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
579 @@ -166,9 +168,15 @@ int acpi_processor_ppc_has_changed(struc
583 +#ifdef CONFIG_CPU_FREQ
584 return cpufreq_update_policy(pr->id);
585 +#elif CONFIG_PROCESSOR_EXTERNAL_CONTROL
586 + return processor_notify_external(pr,
587 + PROCESSOR_PM_CHANGE, PM_TYPE_PERF);
591 +#ifdef CONFIG_CPU_FREQ
592 void acpi_processor_ppc_init(void)
594 if (!cpufreq_register_notifier
595 @@ -187,6 +195,7 @@ void acpi_processor_ppc_exit(void)
597 acpi_processor_ppc_status &= ~PPC_REGISTERED;
599 +#endif /* CONFIG_CPU_FREQ */
601 static int acpi_processor_get_performance_control(struct acpi_processor *pr)
603 @@ -328,7 +337,10 @@ static int acpi_processor_get_performanc
607 -static int acpi_processor_get_performance_info(struct acpi_processor *pr)
608 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
611 +int acpi_processor_get_performance_info(struct acpi_processor *pr)
614 acpi_status status = AE_OK;
615 @@ -356,6 +368,7 @@ static int acpi_processor_get_performanc
619 +#ifdef CONFIG_CPU_FREQ
620 int acpi_processor_notify_smm(struct module *calling_module)
623 @@ -416,6 +429,7 @@ int acpi_processor_notify_smm(struct mod
626 EXPORT_SYMBOL(acpi_processor_notify_smm);
627 +#endif /* CONFIG_CPU_FREQ */
629 #ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF
630 /* /proc/acpi/processor/../performance interface (DEPRECATED) */
631 @@ -507,7 +521,10 @@ static void acpi_cpufreq_remove_file(str
633 #endif /* CONFIG_X86_ACPI_CPUFREQ_PROC_INTF */
635 -static int acpi_processor_get_psd(struct acpi_processor *pr)
636 +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
639 +int acpi_processor_get_psd(struct acpi_processor *pr)
642 acpi_status status = AE_OK;
643 --- sle11-2009-10-16.orig/drivers/acpi/sleep/main.c 2009-10-16 14:48:16.000000000 +0200
644 +++ sle11-2009-10-16/drivers/acpi/sleep/main.c 2009-08-26 11:52:33.000000000 +0200
645 @@ -27,6 +27,7 @@ u8 sleep_states[ACPI_S_STATE_COUNT];
646 static int acpi_sleep_prepare(u32 acpi_state)
648 #ifdef CONFIG_ACPI_SLEEP
649 +#ifndef CONFIG_ACPI_PV_SLEEP
650 /* do we have a wakeup address for S2 and S3? */
651 if (acpi_state == ACPI_STATE_S3) {
652 if (!acpi_wakeup_address) {
653 @@ -36,6 +37,7 @@ static int acpi_sleep_prepare(u32 acpi_s
654 (acpi_physical_address)acpi_wakeup_address);
658 ACPI_FLUSH_CPU_CACHE();
659 acpi_enable_wakeup_device_prep(acpi_state);
661 @@ -208,7 +210,14 @@ static int acpi_suspend_enter(suspend_st
665 +#ifdef CONFIG_ACPI_PV_SLEEP
666 +	/* Hypervisor will save and restore CPU context
667 + * and then we can skip low level housekeeping here.
669 + acpi_enter_sleep_state(acpi_state);
671 do_suspend_lowlevel();
676 --- sle11-2009-10-16.orig/drivers/char/agp/intel-agp.c 2009-10-16 14:48:16.000000000 +0200
677 +++ sle11-2009-10-16/drivers/char/agp/intel-agp.c 2009-10-16 14:49:12.000000000 +0200
678 @@ -259,6 +259,13 @@ static void *i8xx_alloc_pages(void)
683 + if (xen_create_contiguous_region((unsigned long)page_address(page), 2, 32)) {
684 + __free_pages(page, 2);
689 if (set_pages_uc(page, 4) < 0) {
690 set_pages_wb(page, 4);
691 __free_pages(page, 2);
692 @@ -278,6 +285,9 @@ static void i8xx_destroy_pages(void *add
694 page = virt_to_page(addr);
695 set_pages_wb(page, 4);
697 + xen_destroy_contiguous_region((unsigned long)page_address(page), 2);
700 __free_pages(page, 2);
701 atomic_dec(&agp_bridge->current_memory_agp);
702 --- sle11-2009-10-16.orig/drivers/char/mem.c 2009-10-16 14:48:16.000000000 +0200
703 +++ sle11-2009-10-16/drivers/char/mem.c 2009-08-26 11:52:33.000000000 +0200
704 @@ -110,6 +110,7 @@ void __attribute__((weak)) unxlate_dev_m
708 +#ifndef ARCH_HAS_DEV_MEM
710 * This funcion reads the *physical* memory. The f_pos points directly to the
712 @@ -254,6 +255,7 @@ static ssize_t write_mem(struct file * f
718 int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file,
719 unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
720 @@ -372,6 +374,9 @@ static int mmap_mem(struct file * file,
721 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
725 + unsigned long i, count;
728 /* Turn a kernel-virtual address into a physical page frame */
729 pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
730 @@ -386,6 +391,13 @@ static int mmap_kmem(struct file * file,
735 + count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
736 + for (i = 0; i < count; i++)
737 + if ((pfn + i) != mfn_to_local_pfn(pfn_to_mfn(pfn + i)))
742 return mmap_mem(file, vma);
744 @@ -905,6 +917,7 @@ static int open_port(struct inode * inod
745 #define open_kmem open_mem
746 #define open_oldmem open_mem
748 +#ifndef ARCH_HAS_DEV_MEM
749 static const struct file_operations mem_fops = {
750 .llseek = memory_lseek,
752 @@ -913,6 +926,9 @@ static const struct file_operations mem_
754 .get_unmapped_area = get_unmapped_area_mem,
757 +extern const struct file_operations mem_fops;
760 #ifdef CONFIG_DEVKMEM
761 static const struct file_operations kmem_fops = {
762 --- sle11-2009-10-16.orig/drivers/char/tpm/Makefile 2009-10-16 14:48:16.000000000 +0200
763 +++ sle11-2009-10-16/drivers/char/tpm/Makefile 2009-08-26 11:52:33.000000000 +0200
764 @@ -9,3 +9,5 @@ obj-$(CONFIG_TCG_TIS) += tpm_tis.o
765 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
766 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
767 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
768 +obj-$(CONFIG_TCG_XEN) += tpm_xenu.o
769 +tpm_xenu-y = tpm_xen.o tpm_vtpm.o
770 --- sle11-2009-10-16.orig/drivers/char/tpm/tpm.h 2009-10-16 14:48:16.000000000 +0200
771 +++ sle11-2009-10-16/drivers/char/tpm/tpm.h 2009-08-26 11:52:33.000000000 +0200
772 @@ -107,6 +107,9 @@ struct tpm_chip {
773 struct dentry **bios_dir;
775 struct list_head list;
779 void (*release) (struct device *);
782 @@ -124,6 +127,18 @@ static inline void tpm_write_index(int b
783 outb(value & 0xFF, base+1);
787 +static inline void *chip_get_private(const struct tpm_chip *chip)
792 +static inline void chip_set_private(struct tpm_chip *chip, void *priv)
798 extern void tpm_get_timeouts(struct tpm_chip *);
799 extern void tpm_gen_interrupt(struct tpm_chip *);
800 extern void tpm_continue_selftest(struct tpm_chip *);
801 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
802 +++ sle11-2009-10-16/drivers/char/tpm/tpm_vtpm.c 2009-08-26 11:52:33.000000000 +0200
805 + * Copyright (C) 2006 IBM Corporation
808 + * Stefan Berger <stefanb@us.ibm.com>
810 + * Generic device driver part for device drivers in a virtualized
813 + * This program is free software; you can redistribute it and/or
814 + * modify it under the terms of the GNU General Public License as
815 + * published by the Free Software Foundation, version 2 of the
820 +#include <asm/uaccess.h>
821 +#include <linux/list.h>
822 +#include <linux/device.h>
823 +#include <linux/interrupt.h>
824 +#include <linux/platform_device.h>
826 +#include "tpm_vtpm.h"
828 +/* read status bits */
830 + STATUS_BUSY = 0x01,
831 + STATUS_DATA_AVAIL = 0x02,
832 + STATUS_READY = 0x04
835 +struct transmission {
836 + struct list_head next;
838 + unsigned char *request;
839 + size_t request_len;
840 + size_t request_buflen;
842 + unsigned char *response;
843 + size_t response_len;
844 + size_t response_buflen;
846 + unsigned int flags;
850 + TRANSMISSION_FLAG_WAS_QUEUED = 0x1
855 + DATAEX_FLAG_QUEUED_ONLY = 0x1
859 +/* local variables */
861 +/* local function prototypes */
862 +static int _vtpm_send_queued(struct tpm_chip *chip);
865 +/* =============================================================
866 + * Some utility functions
867 + * =============================================================
869 +static void vtpm_state_init(struct vtpm_state *vtpms)
871 + vtpms->current_request = NULL;
872 + spin_lock_init(&vtpms->req_list_lock);
873 + init_waitqueue_head(&vtpms->req_wait_queue);
874 + INIT_LIST_HEAD(&vtpms->queued_requests);
876 + vtpms->current_response = NULL;
877 + spin_lock_init(&vtpms->resp_list_lock);
878 + init_waitqueue_head(&vtpms->resp_wait_queue);
880 + vtpms->disconnect_time = jiffies;
884 +static inline struct transmission *transmission_alloc(void)
886 + return kzalloc(sizeof(struct transmission), GFP_ATOMIC);
889 +static unsigned char *
890 +transmission_set_req_buffer(struct transmission *t,
891 + unsigned char *buffer, size_t len)
893 + if (t->request_buflen < len) {
895 + t->request = kmalloc(len, GFP_KERNEL);
897 + t->request_buflen = 0;
900 + t->request_buflen = len;
903 + memcpy(t->request, buffer, len);
904 + t->request_len = len;
909 +static unsigned char *
910 +transmission_set_res_buffer(struct transmission *t,
911 + const unsigned char *buffer, size_t len)
913 + if (t->response_buflen < len) {
914 + kfree(t->response);
915 + t->response = kmalloc(len, GFP_ATOMIC);
916 + if (!t->response) {
917 + t->response_buflen = 0;
920 + t->response_buflen = len;
923 + memcpy(t->response, buffer, len);
924 + t->response_len = len;
926 + return t->response;
929 +static inline void transmission_free(struct transmission *t)
932 + kfree(t->response);
936 +/* =============================================================
937 + * Interface with the lower layer driver
938 + * =============================================================
941 + * Lower layer uses this function to make a response available.
943 +int vtpm_vd_recv(const struct tpm_chip *chip,
944 + const unsigned char *buffer, size_t count,
947 + unsigned long flags;
949 + struct transmission *t;
950 + struct vtpm_state *vtpms;
952 + vtpms = (struct vtpm_state *)chip_get_private(chip);
955 + * The list with requests must contain one request
956 + * only and the element there must be the one that
957 + * was passed to me from the front-end.
959 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
960 + if (vtpms->current_request != ptr) {
961 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
965 + if ((t = vtpms->current_request)) {
966 + transmission_free(t);
967 + vtpms->current_request = NULL;
970 + t = transmission_alloc();
972 + if (!transmission_set_res_buffer(t, buffer, count)) {
973 + transmission_free(t);
974 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
978 + vtpms->current_response = t;
979 + wake_up_interruptible(&vtpms->resp_wait_queue);
981 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
988 + * Lower layer indicates its status (connected/disconnected)
990 +void vtpm_vd_status(const struct tpm_chip *chip, u8 vd_status)
992 + struct vtpm_state *vtpms;
994 + vtpms = (struct vtpm_state *)chip_get_private(chip);
996 + vtpms->vd_status = vd_status;
997 + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
998 + vtpms->disconnect_time = jiffies;
1002 +/* =============================================================
1003 + * Interface with the generic TPM driver
1004 + * =============================================================
1006 +static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
1009 + unsigned long flags;
1010 + struct vtpm_state *vtpms;
1012 + vtpms = (struct vtpm_state *)chip_get_private(chip);
1015 + * Check if the previous operation only queued the command
1016 + * In this case there won't be a response, so I just
1017 + * return from here and reset that flag. In any other
1018 + * case I should receive a response from the back-end.
1020 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1021 + if ((vtpms->flags & DATAEX_FLAG_QUEUED_ONLY) != 0) {
1022 + vtpms->flags &= ~DATAEX_FLAG_QUEUED_ONLY;
1023 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1025 + * The first few commands (measurements) must be
1026 + * queued since it might not be possible to talk to the
1028 + * Return a response of up to 30 '0's.
1031 + count = min_t(size_t, count, 30);
1032 + memset(buf, 0x0, count);
1036 + * Check whether something is in the responselist and if
1037 + * there's nothing in the list wait for something to appear.
1040 + if (!vtpms->current_response) {
1041 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1042 + interruptible_sleep_on_timeout(&vtpms->resp_wait_queue,
1044 + spin_lock_irqsave(&vtpms->resp_list_lock ,flags);
1047 + if (vtpms->current_response) {
1048 + struct transmission *t = vtpms->current_response;
1049 + vtpms->current_response = NULL;
1050 + rc = min(count, t->response_len);
1051 + memcpy(buf, t->response, rc);
1052 + transmission_free(t);
1055 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1059 +static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
1062 + unsigned long flags;
1063 + struct transmission *t = transmission_alloc();
1064 + struct vtpm_state *vtpms;
1066 + vtpms = (struct vtpm_state *)chip_get_private(chip);
1071 + * If there's a current request, it must be the
1072 + * previous request that has timed out.
1074 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
1075 + if (vtpms->current_request != NULL) {
1076 + printk("WARNING: Sending although there is a request outstanding.\n"
1077 + " Previous request must have timed out.\n");
1078 + transmission_free(vtpms->current_request);
1079 + vtpms->current_request = NULL;
1081 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1084 + * Queue the packet if the driver below is not
1085 + * ready, yet, or there is any packet already
1087 + * If the driver below is ready, unqueue all
1088 + * packets first before sending our current
1090 + * For each unqueued packet, except for the
1091 + * last (=current) packet, call the function
1092 + * tpm_xen_recv to wait for the response to come
1095 + if ((vtpms->vd_status & TPM_VD_STATUS_CONNECTED) == 0) {
1096 + if (time_after(jiffies,
1097 + vtpms->disconnect_time + HZ * 10)) {
1104 + * Send all queued packets.
1106 + if (_vtpm_send_queued(chip) == 0) {
1108 + vtpms->current_request = t;
1110 + rc = vtpm_vd_send(vtpms->tpm_private,
1115 + * The generic TPM driver will call
1116 + * the function to receive the response.
1119 + vtpms->current_request = NULL;
1124 + if (!transmission_set_req_buffer(t, buf, count)) {
1125 + transmission_free(t);
1130 +		 * An error occurred. Don't even try
1131 + * to send the current request. Just
1134 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
1135 + vtpms->flags |= DATAEX_FLAG_QUEUED_ONLY;
1136 + list_add_tail(&t->next, &vtpms->queued_requests);
1137 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1147 + * Send all queued requests.
1149 +static int _vtpm_send_queued(struct tpm_chip *chip)
1154 + unsigned char buffer[1];
1155 + struct vtpm_state *vtpms;
1156 + vtpms = (struct vtpm_state *)chip_get_private(chip);
1158 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
1160 + while (!list_empty(&vtpms->queued_requests)) {
1162 + * Need to dequeue them.
1163 + * Read the result into a dummy buffer.
1165 + struct transmission *qt = (struct transmission *)
1166 + vtpms->queued_requests.next;
1167 + list_del(&qt->next);
1168 + vtpms->current_request = qt;
1169 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1171 + rc = vtpm_vd_send(vtpms->tpm_private,
1177 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
1178 + if ((qt = vtpms->current_request) != NULL) {
1180 + * requeue it at the beginning
1183 + list_add(&qt->next,
1184 + &vtpms->queued_requests);
1186 + vtpms->current_request = NULL;
1191 + * After this point qt is not valid anymore!
1192 + * It is freed when the front-end is delivering
1193 + * the data by calling tpm_recv
1196 + * Receive response into provided dummy buffer
1198 + rc = vtpm_recv(chip, buffer, sizeof(buffer));
1199 + spin_lock_irqsave(&vtpms->req_list_lock, flags);
1202 + spin_unlock_irqrestore(&vtpms->req_list_lock, flags);
1207 +static void vtpm_cancel(struct tpm_chip *chip)
1209 + unsigned long flags;
1210 + struct vtpm_state *vtpms = (struct vtpm_state *)chip_get_private(chip);
1212 + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1214 + if (!vtpms->current_response && vtpms->current_request) {
1215 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1216 + interruptible_sleep_on(&vtpms->resp_wait_queue);
1217 + spin_lock_irqsave(&vtpms->resp_list_lock,flags);
1220 + if (vtpms->current_response) {
1221 + struct transmission *t = vtpms->current_response;
1222 + vtpms->current_response = NULL;
1223 + transmission_free(t);
1226 + spin_unlock_irqrestore(&vtpms->resp_list_lock,flags);
1229 +static u8 vtpm_status(struct tpm_chip *chip)
1232 + unsigned long flags;
1233 + struct vtpm_state *vtpms;
1235 + vtpms = (struct vtpm_state *)chip_get_private(chip);
1237 + spin_lock_irqsave(&vtpms->resp_list_lock, flags);
1239 + * Data are available if:
1240 + * - there's a current response
1241 + * - the last packet was queued only (this is fake, but necessary to
1242 + * get the generic TPM layer to call the receive function.)
1244 + if (vtpms->current_response ||
1245 + 0 != (vtpms->flags & DATAEX_FLAG_QUEUED_ONLY)) {
1246 + rc = STATUS_DATA_AVAIL;
1247 + } else if (!vtpms->current_response && !vtpms->current_request) {
1248 + rc = STATUS_READY;
1251 + spin_unlock_irqrestore(&vtpms->resp_list_lock, flags);
1255 +static struct file_operations vtpm_ops = {
1256 + .owner = THIS_MODULE,
1257 + .llseek = no_llseek,
1260 + .write = tpm_write,
1261 + .release = tpm_release,
1264 +static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
1265 +static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
1266 +static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
1267 +static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
1268 +static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
1269 +static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
1271 +static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
1272 +static DEVICE_ATTR(cancel, S_IWUSR |S_IWGRP, NULL, tpm_store_cancel);
1274 +static struct attribute *vtpm_attrs[] = {
1275 + &dev_attr_pubek.attr,
1276 + &dev_attr_pcrs.attr,
1277 + &dev_attr_enabled.attr,
1278 + &dev_attr_active.attr,
1279 + &dev_attr_owned.attr,
1280 + &dev_attr_temp_deactivated.attr,
1281 + &dev_attr_caps.attr,
1282 + &dev_attr_cancel.attr,
1286 +static struct attribute_group vtpm_attr_grp = { .attrs = vtpm_attrs };
1288 +#define TPM_LONG_TIMEOUT (10 * 60 * HZ)
1290 +static struct tpm_vendor_specific tpm_vtpm = {
1291 + .recv = vtpm_recv,
1292 + .send = vtpm_send,
1293 + .cancel = vtpm_cancel,
1294 + .status = vtpm_status,
1295 + .req_complete_mask = STATUS_BUSY | STATUS_DATA_AVAIL,
1296 + .req_complete_val = STATUS_DATA_AVAIL,
1297 + .req_canceled = STATUS_READY,
1298 + .attr_group = &vtpm_attr_grp,
1300 + .fops = &vtpm_ops,
1309 +struct tpm_chip *init_vtpm(struct device *dev,
1310 + struct tpm_private *tp)
1313 + struct tpm_chip *chip;
1314 + struct vtpm_state *vtpms;
1316 + vtpms = kzalloc(sizeof(struct vtpm_state), GFP_KERNEL);
1318 + return ERR_PTR(-ENOMEM);
1320 + vtpm_state_init(vtpms);
1321 + vtpms->tpm_private = tp;
1323 + chip = tpm_register_hardware(dev, &tpm_vtpm);
1326 + goto err_free_mem;
1329 + chip_set_private(chip, vtpms);
1336 + return ERR_PTR(rc);
1339 +void cleanup_vtpm(struct device *dev)
1341 + struct tpm_chip *chip = dev_get_drvdata(dev);
1342 + struct vtpm_state *vtpms = (struct vtpm_state*)chip_get_private(chip);
1343 + tpm_remove_hardware(dev);
1346 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1347 +++ sle11-2009-10-16/drivers/char/tpm/tpm_vtpm.h 2009-08-26 11:52:33.000000000 +0200
1353 +struct tpm_private;
1355 +struct vtpm_state {
1356 + struct transmission *current_request;
1357 + spinlock_t req_list_lock;
1358 + wait_queue_head_t req_wait_queue;
1360 + struct list_head queued_requests;
1362 + struct transmission *current_response;
1363 + spinlock_t resp_list_lock;
1364 + wait_queue_head_t resp_wait_queue; // processes waiting for responses
1369 + unsigned long disconnect_time;
1372 + * The following is a private structure of the underlying
1373 + * driver. It is passed as parameter in the send function.
1375 + struct tpm_private *tpm_private;
1380 + TPM_VD_STATUS_DISCONNECTED = 0x0,
1381 + TPM_VD_STATUS_CONNECTED = 0x1
1384 +/* this function is called from tpm_vtpm.c */
1385 +int vtpm_vd_send(struct tpm_private * tp,
1386 + const u8 * buf, size_t count, void *ptr);
1388 +/* these functions are offered by tpm_vtpm.c */
1389 +struct tpm_chip *init_vtpm(struct device *,
1390 + struct tpm_private *);
1391 +void cleanup_vtpm(struct device *);
1392 +int vtpm_vd_recv(const struct tpm_chip* chip,
1393 + const unsigned char *buffer, size_t count, void *ptr);
1394 +void vtpm_vd_status(const struct tpm_chip *, u8 status);
1396 +static inline struct tpm_private *tpm_private_from_dev(struct device *dev)
1398 + struct tpm_chip *chip = dev_get_drvdata(dev);
1399 + struct vtpm_state *vtpms = chip_get_private(chip);
1400 + return vtpms->tpm_private;
1404 --- /dev/null 1970-01-01 00:00:00.000000000 +0000
1405 +++ sle11-2009-10-16/drivers/char/tpm/tpm_xen.c 2009-08-26 11:52:33.000000000 +0200
1408 + * Copyright (c) 2005, IBM Corporation
1410 + * Author: Stefan Berger, stefanb@us.ibm.com
1411 + * Grant table support: Mahadevan Gomathisankaran
1413 + * This code has been derived from drivers/xen/netfront/netfront.c
1415 + * Copyright (c) 2002-2004, K A Fraser
1417 + * This program is free software; you can redistribute it and/or
1418 + * modify it under the terms of the GNU General Public License version 2
1419 + * as published by the Free Software Foundation; or, when distributed
1420 + * separately from the Linux kernel or incorporated into other
1421 + * software packages, subject to the following license:
1423 + * Permission is hereby granted, free of charge, to any person obtaining a copy
1424 + * of this source file (the "Software"), to deal in the Software without
1425 + * restriction, including without limitation the rights to use, copy, modify,
1426 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
1427 + * and to permit persons to whom the Software is furnished to do so, subject to
1428 + * the following conditions:
1430 + * The above copyright notice and this permission notice shall be included in
1431 + * all copies or substantial portions of the Software.
1433 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1434 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1435 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1436 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1437 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
1438 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
1439 + * IN THE SOFTWARE.
1442 +#include <linux/errno.h>
1443 +#include <linux/err.h>
1444 +#include <linux/interrupt.h>
1445 +#include <linux/mutex.h>
1446 +#include <asm/uaccess.h>
1447 +#include <xen/evtchn.h>
1448 +#include <xen/interface/grant_table.h>
1449 +#include <xen/interface/io/tpmif.h>
1450 +#include <xen/gnttab.h>
1451 +#include <xen/xenbus.h>
1453 +#include "tpm_vtpm.h"
1457 +/* local structures */
1458 +struct tpm_private {
1459 + struct tpm_chip *chip;
1461 + tpmif_tx_interface_t *tx;
1467 + spinlock_t tx_lock;
1469 + struct tx_buffer *tx_buffers[TPMIF_TX_RING_SIZE];
1472 + void *tx_remember;
1474 + domid_t backend_id;
1475 + wait_queue_head_t wait_q;
1477 + struct xenbus_device *dev;
1482 + unsigned int size; // available space in data
1483 + unsigned int len; // used space in data
1484 + unsigned char *data; // pointer to a page
1488 +/* locally visible variables */
1489 +static grant_ref_t gref_head;
1490 +static struct tpm_private *my_priv;
1492 +/* local function prototypes */
1493 +static irqreturn_t tpmif_int(int irq,
1495 + struct pt_regs *ptregs);
1496 +static void tpmif_rx_action(unsigned long unused);
1497 +static int tpmif_connect(struct xenbus_device *dev,
1498 + struct tpm_private *tp,
1500 +static DECLARE_TASKLET(tpmif_rx_tasklet, tpmif_rx_action, 0);
1501 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp);
1502 +static void tpmif_free_tx_buffers(struct tpm_private *tp);
1503 +static void tpmif_set_connected_state(struct tpm_private *tp,
1505 +static int tpm_xmit(struct tpm_private *tp,
1506 + const u8 * buf, size_t count, int userbuffer,
1508 +static void destroy_tpmring(struct tpm_private *tp);
1509 +void __exit tpmif_exit(void);
1511 +#define DPRINTK(fmt, args...) \
1512 + pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
1513 +#define IPRINTK(fmt, args...) \
1514 + printk(KERN_INFO "xen_tpm_fr: " fmt, ##args)
1515 +#define WPRINTK(fmt, args...) \
1516 + printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args)
1518 +#define GRANT_INVALID_REF 0
1522 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
1527 + if (len > txb->size)
1528 + copied = txb->size;
1529 + if (isuserbuffer) {
1530 + if (copy_from_user(txb->data, src, copied))
1533 + memcpy(txb->data, src, copied);
1539 +static inline struct tx_buffer *tx_buffer_alloc(void)
1541 + struct tx_buffer *txb;
1543 + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
1548 + txb->size = PAGE_SIZE;
1549 + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
1550 + if (txb->data == NULL) {
1559 +static inline void tx_buffer_free(struct tx_buffer *txb)
1562 + free_page((long)txb->data);
1567 +/**************************************************************
1568 + Utility function for the tpm_private structure
1569 +**************************************************************/
1570 +static void tpm_private_init(struct tpm_private *tp)
1572 + spin_lock_init(&tp->tx_lock);
1573 + init_waitqueue_head(&tp->wait_q);
1574 + atomic_set(&tp->refcnt, 1);
1577 +static void tpm_private_put(void)
1579 + if (!atomic_dec_and_test(&my_priv->refcnt))
1582 + tpmif_free_tx_buffers(my_priv);
1587 +static struct tpm_private *tpm_private_get(void)
1592 + atomic_inc(&my_priv->refcnt);
1596 + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
1600 + tpm_private_init(my_priv);
1601 + err = tpmif_allocate_tx_buffers(my_priv);
1603 + tpm_private_put();
1608 +/**************************************************************
1610 + The interface to let the tpm plugin register its callback
1611 + function and send data to another partition using this module
1613 +**************************************************************/
1615 +static DEFINE_MUTEX(suspend_lock);
1617 + * Send data via this module by calling this function
1619 +int vtpm_vd_send(struct tpm_private *tp,
1620 + const u8 * buf, size_t count, void *ptr)
1624 + mutex_lock(&suspend_lock);
1625 + sent = tpm_xmit(tp, buf, count, 0, ptr);
1626 + mutex_unlock(&suspend_lock);
1631 +/**************************************************************
1632 + XENBUS support code
1633 +**************************************************************/
1635 +static int setup_tpmring(struct xenbus_device *dev,
1636 + struct tpm_private *tp)
1638 + tpmif_tx_interface_t *sring;
1641 + tp->ring_ref = GRANT_INVALID_REF;
1643 + sring = (void *)__get_free_page(GFP_KERNEL);
1645 + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1650 + err = xenbus_grant_ring(dev, virt_to_mfn(tp->tx));
1652 + free_page((unsigned long)sring);
1654 + xenbus_dev_fatal(dev, err, "allocating grant reference");
1657 + tp->ring_ref = err;
1659 + err = tpmif_connect(dev, tp, dev->otherend_id);
1665 + destroy_tpmring(tp);
1670 +static void destroy_tpmring(struct tpm_private *tp)
1672 + tpmif_set_connected_state(tp, 0);
1674 + if (tp->ring_ref != GRANT_INVALID_REF) {
1675 + gnttab_end_foreign_access(tp->ring_ref, (unsigned long)tp->tx);
1676 + tp->ring_ref = GRANT_INVALID_REF;
1681 + unbind_from_irqhandler(tp->irq, tp);
1687 +static int talk_to_backend(struct xenbus_device *dev,
1688 + struct tpm_private *tp)
1690 + const char *message = NULL;
1692 + struct xenbus_transaction xbt;
1694 + err = setup_tpmring(dev, tp);
1696 + xenbus_dev_fatal(dev, err, "setting up ring");
1701 + err = xenbus_transaction_start(&xbt);
1703 + xenbus_dev_fatal(dev, err, "starting transaction");
1704 + goto destroy_tpmring;
1707 + err = xenbus_printf(xbt, dev->nodename,
1708 + "ring-ref","%u", tp->ring_ref);
1710 + message = "writing ring-ref";
1711 + goto abort_transaction;
1714 + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
1715 + irq_to_evtchn_port(tp->irq));
1717 + message = "writing event-channel";
1718 + goto abort_transaction;
1721 + err = xenbus_transaction_end(xbt, 0);
1722 + if (err == -EAGAIN)
1725 + xenbus_dev_fatal(dev, err, "completing transaction");
1726 + goto destroy_tpmring;
1729 + xenbus_switch_state(dev, XenbusStateConnected);
1734 + xenbus_transaction_end(xbt, 1);
1736 + xenbus_dev_error(dev, err, "%s", message);
1738 + destroy_tpmring(tp);
1744 + * Callback received when the backend's state changes.
1746 +static void backend_changed(struct xenbus_device *dev,
1747 + enum xenbus_state backend_state)
1749 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1752 + switch (backend_state) {
1753 + case XenbusStateInitialising:
1754 + case XenbusStateInitWait:
1755 + case XenbusStateInitialised:
1756 + case XenbusStateReconfiguring:
1757 + case XenbusStateReconfigured:
1758 + case XenbusStateUnknown:
1761 + case XenbusStateConnected:
1762 + tpmif_set_connected_state(tp, 1);
1765 + case XenbusStateClosing:
1766 + tpmif_set_connected_state(tp, 0);
1767 + xenbus_frontend_closed(dev);
1770 + case XenbusStateClosed:
1771 + tpmif_set_connected_state(tp, 0);
1772 + if (tp->is_suspended == 0)
1773 + device_unregister(&dev->dev);
1774 + xenbus_frontend_closed(dev);
1779 +static int tpmfront_probe(struct xenbus_device *dev,
1780 + const struct xenbus_device_id *id)
1784 + struct tpm_private *tp = tpm_private_get();
1789 + tp->chip = init_vtpm(&dev->dev, tp);
1790 + if (IS_ERR(tp->chip))
1791 + return PTR_ERR(tp->chip);
1793 + err = xenbus_scanf(XBT_NIL, dev->nodename,
1794 + "handle", "%i", &handle);
1795 + if (XENBUS_EXIST_ERR(err))
1799 + xenbus_dev_fatal(dev,err,"reading virtual-device");
1805 + err = talk_to_backend(dev, tp);
1807 + tpm_private_put();
1815 +static int tpmfront_remove(struct xenbus_device *dev)
1817 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1818 + destroy_tpmring(tp);
1819 + cleanup_vtpm(&dev->dev);
1823 +static int tpmfront_suspend(struct xenbus_device *dev)
1825 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1828 + /* Take the lock, preventing any application from sending. */
1829 + mutex_lock(&suspend_lock);
1830 + tp->is_suspended = 1;
1832 + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
1833 + if ((ctr % 10) == 0)
1834 + printk("TPM-FE [INFO]: Waiting for outstanding "
1836 + /* Wait for a request to be responded to. */
1837 + interruptible_sleep_on_timeout(&tp->wait_q, 100);
1843 +static int tpmfront_suspend_finish(struct tpm_private *tp)
1845 + tp->is_suspended = 0;
1846 + /* Allow applications to send again. */
1847 + mutex_unlock(&suspend_lock);
1851 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
1853 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1854 + return tpmfront_suspend_finish(tp);
1857 +static int tpmfront_resume(struct xenbus_device *dev)
1859 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
1860 + destroy_tpmring(tp);
1861 + return talk_to_backend(dev, tp);
1864 +static int tpmif_connect(struct xenbus_device *dev,
1865 + struct tpm_private *tp,
1870 + tp->backend_id = domid;
1872 + err = bind_listening_port_to_irqhandler(
1873 + domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
1875 + WPRINTK("bind_listening_port_to_irqhandler failed "
1876 + "(err=%d)\n", err);
1884 +static struct xenbus_device_id tpmfront_ids[] = {
1889 +static struct xenbus_driver tpmfront = {
1891 + .owner = THIS_MODULE,
1892 + .ids = tpmfront_ids,
1893 + .probe = tpmfront_probe,
1894 + .remove = tpmfront_remove,
1895 + .resume = tpmfront_resume,
1896 + .otherend_changed = backend_changed,
1897 + .suspend = tpmfront_suspend,
1898 + .suspend_cancel = tpmfront_suspend_cancel,
1901 +static void __init init_tpm_xenbus(void)
1903 + xenbus_register_frontend(&tpmfront);
1906 +static int tpmif_allocate_tx_buffers(struct tpm_private *tp)
1910 + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
1911 + tp->tx_buffers[i] = tx_buffer_alloc();
1912 + if (!tp->tx_buffers[i]) {
1913 + tpmif_free_tx_buffers(tp);
1920 +static void tpmif_free_tx_buffers(struct tpm_private *tp)
1924 + for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
1925 + tx_buffer_free(tp->tx_buffers[i]);
1928 +static void tpmif_rx_action(unsigned long priv)
1930 + struct tpm_private *tp = (struct tpm_private *)priv;
1932 + unsigned int received;
1933 + unsigned int offset = 0;
1935 + tpmif_tx_request_t *tx = &tp->tx->ring[i].req;
1937 + atomic_set(&tp->tx_busy, 0);
1938 + wake_up_interruptible(&tp->wait_q);
1940 + received = tx->size;
1942 + buffer = kmalloc(received, GFP_ATOMIC);
1946 + for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
1947 + struct tx_buffer *txb = tp->tx_buffers[i];
1948 + tpmif_tx_request_t *tx;
1949 + unsigned int tocopy;
1951 + tx = &tp->tx->ring[i].req;
1952 + tocopy = tx->size;
1953 + if (tocopy > PAGE_SIZE)
1954 + tocopy = PAGE_SIZE;
1956 + memcpy(&buffer[offset], txb->data, tocopy);
1958 + gnttab_release_grant_reference(&gref_head, tx->ref);
1963 + vtpm_vd_recv(tp->chip, buffer, received, tp->tx_remember);
1968 +static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
1970 + struct tpm_private *tp = tpm_priv;
1971 + unsigned long flags;
1973 + spin_lock_irqsave(&tp->tx_lock, flags);
1974 + tpmif_rx_tasklet.data = (unsigned long)tp;
1975 + tasklet_schedule(&tpmif_rx_tasklet);
1976 + spin_unlock_irqrestore(&tp->tx_lock, flags);
1978 + return IRQ_HANDLED;
1982 +static int tpm_xmit(struct tpm_private *tp,
1983 + const u8 * buf, size_t count, int isuserbuffer,
1986 + tpmif_tx_request_t *tx;
1988 + unsigned int offset = 0;
1990 + spin_lock_irq(&tp->tx_lock);
1992 + if (unlikely(atomic_read(&tp->tx_busy))) {
1993 + printk("tpm_xmit: There's an outstanding request/response "
1995 + spin_unlock_irq(&tp->tx_lock);
1999 + if (tp->is_connected != 1) {
2000 + spin_unlock_irq(&tp->tx_lock);
2004 + for (i = 0; count > 0 && i < TPMIF_TX_RING_SIZE; i++) {
2005 + struct tx_buffer *txb = tp->tx_buffers[i];
2009 + DPRINTK("txb (i=%d) is NULL. buffers initilized?\n"
2010 + "Not transmitting anything!\n", i);
2011 + spin_unlock_irq(&tp->tx_lock);
2015 + copied = tx_buffer_copy(txb, &buf[offset], count,
2018 + /* An error occurred */
2019 + spin_unlock_irq(&tp->tx_lock);
2025 + tx = &tp->tx->ring[i].req;
2026 + tx->addr = virt_to_machine(txb->data);
2027 + tx->size = txb->len;
2030 + DPRINTK("First 4 characters sent by TPM-FE are "
2031 + "0x%02x 0x%02x 0x%02x 0x%02x\n",
2032 + txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
2034 + /* Get the granttable reference for this page. */
2035 + tx->ref = gnttab_claim_grant_reference(&gref_head);
2036 + if (tx->ref == -ENOSPC) {
2037 + spin_unlock_irq(&tp->tx_lock);
2038 + DPRINTK("Grant table claim reference failed in "
2039 + "func:%s line:%d file:%s\n",
2040 + __FUNCTION__, __LINE__, __FILE__);
2043 + gnttab_grant_foreign_access_ref(tx->ref,
2045 + virt_to_mfn(txb->data),
2050 + atomic_set(&tp->tx_busy, 1);
2051 + tp->tx_remember = remember;
2055 + notify_remote_via_irq(tp->irq);
2057 + spin_unlock_irq(&tp->tx_lock);
2062 +static void tpmif_notify_upperlayer(struct tpm_private *tp)
2064 + /* Notify upper layer about the state of the connection to the BE. */
2065 + vtpm_vd_status(tp->chip, (tp->is_connected
2066 + ? TPM_VD_STATUS_CONNECTED
2067 + : TPM_VD_STATUS_DISCONNECTED));
2071 +static void tpmif_set_connected_state(struct tpm_private *tp, u8 is_connected)
2074 + * Don't notify upper layer if we are in suspend mode and
2075 + * should disconnect - assumption is that we will resume
2076 + * The mutex keeps apps from sending.
2078 + if (is_connected == 0 && tp->is_suspended == 1)
2082 + * Unlock the mutex if we are connected again
2083 + * after being suspended - now resuming.
2084 + * This also removes the suspend state.
2086 + if (is_connected == 1 && tp->is_suspended == 1)
2087 + tpmfront_suspend_finish(tp);
2089 + if (is_connected != tp->is_connected) {
2090 + tp->is_connected = is_connected;
2091 + tpmif_notify_upperlayer(tp);
2097 +/* =================================================================
2098 + * Initialization function.
2099 + * =================================================================
2103 +static int __init tpmif_init(void)
2105 + struct tpm_private *tp;
2107 + if (is_initial_xendomain())
2110 + tp = tpm_private_get();
2114 + IPRINTK("Initialising the vTPM driver.\n");
2115 + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
2116 + &gref_head) < 0) {
2117 + tpm_private_put();
2121 + init_tpm_xenbus();
2126 +module_init(tpmif_init);
2128 +MODULE_LICENSE("Dual BSD/GPL");
2129 --- sle11-2009-10-16.orig/drivers/ide/ide-lib.c 2009-10-16 14:48:16.000000000 +0200
2130 +++ sle11-2009-10-16/drivers/ide/ide-lib.c 2009-08-26 11:52:33.000000000 +0200
2131 @@ -177,12 +177,12 @@ void ide_toggle_bounce(ide_drive_t *driv
2133 u64 addr = BLK_BOUNCE_HIGH; /* dma64_addr_t */
2135 - if (!PCI_DMA_BUS_IS_PHYS) {
2136 - addr = BLK_BOUNCE_ANY;
2137 - } else if (on && drive->media == ide_disk) {
2138 + if (on && drive->media == ide_disk) {
2139 struct device *dev = drive->hwif->dev;
2141 - if (dev && dev->dma_mask)
2142 + if (!PCI_DMA_BUS_IS_PHYS)
2143 + addr = BLK_BOUNCE_ANY;
2144 + else if (dev && dev->dma_mask)
2145 addr = *dev->dma_mask;
2148 --- sle11-2009-10-16.orig/drivers/oprofile/buffer_sync.c 2009-10-16 14:48:16.000000000 +0200
2149 +++ sle11-2009-10-16/drivers/oprofile/buffer_sync.c 2009-08-26 11:52:33.000000000 +0200
2152 * @author John Levon <levon@movementarian.org>
2154 + * Modified by Aravind Menon for Xen
2155 + * These modifications are:
2156 + * Copyright (C) 2005 Hewlett-Packard Co.
2158 * This is the core of the buffer management. Each
2159 * CPU buffer is processed and entered into the
2160 * global event buffer. Such processing is necessary
2161 @@ -40,6 +44,7 @@ static cpumask_t marked_cpus = CPU_MASK_
2162 static DEFINE_SPINLOCK(task_mortuary);
2163 static void process_task_mortuary(void);
2165 +static int cpu_current_domain[NR_CPUS];
2167 /* Take ownership of the task struct and place it on the
2168 * list for processing. Only after two full buffer syncs
2169 @@ -148,6 +153,11 @@ static void end_sync(void)
2170 int sync_start(void)
2175 + for (i = 0; i < NR_CPUS; i++) {
2176 + cpu_current_domain[i] = COORDINATOR_DOMAIN;
2181 @@ -274,15 +284,31 @@ static void add_cpu_switch(int i)
2182 last_cookie = INVALID_COOKIE;
2185 -static void add_kernel_ctx_switch(unsigned int in_kernel)
2186 +static void add_cpu_mode_switch(unsigned int cpu_mode)
2188 add_event_entry(ESCAPE_CODE);
2190 - add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2192 - add_event_entry(KERNEL_EXIT_SWITCH_CODE);
2193 + switch (cpu_mode) {
2194 + case CPU_MODE_USER:
2195 + add_event_entry(USER_ENTER_SWITCH_CODE);
2197 + case CPU_MODE_KERNEL:
2198 + add_event_entry(KERNEL_ENTER_SWITCH_CODE);
2200 + case CPU_MODE_XEN:
2201 + add_event_entry(XEN_ENTER_SWITCH_CODE);
2209 +static void add_domain_switch(unsigned long domain_id)
2211 + add_event_entry(ESCAPE_CODE);
2212 + add_event_entry(DOMAIN_SWITCH_CODE);
2213 + add_event_entry(domain_id);
2217 add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
2219 @@ -347,9 +373,9 @@ static int add_us_sample(struct mm_struc
2220 * for later lookup from userspace.
2223 -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
2224 +add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
2227 + if (cpu_mode >= CPU_MODE_KERNEL) {
2228 add_sample_entry(s->eip, s->event);
2231 @@ -495,15 +521,21 @@ void sync_buffer(int cpu)
2232 struct mm_struct *mm = NULL;
2233 struct task_struct * new;
2234 unsigned long cookie = 0;
2235 - int in_kernel = 1;
2238 sync_buffer_state state = sb_buffer_start;
2239 unsigned long available;
2240 + int domain_switch = 0;
2242 mutex_lock(&buffer_mutex);
2244 add_cpu_switch(cpu);
2246 + /* We need to assign the first samples in this CPU buffer to the
2247 + same domain that we were processing at the last sync_buffer */
2248 + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2249 + add_domain_switch(cpu_current_domain[cpu]);
2251 /* Remember, only we can modify tail_pos */
2253 available = get_slots(cpu_buf);
2254 @@ -511,16 +543,18 @@ void sync_buffer(int cpu)
2255 for (i = 0; i < available; ++i) {
2256 struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
2258 - if (is_code(s->eip)) {
2259 - if (s->event <= CPU_IS_KERNEL) {
2260 - /* kernel/userspace switch */
2261 - in_kernel = s->event;
2262 + if (is_code(s->eip) && !domain_switch) {
2263 + if (s->event <= CPU_MODE_XEN) {
2264 + /* xen/kernel/userspace switch */
2265 + cpu_mode = s->event;
2266 if (state == sb_buffer_start)
2267 state = sb_sample_start;
2268 - add_kernel_ctx_switch(s->event);
2269 + add_cpu_mode_switch(s->event);
2270 } else if (s->event == CPU_TRACE_BEGIN) {
2271 state = sb_bt_start;
2273 + } else if (s->event == CPU_DOMAIN_SWITCH) {
2274 + domain_switch = 1;
2276 struct mm_struct * oldmm = mm;
2278 @@ -534,11 +568,21 @@ void sync_buffer(int cpu)
2279 add_user_ctx_switch(new, cookie);
2282 - if (state >= sb_bt_start &&
2283 - !add_sample(mm, s, in_kernel)) {
2284 - if (state == sb_bt_start) {
2285 - state = sb_bt_ignore;
2286 - atomic_inc(&oprofile_stats.bt_lost_no_mapping);
2287 + if (domain_switch) {
2288 + cpu_current_domain[cpu] = s->eip;
2289 + add_domain_switch(s->eip);
2290 + domain_switch = 0;
2292 + if (cpu_current_domain[cpu] !=
2293 + COORDINATOR_DOMAIN) {
2294 + add_sample_entry(s->eip, s->event);
2296 + else if (state >= sb_bt_start &&
2297 + !add_sample(mm, s, cpu_mode)) {
2298 + if (state == sb_bt_start) {
2299 + state = sb_bt_ignore;
2300 + atomic_inc(&oprofile_stats.bt_lost_no_mapping);
2305 @@ -547,6 +591,11 @@ void sync_buffer(int cpu)
2309 + /* We reset domain to COORDINATOR at each CPU switch */
2310 + if (cpu_current_domain[cpu] != COORDINATOR_DOMAIN) {
2311 + add_domain_switch(COORDINATOR_DOMAIN);
2316 mutex_unlock(&buffer_mutex);
2317 --- sle11-2009-10-16.orig/drivers/oprofile/cpu_buffer.c 2009-10-16 14:48:16.000000000 +0200
2318 +++ sle11-2009-10-16/drivers/oprofile/cpu_buffer.c 2009-08-26 11:52:33.000000000 +0200
2321 * @author John Levon <levon@movementarian.org>
2323 + * Modified by Aravind Menon for Xen
2324 + * These modifications are:
2325 + * Copyright (C) 2005 Hewlett-Packard Co.
2327 * Each CPU has a local buffer that stores PC value/event
2328 * pairs. We also log context switches when we notice them.
2329 * Eventually each CPU's buffer is processed into the global
2330 @@ -34,6 +38,8 @@ static void wq_sync_buffer(struct work_s
2331 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
2332 static int work_enabled;
2334 +static int32_t current_domain = COORDINATOR_DOMAIN;
2336 void free_cpu_buffers(void)
2339 @@ -72,7 +78,7 @@ int alloc_cpu_buffers(void)
2342 b->last_task = NULL;
2343 - b->last_is_kernel = -1;
2344 + b->last_cpu_mode = -1;
2346 b->buffer_size = buffer_size;
2348 @@ -130,7 +136,7 @@ void cpu_buffer_reset(struct oprofile_cp
2349 * collected will populate the buffer with proper
2350 * values to initialize the buffer
2352 - cpu_buf->last_is_kernel = -1;
2353 + cpu_buf->last_cpu_mode = -1;
2354 cpu_buf->last_task = NULL;
2357 @@ -180,13 +186,13 @@ add_code(struct oprofile_cpu_buffer * bu
2358 * because of the head/tail separation of the writer and reader
2359 * of the CPU buffer.
2361 - * is_kernel is needed because on some architectures you cannot
2362 + * cpu_mode is needed because on some architectures you cannot
2363 * tell if you are in kernel or user space simply by looking at
2364 - * pc. We tag this in the buffer by generating kernel enter/exit
2365 - * events whenever is_kernel changes
2366 + * pc. We tag this in the buffer by generating kernel/user (and xen)
2367 + * enter events whenever cpu_mode changes
2369 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
2370 - int is_kernel, unsigned long event)
2371 + int cpu_mode, unsigned long event)
2373 struct task_struct * task;
2375 @@ -202,18 +208,18 @@ static int log_sample(struct oprofile_cp
2379 - is_kernel = !!is_kernel;
2383 /* notice a switch from user->kernel or vice versa */
2384 - if (cpu_buf->last_is_kernel != is_kernel) {
2385 - cpu_buf->last_is_kernel = is_kernel;
2386 - add_code(cpu_buf, is_kernel);
2387 + if (cpu_buf->last_cpu_mode != cpu_mode) {
2388 + cpu_buf->last_cpu_mode = cpu_mode;
2389 + add_code(cpu_buf, cpu_mode);
2393 /* notice a task switch */
2394 - if (cpu_buf->last_task != task) {
2395 + /* if not processing other domain samples */
2396 + if ((cpu_buf->last_task != task) &&
2397 + (current_domain == COORDINATOR_DOMAIN)) {
2398 cpu_buf->last_task = task;
2399 add_code(cpu_buf, (unsigned long)task);
2401 @@ -297,6 +303,25 @@ void oprofile_add_trace(unsigned long pc
2402 add_sample(cpu_buf, pc, 0);
2405 +int oprofile_add_domain_switch(int32_t domain_id)
2407 + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
2409 + /* should have space for switching into and out of domain
2410 + (2 slots each) plus one sample and one cpu mode switch */
2411 + if (((nr_available_slots(cpu_buf) < 6) &&
2412 + (domain_id != COORDINATOR_DOMAIN)) ||
2413 + (nr_available_slots(cpu_buf) < 2))
2416 + add_code(cpu_buf, CPU_DOMAIN_SWITCH);
2417 + add_sample(cpu_buf, domain_id, 0);
2419 + current_domain = domain_id;
2425 * This serves to avoid cpu buffer overflow, and makes sure
2426 * the task mortuary progresses
2427 --- sle11-2009-10-16.orig/drivers/oprofile/cpu_buffer.h 2009-10-16 14:48:16.000000000 +0200
2428 +++ sle11-2009-10-16/drivers/oprofile/cpu_buffer.h 2009-08-26 11:52:33.000000000 +0200
2429 @@ -37,7 +37,7 @@ struct oprofile_cpu_buffer {
2430 volatile unsigned long tail_pos;
2431 unsigned long buffer_size;
2432 struct task_struct * last_task;
2433 - int last_is_kernel;
2434 + int last_cpu_mode;
2436 struct op_sample * buffer;
2437 unsigned long sample_received;
2438 @@ -53,7 +53,10 @@ DECLARE_PER_CPU(struct oprofile_cpu_buff
2439 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
2441 /* transient events for the CPU buffer -> event buffer */
2442 -#define CPU_IS_KERNEL 1
2443 -#define CPU_TRACE_BEGIN 2
2444 +#define CPU_MODE_USER 0
2445 +#define CPU_MODE_KERNEL 1
2446 +#define CPU_MODE_XEN 2
2447 +#define CPU_TRACE_BEGIN 3
2448 +#define CPU_DOMAIN_SWITCH 4
2450 #endif /* OPROFILE_CPU_BUFFER_H */
2451 --- sle11-2009-10-16.orig/drivers/oprofile/event_buffer.h 2009-10-16 14:48:16.000000000 +0200
2452 +++ sle11-2009-10-16/drivers/oprofile/event_buffer.h 2009-08-26 11:52:33.000000000 +0200
2453 @@ -30,6 +30,9 @@ void wake_up_buffer_waiter(void);
2454 #define INVALID_COOKIE ~0UL
2455 #define NO_COOKIE 0UL
2457 +/* Constant used to refer to coordinator domain (Xen) */
2458 +#define COORDINATOR_DOMAIN -1
2460 extern const struct file_operations event_buffer_fops;
2462 /* mutex between sync_cpu_buffers() and the
2463 --- sle11-2009-10-16.orig/drivers/oprofile/oprof.c 2009-10-16 14:48:16.000000000 +0200
2464 +++ sle11-2009-10-16/drivers/oprofile/oprof.c 2009-08-26 11:52:33.000000000 +0200
2466 * @remark Read the file COPYING
2468 * @author John Levon <levon@movementarian.org>
2470 + * Modified by Aravind Menon for Xen
2471 + * These modifications are:
2472 + * Copyright (C) 2005 Hewlett-Packard Co.
2475 #include <linux/kernel.h>
2476 @@ -33,6 +37,32 @@ static DEFINE_MUTEX(start_mutex);
2478 static int timer = 0;
2480 +int oprofile_set_active(int active_domains[], unsigned int adomains)
2484 + if (!oprofile_ops.set_active)
2487 + mutex_lock(&start_mutex);
2488 + err = oprofile_ops.set_active(active_domains, adomains);
2489 + mutex_unlock(&start_mutex);
2493 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
2497 + if (!oprofile_ops.set_passive)
2500 + mutex_lock(&start_mutex);
2501 + err = oprofile_ops.set_passive(passive_domains, pdomains);
2502 + mutex_unlock(&start_mutex);
2506 int oprofile_setup(void)
2509 --- sle11-2009-10-16.orig/drivers/oprofile/oprof.h 2009-10-16 14:48:16.000000000 +0200
2510 +++ sle11-2009-10-16/drivers/oprofile/oprof.h 2009-08-26 11:52:33.000000000 +0200
2511 @@ -35,5 +35,8 @@ void oprofile_create_files(struct super_
2512 void oprofile_timer_init(struct oprofile_operations * ops);
2514 int oprofile_set_backtrace(unsigned long depth);
2516 +int oprofile_set_active(int active_domains[], unsigned int adomains);
2517 +int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
2519 #endif /* OPROF_H */
2520 --- sle11-2009-10-16.orig/drivers/oprofile/oprofile_files.c 2009-10-16 14:48:16.000000000 +0200
2521 +++ sle11-2009-10-16/drivers/oprofile/oprofile_files.c 2009-08-26 11:52:33.000000000 +0200
2523 * @remark Read the file COPYING
2525 * @author John Levon <levon@movementarian.org>
2527 + * Modified by Aravind Menon for Xen
2528 + * These modifications are:
2529 + * Copyright (C) 2005 Hewlett-Packard Co.
2532 #include <linux/fs.h>
2533 #include <linux/oprofile.h>
2534 +#include <asm/uaccess.h>
2535 +#include <linux/ctype.h>
2537 #include "event_buffer.h"
2538 #include "oprofile_stats.h"
2542 unsigned long fs_buffer_size = 131072;
2543 unsigned long fs_cpu_buffer_size = 8192;
2544 unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
2545 @@ -117,11 +123,202 @@ static ssize_t dump_write(struct file *
2546 static const struct file_operations dump_fops = {
2547 .write = dump_write,
2551 +#define TMPBUFSIZE 512
2553 +static unsigned int adomains = 0;
2554 +static int active_domains[MAX_OPROF_DOMAINS + 1];
2555 +static DEFINE_MUTEX(adom_mutex);
2557 +static ssize_t adomain_write(struct file * file, char const __user * buf,
2558 + size_t count, loff_t * offset)
2561 + char *startp, *endp;
2563 + unsigned long val;
2564 + ssize_t retval = count;
2568 + if (count > TMPBUFSIZE - 1)
2571 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2574 + if (copy_from_user(tmpbuf, buf, count)) {
2578 + tmpbuf[count] = 0;
2580 + mutex_lock(&adom_mutex);
2583 + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2584 + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2585 + val = simple_strtoul(startp, &endp, 0);
2586 + if (endp == startp)
2588 + while (ispunct(*endp) || isspace(*endp))
2590 + active_domains[i] = val;
2591 + if (active_domains[i] != val)
2592 + /* Overflow, force error below */
2593 + i = MAX_OPROF_DOMAINS + 1;
2596 + /* Force error on trailing junk */
2597 + adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2601 + if (adomains > MAX_OPROF_DOMAINS
2602 + || oprofile_set_active(active_domains, adomains)) {
2607 + mutex_unlock(&adom_mutex);
2611 +static ssize_t adomain_read(struct file * file, char __user * buf,
2612 + size_t count, loff_t * offset)
2619 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2622 + mutex_lock(&adom_mutex);
2625 + for (i = 0; i < adomains; i++)
2626 + len += snprintf(tmpbuf + len,
2627 + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2628 + "%u ", active_domains[i]);
2629 + WARN_ON(len > TMPBUFSIZE);
2630 + if (len != 0 && len <= TMPBUFSIZE)
2631 + tmpbuf[len-1] = '\n';
2633 + mutex_unlock(&adom_mutex);
2635 + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2642 +static struct file_operations active_domain_ops = {
2643 + .read = adomain_read,
2644 + .write = adomain_write,
2647 +static unsigned int pdomains = 0;
2648 +static int passive_domains[MAX_OPROF_DOMAINS];
2649 +static DEFINE_MUTEX(pdom_mutex);
2651 +static ssize_t pdomain_write(struct file * file, char const __user * buf,
2652 + size_t count, loff_t * offset)
2655 + char *startp, *endp;
2657 + unsigned long val;
2658 + ssize_t retval = count;
2662 + if (count > TMPBUFSIZE - 1)
2665 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2668 + if (copy_from_user(tmpbuf, buf, count)) {
2672 + tmpbuf[count] = 0;
2674 + mutex_lock(&pdom_mutex);
2677 + /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
2678 + for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
2679 + val = simple_strtoul(startp, &endp, 0);
2680 + if (endp == startp)
2682 + while (ispunct(*endp) || isspace(*endp))
2684 + passive_domains[i] = val;
2685 + if (passive_domains[i] != val)
2686 + /* Overflow, force error below */
2687 + i = MAX_OPROF_DOMAINS + 1;
2690 + /* Force error on trailing junk */
2691 + pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
2695 + if (pdomains > MAX_OPROF_DOMAINS
2696 + || oprofile_set_passive(passive_domains, pdomains)) {
2701 + mutex_unlock(&pdom_mutex);
2705 +static ssize_t pdomain_read(struct file * file, char __user * buf,
2706 + size_t count, loff_t * offset)
2713 + if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
2716 + mutex_lock(&pdom_mutex);
2719 + for (i = 0; i < pdomains; i++)
2720 + len += snprintf(tmpbuf + len,
2721 + len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
2722 + "%u ", passive_domains[i]);
2723 + WARN_ON(len > TMPBUFSIZE);
2724 + if (len != 0 && len <= TMPBUFSIZE)
2725 + tmpbuf[len-1] = '\n';
2727 + mutex_unlock(&pdom_mutex);
2729 + retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
2735 +static struct file_operations passive_domain_ops = {
2736 + .read = pdomain_read,
2737 + .write = pdomain_write,
2740 void oprofile_create_files(struct super_block * sb, struct dentry * root)
2742 oprofilefs_create_file(sb, root, "enable", &enable_fops);
2743 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
2744 + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
2745 + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
2746 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
2747 oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
2748 oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
2749 --- sle11-2009-10-16.orig/fs/aio.c 2009-10-16 14:48:16.000000000 +0200
2750 +++ sle11-2009-10-16/fs/aio.c 2009-08-26 11:52:33.000000000 +0200
2752 #include <asm/uaccess.h>
2753 #include <asm/mmu_context.h>
2755 +#ifdef CONFIG_EPOLL
2756 +#include <linux/poll.h>
2757 +#include <linux/eventpoll.h>
2761 #define dprintk printk
2763 @@ -1026,6 +1031,11 @@ put_rq:
2764 if (waitqueue_active(&ctx->wait))
2765 wake_up(&ctx->wait);
2767 +#ifdef CONFIG_EPOLL
2768 + if (ctx->file && waitqueue_active(&ctx->poll_wait))
2769 + wake_up(&ctx->poll_wait);
2772 spin_unlock_irqrestore(&ctx->ctx_lock, flags);
2775 @@ -1033,6 +1043,8 @@ put_rq:
2777 * Pull an event off of the ioctx's event ring. Returns the number of
2778 * events fetched (0 or 1 ;-)
2779 + * If ent parameter is 0, just returns the number of events that would
2781 * FIXME: make this use cmpxchg.
2782 * TODO: make the ringbuffer user mmap()able (requires FIXME).
2784 @@ -1055,13 +1067,18 @@ static int aio_read_evt(struct kioctx *i
2786 head = ring->head % info->nr;
2787 if (head != ring->tail) {
2788 - struct io_event *evp = aio_ring_event(info, head, KM_USER1);
2790 - head = (head + 1) % info->nr;
2791 - smp_mb(); /* finish reading the event before updatng the head */
2792 - ring->head = head;
2794 - put_aio_ring_event(evp, KM_USER1);
2795 + if (ent) { /* event requested */
2796 + struct io_event *evp =
2797 + aio_ring_event(info, head, KM_USER1);
2799 + head = (head + 1) % info->nr;
2800 + /* finish reading the event before updating the head */
2802 + ring->head = head;
2804 + put_aio_ring_event(evp, KM_USER1);
2805 + } else /* only need to know availability */
2808 spin_unlock(&info->ring_lock);
2810 @@ -1251,6 +1268,13 @@ static void io_destroy(struct kioctx *io
2812 aio_cancel_all(ioctx);
2813 wait_for_all_aios(ioctx);
2814 +#ifdef CONFIG_EPOLL
2815 + /* forget the poll file, but it's up to the user to close it */
2816 + if (ioctx->file) {
2817 + ioctx->file->private_data = 0;
2823 * Wake up any waiters. The setting of ctx->dead must be seen
2824 @@ -1261,6 +1285,67 @@ static void io_destroy(struct kioctx *io
2825 put_ioctx(ioctx); /* once for the lookup */
2828 +#ifdef CONFIG_EPOLL
2830 +static int aio_queue_fd_close(struct inode *inode, struct file *file)
2832 + struct kioctx *ioctx = file->private_data;
2834 + file->private_data = 0;
2835 + spin_lock_irq(&ioctx->ctx_lock);
2837 + spin_unlock_irq(&ioctx->ctx_lock);
2842 +static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
2843 +{ unsigned int pollflags = 0;
2844 + struct kioctx *ioctx = file->private_data;
2848 + spin_lock_irq(&ioctx->ctx_lock);
2849 + /* Insert inside our poll wait queue */
2850 + poll_wait(file, &ioctx->poll_wait, wait);
2852 + /* Check our condition */
2853 + if (aio_read_evt(ioctx, 0))
2854 + pollflags = POLLIN | POLLRDNORM;
2855 + spin_unlock_irq(&ioctx->ctx_lock);
2861 +static const struct file_operations aioq_fops = {
2862 + .release = aio_queue_fd_close,
2863 + .poll = aio_queue_fd_poll
2867 + * Create a file descriptor that can be used to poll the event queue.
2868 + * Based and piggybacked on the excellent epoll code.
2871 +static int make_aio_fd(struct kioctx *ioctx)
2874 + struct inode *inode;
2875 + struct file *file;
2877 + error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
2881 + /* associate the file with the IO context */
2882 + file->private_data = ioctx;
2883 + ioctx->file = file;
2884 + init_waitqueue_head(&ioctx->poll_wait);
2890 * Create an aio_context capable of receiving at least nr_events.
2891 * ctxp must not point to an aio_context that already exists, and
2892 @@ -1273,18 +1358,30 @@ static void io_destroy(struct kioctx *io
2893 * resources are available. May fail with -EFAULT if an invalid
2894 * pointer is passed for ctxp. Will fail with -ENOSYS if not
2897 + * To request a selectable fd, the user context has to be initialized
2898 + * to 1, instead of 0, and the return value is the fd.
2899 + * This keeps the system call compatible, since a non-zero value
2900 + * was not allowed so far.
2902 SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
2904 struct kioctx *ioctx = NULL;
2909 ret = get_user(ctx, ctxp);
2914 +#ifdef CONFIG_EPOLL
2920 if (unlikely(ctx || nr_events == 0)) {
2921 pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
2923 @@ -1295,8 +1392,12 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_e
2924 ret = PTR_ERR(ioctx);
2925 if (!IS_ERR(ioctx)) {
2926 ret = put_user(ioctx->user_id, ctxp);
2929 +#ifdef CONFIG_EPOLL
2930 + if (make_fd && ret >= 0)
2931 + ret = make_aio_fd(ioctx);
2936 get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
2938 --- sle11-2009-10-16.orig/fs/compat_ioctl.c 2009-10-16 14:48:16.000000000 +0200
2939 +++ sle11-2009-10-16/fs/compat_ioctl.c 2009-08-26 11:52:33.000000000 +0200
2940 @@ -114,6 +114,13 @@
2941 #include <asm/fbio.h>
2945 +#include <xen/interface/xen.h>
2946 +#include <xen/public/evtchn.h>
2947 +#include <xen/public/privcmd.h>
2948 +#include <xen/compat_ioctl.h>
2951 static int do_ioctl32_pointer(unsigned int fd, unsigned int cmd,
2952 unsigned long arg, struct file *f)
2954 @@ -2736,6 +2743,18 @@ IGNORE_IOCTL(FBIOGETCMAP32)
2955 IGNORE_IOCTL(FBIOSCURSOR32)
2956 IGNORE_IOCTL(FBIOGCURSOR32)
2960 +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
2961 +HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
2962 +COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
2963 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
2964 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
2965 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_UNBOUND_PORT)
2966 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_UNBIND)
2967 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_NOTIFY)
2968 +COMPATIBLE_IOCTL(IOCTL_EVTCHN_RESET)
2972 #define IOCTL_HASHSIZE 256
2973 --- sle11-2009-10-16.orig/include/acpi/processor.h 2009-10-16 14:48:16.000000000 +0200
2974 +++ sle11-2009-10-16/include/acpi/processor.h 2009-08-26 11:52:33.000000000 +0200
2976 #define ACPI_PROCESSOR_MAX_THROTTLE 250 /* 25% */
2977 #define ACPI_PROCESSOR_MAX_DUTY_WIDTH 4
2980 +#define NR_ACPI_CPUS (NR_CPUS < 256 ? 256 : NR_CPUS)
2982 +#define NR_ACPI_CPUS NR_CPUS
2983 +#endif /* CONFIG_XEN */
2985 #define ACPI_PDC_REVISION_ID 0x1
2987 #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */
2990 struct acpi_processor_cx;
2992 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
2993 +struct acpi_csd_package {
2994 + acpi_integer num_entries;
2995 + acpi_integer revision;
2996 + acpi_integer domain;
2997 + acpi_integer coord_type;
2998 + acpi_integer num_processors;
2999 + acpi_integer index;
3000 +} __attribute__ ((packed));
3003 struct acpi_power_register {
3006 @@ -74,6 +91,12 @@ struct acpi_processor_cx {
3010 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3011 + /* Require raw information for external control logic */
3012 + struct acpi_power_register reg;
3014 + struct acpi_csd_package *domain_info;
3016 struct acpi_processor_cx_policy promotion;
3017 struct acpi_processor_cx_policy demotion;
3018 char desc[ACPI_CX_DESC_LEN];
3019 @@ -304,6 +327,9 @@ static inline void acpi_processor_ppc_ex
3023 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3024 +int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
3026 static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr)
3028 static unsigned int printout = 1;
3029 @@ -316,6 +342,7 @@ static inline int acpi_processor_ppc_has
3033 +#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
3034 #endif /* CONFIG_CPU_FREQ */
3036 /* in processor_throttling.c */
3037 @@ -352,4 +379,120 @@ static inline void acpi_thermal_cpufreq_
3042 + * Following are interfaces geared to external processor PM control
3043 + * logic like a VMM
3045 +/* Events notified to external control logic */
3046 +#define PROCESSOR_PM_INIT 1
3047 +#define PROCESSOR_PM_CHANGE 2
3048 +#define PROCESSOR_HOTPLUG 3
3050 +/* Objects for the PM events */
3051 +#define PM_TYPE_IDLE 0
3052 +#define PM_TYPE_PERF 1
3053 +#define PM_TYPE_THR 2
3054 +#define PM_TYPE_MAX 3
3056 +/* Processor hotplug events */
3057 +#define HOTPLUG_TYPE_ADD 0
3058 +#define HOTPLUG_TYPE_REMOVE 1
3060 +#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
3061 +struct processor_extcntl_ops {
3062 + /* Transfer processor PM events to external control logic */
3063 + int (*pm_ops[PM_TYPE_MAX])(struct acpi_processor *pr, int event);
3064 + /* Notify physical processor status to external control logic */
3065 + int (*hotplug)(struct acpi_processor *pr, int type);
3067 +extern const struct processor_extcntl_ops *processor_extcntl_ops;
3069 +static inline int processor_cntl_external(void)
3071 + return (processor_extcntl_ops != NULL);
3074 +static inline int processor_pm_external(void)
3076 + return processor_cntl_external() &&
3077 + (processor_extcntl_ops->pm_ops[PM_TYPE_IDLE] != NULL);
3080 +static inline int processor_pmperf_external(void)
3082 + return processor_cntl_external() &&
3083 + (processor_extcntl_ops->pm_ops[PM_TYPE_PERF] != NULL);
3086 +static inline int processor_pmthr_external(void)
3088 + return processor_cntl_external() &&
3089 + (processor_extcntl_ops->pm_ops[PM_TYPE_THR] != NULL);
3092 +extern int processor_notify_external(struct acpi_processor *pr,
3093 + int event, int type);
3094 +extern void processor_extcntl_init(void);
3095 +extern int processor_extcntl_prepare(struct acpi_processor *pr);
3096 +extern int acpi_processor_get_performance_info(struct acpi_processor *pr);
3097 +extern int acpi_processor_get_psd(struct acpi_processor *pr);
3098 +void arch_acpi_processor_init_extcntl(const struct processor_extcntl_ops **);
3100 +static inline int processor_cntl_external(void) {return 0;}
3101 +static inline int processor_pm_external(void) {return 0;}
3102 +static inline int processor_pmperf_external(void) {return 0;}
3103 +static inline int processor_pmthr_external(void) {return 0;}
3104 +static inline int processor_notify_external(struct acpi_processor *pr,
3105 + int event, int type)
3109 +static inline void processor_extcntl_init(void) {}
3110 +static inline int processor_extcntl_prepare(struct acpi_processor *pr)
3114 +#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
3117 +static inline void xen_convert_pct_reg(struct xen_pct_register *xpct,
3118 + struct acpi_pct_register *apct)
3120 + xpct->descriptor = apct->descriptor;
3121 + xpct->length = apct->length;
3122 + xpct->space_id = apct->space_id;
3123 + xpct->bit_width = apct->bit_width;
3124 + xpct->bit_offset = apct->bit_offset;
3125 + xpct->reserved = apct->reserved;
3126 + xpct->address = apct->address;
3129 +static inline void xen_convert_pss_states(struct xen_processor_px *xpss,
3130 + struct acpi_processor_px *apss, int state_count)
3133 + for(i=0; i<state_count; i++) {
3134 + xpss->core_frequency = apss->core_frequency;
3135 + xpss->power = apss->power;
3136 + xpss->transition_latency = apss->transition_latency;
3137 + xpss->bus_master_latency = apss->bus_master_latency;
3138 + xpss->control = apss->control;
3139 + xpss->status = apss->status;
3145 +static inline void xen_convert_psd_pack(struct xen_psd_package *xpsd,
3146 + struct acpi_psd_package *apsd)
3148 + xpsd->num_entries = apsd->num_entries;
3149 + xpsd->revision = apsd->revision;
3150 + xpsd->domain = apsd->domain;
3151 + xpsd->coord_type = apsd->coord_type;
3152 + xpsd->num_processors = apsd->num_processors;
3155 +#endif /* CONFIG_XEN */
3158 --- sle11-2009-10-16.orig/include/asm-generic/pci.h 2009-10-16 14:48:16.000000000 +0200
3159 +++ sle11-2009-10-16/include/asm-generic/pci.h 2009-08-26 11:52:33.000000000 +0200
3160 @@ -43,7 +43,9 @@ pcibios_select_root(struct pci_dev *pdev
3164 +#ifndef pcibios_scan_all_fns
3165 #define pcibios_scan_all_fns(a, b) 0
3168 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
3169 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
3170 --- sle11-2009-10-16.orig/include/asm-generic/pgtable.h 2009-10-16 14:48:16.000000000 +0200
3171 +++ sle11-2009-10-16/include/asm-generic/pgtable.h 2009-08-26 11:52:33.000000000 +0200
3172 @@ -99,6 +99,10 @@ static inline void ptep_set_wrprotect(st
3176 +#ifndef arch_change_pte_range
3177 +#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
3180 #ifndef __HAVE_ARCH_PTE_SAME
3181 #define pte_same(A,B) (pte_val(A) == pte_val(B))
3183 --- sle11-2009-10-16.orig/include/linux/aio.h 2009-10-16 14:48:16.000000000 +0200
3184 +++ sle11-2009-10-16/include/linux/aio.h 2009-08-26 11:52:33.000000000 +0200
3185 @@ -199,6 +199,11 @@ struct kioctx {
3186 struct aio_ring_info ring_info;
3188 struct delayed_work wq;
3189 +#ifdef CONFIG_EPOLL
3190 + // poll integration
3191 + wait_queue_head_t poll_wait;
3192 + struct file *file;
3197 --- sle11-2009-10-16.orig/include/linux/highmem.h 2009-10-16 14:48:16.000000000 +0200
3198 +++ sle11-2009-10-16/include/linux/highmem.h 2009-08-26 11:52:33.000000000 +0200
3199 @@ -62,6 +62,7 @@ static inline void *kmap_atomic(struct p
3201 #endif /* CONFIG_HIGHMEM */
3203 +#ifndef __HAVE_ARCH_CLEAR_USER_HIGHPAGE
3204 /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
3205 static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
3207 @@ -69,6 +70,7 @@ static inline void clear_user_highpage(s
3208 clear_user_page(addr, vaddr, page);
3209 kunmap_atomic(addr, KM_USER0);
3213 #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
3215 @@ -115,12 +117,14 @@ alloc_zeroed_user_highpage_movable(struc
3216 return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
3219 +#ifndef __HAVE_ARCH_CLEAR_HIGHPAGE
3220 static inline void clear_highpage(struct page *page)
3222 void *kaddr = kmap_atomic(page, KM_USER0);
3224 kunmap_atomic(kaddr, KM_USER0);
3228 static inline void zero_user_segments(struct page *page,
3229 unsigned start1, unsigned end1,
3230 @@ -174,6 +178,8 @@ static inline void copy_user_highpage(st
3234 +#ifndef __HAVE_ARCH_COPY_HIGHPAGE
3236 static inline void copy_highpage(struct page *to, struct page *from)
3239 @@ -185,4 +191,6 @@ static inline void copy_highpage(struct
3240 kunmap_atomic(vto, KM_USER1);
3245 #endif /* _LINUX_HIGHMEM_H */
3246 --- sle11-2009-10-16.orig/include/linux/interrupt.h 2009-10-16 14:48:16.000000000 +0200
3247 +++ sle11-2009-10-16/include/linux/interrupt.h 2009-08-26 11:52:33.000000000 +0200
3248 @@ -218,6 +218,12 @@ static inline int disable_irq_wake(unsig
3250 #endif /* CONFIG_GENERIC_HARDIRQS */
3252 +#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED
3253 +int irq_ignore_unhandled(unsigned int irq);
3255 +#define irq_ignore_unhandled(irq) 0
3258 #ifndef __ARCH_SET_SOFTIRQ_PENDING
3259 #define set_softirq_pending(x) (local_softirq_pending() = (x))
3260 #define or_softirq_pending(x) (local_softirq_pending() |= (x))
3261 --- sle11-2009-10-16.orig/include/linux/kexec.h 2009-10-16 14:48:16.000000000 +0200
3262 +++ sle11-2009-10-16/include/linux/kexec.h 2009-08-26 11:52:33.000000000 +0200
3264 KEXEC_CORE_NOTE_NAME_BYTES + \
3265 KEXEC_CORE_NOTE_DESC_BYTES )
3267 +#ifndef KEXEC_ARCH_HAS_PAGE_MACROS
3268 +#define kexec_page_to_pfn(page) page_to_pfn(page)
3269 +#define kexec_pfn_to_page(pfn) pfn_to_page(pfn)
3270 +#define kexec_virt_to_phys(addr) virt_to_phys(addr)
3271 +#define kexec_phys_to_virt(addr) phys_to_virt(addr)
3275 * This structure is used to hold the arguments that are used when loading
3277 @@ -108,6 +115,12 @@ struct kimage {
3278 extern void machine_kexec(struct kimage *image);
3279 extern int machine_kexec_prepare(struct kimage *image);
3280 extern void machine_kexec_cleanup(struct kimage *image);
3282 +extern int xen_machine_kexec_load(struct kimage *image);
3283 +extern void xen_machine_kexec_unload(struct kimage *image);
3284 +extern void xen_machine_kexec_setup_resources(void);
3285 +extern void xen_machine_kexec_register_resources(struct resource *res);
3287 extern asmlinkage long sys_kexec_load(unsigned long entry,
3288 unsigned long nr_segments,
3289 struct kexec_segment __user *segments,
3290 --- sle11-2009-10-16.orig/include/linux/mm.h 2009-10-16 14:48:16.000000000 +0200
3291 +++ sle11-2009-10-16/include/linux/mm.h 2009-08-26 11:52:33.000000000 +0200
3292 @@ -114,6 +114,9 @@ extern unsigned int kobjsize(const void
3293 #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */
3294 #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */
3295 #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
3297 +#define VM_FOREIGN 0x40000000 /* Has pages belonging to another VM */
3299 #define VM_PAGE_MKWRITE2 0x80000000 /* Uses page_mkwrite2 rather than page_mkwrite */
3301 #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
3302 @@ -199,6 +202,11 @@ struct vm_operations_struct {
3304 int (*access)(struct vm_area_struct *vma, unsigned long addr,
3305 void *buf, int len, int write);
3307 + /* Area-specific function for clearing the PTE at @ptep. Returns the
3308 + * original value of @ptep. */
3309 + pte_t (*zap_pte)(struct vm_area_struct *vma,
3310 + unsigned long addr, pte_t *ptep, int is_fullmm);
3313 * set_policy() op must add a reference to any non-NULL @new mempolicy
3314 --- sle11-2009-10-16.orig/include/linux/oprofile.h 2009-10-16 14:48:16.000000000 +0200
3315 +++ sle11-2009-10-16/include/linux/oprofile.h 2009-08-26 11:52:33.000000000 +0200
3317 #include <linux/types.h>
3318 #include <linux/spinlock.h>
3319 #include <asm/atomic.h>
3321 +#include <xen/interface/xenoprof.h>
3323 /* Each escaped entry is prefixed by ESCAPE_CODE
3324 * then one of the following codes, then the
3326 #define CPU_SWITCH_CODE 2
3327 #define COOKIE_SWITCH_CODE 3
3328 #define KERNEL_ENTER_SWITCH_CODE 4
3329 -#define KERNEL_EXIT_SWITCH_CODE 5
3330 +#define USER_ENTER_SWITCH_CODE 5
3331 #define MODULE_LOADED_CODE 6
3332 #define CTX_TGID_CODE 7
3333 #define TRACE_BEGIN_CODE 8
3334 #define TRACE_END_CODE 9
3335 #define XEN_ENTER_SWITCH_CODE 10
3337 #define SPU_PROFILING_CODE 11
3338 #define SPU_CTX_SWITCH_CODE 12
3340 +#define DOMAIN_SWITCH_CODE 11
3345 @@ -47,6 +53,11 @@ struct oprofile_operations {
3346 /* create any necessary configuration files in the oprofile fs.
3348 int (*create_files)(struct super_block * sb, struct dentry * root);
3349 + /* setup active domains with Xen */
3350 + int (*set_active)(int *active_domains, unsigned int adomains);
3351 + /* setup passive domains with Xen */
3352 + int (*set_passive)(int *passive_domains, unsigned int pdomains);
3354 /* Do any necessary interrupt setup. Optional. */
3356 /* Do any necessary interrupt shutdown. Optional. */
3357 @@ -106,6 +117,8 @@ void oprofile_add_pc(unsigned long pc, i
3358 /* add a backtrace entry, to be called from the ->backtrace callback */
3359 void oprofile_add_trace(unsigned long eip);
3361 +/* add a domain switch entry */
3362 +int oprofile_add_domain_switch(int32_t domain_id);
3365 * Create a file of the given name as a child of the given root, with
3366 --- sle11-2009-10-16.orig/include/linux/page-flags.h 2009-10-16 14:48:16.000000000 +0200
3367 +++ sle11-2009-10-16/include/linux/page-flags.h 2009-08-26 11:52:33.000000000 +0200
3368 @@ -98,6 +98,9 @@ enum pageflags {
3369 #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
3370 PG_uncached, /* Page has been mapped as uncached */
3373 + PG_foreign, /* Page is owned by foreign allocator. */
3378 @@ -271,6 +274,19 @@ static inline void SetPageUptodate(struc
3380 CLEARPAGEFLAG(Uptodate, uptodate)
3382 +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags)
3383 +#define SetPageForeign(_page, dtor) do { \
3384 + set_bit(PG_foreign, &(_page)->flags); \
3385 + BUG_ON((dtor) == (void (*)(struct page *))0); \
3386 + (_page)->index = (long)(dtor); \
3388 +#define ClearPageForeign(page) do { \
3389 + clear_bit(PG_foreign, &(page)->flags); \
3390 + (page)->index = 0; \
3392 +#define PageForeignDestructor(_page) \
3393 + ((void (*)(struct page *))(_page)->index)(_page)
3395 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
3397 int test_clear_page_writeback(struct page *page);
3398 @@ -341,9 +357,18 @@ PAGEFLAG(MemError, memerror)
3399 PAGEFLAG_FALSE(MemError)
3402 +#if !defined(CONFIG_XEN)
3403 +# define PAGE_FLAGS_XEN 0
3404 +#elif defined(CONFIG_X86)
3405 +# define PAGE_FLAGS_XEN ((1 << PG_pinned) | (1 << PG_foreign))
3407 +# define PAGE_FLAGS_XEN (1 << PG_foreign)
3410 #define PAGE_FLAGS (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \
3411 1 << PG_buddy | 1 << PG_writeback | 1 << PG_waiters | \
3412 - 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active)
3413 + 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
3417 * Flags checked in bad_page(). Pages on the free list should not have
3418 --- sle11-2009-10-16.orig/include/linux/pci.h 2009-10-16 14:48:16.000000000 +0200
3419 +++ sle11-2009-10-16/include/linux/pci.h 2009-08-26 11:52:33.000000000 +0200
3420 @@ -211,6 +211,9 @@ struct pci_dev {
3421 * directly, use the values stored here. They might be different!
3425 + unsigned int irq_old;
3427 struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
3429 /* These fields are used by common fixups */
3430 @@ -772,6 +775,11 @@ static inline int pci_msi_enabled(void)
3436 +#define register_msi_get_owner(func) 0
3437 +#define unregister_msi_get_owner(func) 0
3440 extern int pci_enable_msi(struct pci_dev *dev);
3441 extern void pci_msi_shutdown(struct pci_dev *dev);
3442 @@ -784,6 +792,10 @@ extern void msi_remove_pci_irq_vectors(s
3443 extern void pci_restore_msi_state(struct pci_dev *dev);
3444 extern int pci_msi_enabled(void);
3447 +extern int register_msi_get_owner(int (*func)(struct pci_dev *dev));
3448 +extern int unregister_msi_get_owner(int (*func)(struct pci_dev *dev));
3452 #ifndef CONFIG_PCIEASPM
3453 --- sle11-2009-10-16.orig/include/linux/skbuff.h 2009-10-16 14:48:16.000000000 +0200
3454 +++ sle11-2009-10-16/include/linux/skbuff.h 2009-08-26 11:52:33.000000000 +0200
3455 @@ -217,6 +217,8 @@ typedef unsigned char *sk_buff_data_t;
3456 * @local_df: allow local fragmentation
3457 * @cloned: Head may be cloned (check refcnt to be sure)
3458 * @nohdr: Payload reference only, must not modify header
3459 + * @proto_data_valid: Protocol data validated since arriving at localhost
3460 + * @proto_csum_blank: Protocol csum must be added before leaving localhost
3461 * @pkt_type: Packet class
3462 * @fclone: skbuff clone status
3463 * @ip_summed: Driver fed us an IP checksum
3464 @@ -323,7 +325,11 @@ struct sk_buff {
3468 - /* 12-16 bit hole */
3470 + __u8 proto_data_valid:1,
3471 + proto_csum_blank:1;
3473 + /* 10-16 bit hole */
3475 #ifdef CONFIG_NET_DMA
3476 dma_cookie_t dma_cookie;
3477 --- sle11-2009-10-16.orig/include/linux/vermagic.h 2009-10-16 14:48:16.000000000 +0200
3478 +++ sle11-2009-10-16/include/linux/vermagic.h 2009-08-26 11:52:33.000000000 +0200
3481 #define MODULE_VERMAGIC_MODVERSIONS ""
3484 +#define MODULE_VERMAGIC_XEN "Xen "
3486 +#define MODULE_VERMAGIC_XEN
3488 #ifndef MODULE_ARCH_VERMAGIC
3489 #define MODULE_ARCH_VERMAGIC ""
3493 MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
3494 MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
3495 - MODULE_ARCH_VERMAGIC
3496 + MODULE_VERMAGIC_XEN MODULE_ARCH_VERMAGIC
3498 --- sle11-2009-10-16.orig/kernel/irq/spurious.c 2009-10-16 14:48:16.000000000 +0200
3499 +++ sle11-2009-10-16/kernel/irq/spurious.c 2009-08-26 11:52:33.000000000 +0200
3500 @@ -193,7 +193,7 @@ void note_interrupt(unsigned int irq, st
3502 if (time_after(jiffies, desc->last_unhandled + HZ/10))
3503 desc->irqs_unhandled = 1;
3505 + else if (!irq_ignore_unhandled(irq))
3506 desc->irqs_unhandled++;
3507 desc->last_unhandled = jiffies;
3508 if (unlikely(action_ret != IRQ_NONE))
3509 --- sle11-2009-10-16.orig/kernel/kexec.c 2009-10-16 14:48:16.000000000 +0200
3510 +++ sle11-2009-10-16/kernel/kexec.c 2009-08-26 11:52:33.000000000 +0200
3511 @@ -359,13 +359,26 @@ static int kimage_is_destination_range(s
3515 -static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
3516 +static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order, unsigned long limit)
3520 pages = alloc_pages(gfp_mask, order);
3522 unsigned int count, i;
3526 + if (limit == ~0UL)
3527 + address_bits = BITS_PER_LONG;
3529 + address_bits = long_log2(limit);
3531 + if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
3532 + __free_pages(pages, order);
3536 pages->mapping = NULL;
3537 set_page_private(pages, order);
3539 @@ -384,6 +397,9 @@ static void kimage_free_pages(struct pag
3541 for (i = 0; i < count; i++)
3542 ClearPageReserved(page + i);
3544 + xen_destroy_contiguous_region((unsigned long)page_address(page), order);
3546 __free_pages(page, order);
3549 @@ -429,10 +445,10 @@ static struct page *kimage_alloc_normal_
3551 unsigned long pfn, epfn, addr, eaddr;
3553 - pages = kimage_alloc_pages(GFP_KERNEL, order);
3554 + pages = kimage_alloc_pages(GFP_KERNEL, order, KEXEC_CONTROL_MEMORY_LIMIT);
3557 - pfn = page_to_pfn(pages);
3558 + pfn = kexec_page_to_pfn(pages);
3560 addr = pfn << PAGE_SHIFT;
3561 eaddr = epfn << PAGE_SHIFT;
3562 @@ -466,6 +482,7 @@ static struct page *kimage_alloc_normal_
3567 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
3570 @@ -519,7 +536,7 @@ static struct page *kimage_alloc_crash_c
3572 /* If I don't overlap any segments I have found my hole! */
3573 if (i == image->nr_segments) {
3574 - pages = pfn_to_page(hole_start >> PAGE_SHIFT);
3575 + pages = kexec_pfn_to_page(hole_start >> PAGE_SHIFT);
3579 @@ -546,6 +563,13 @@ struct page *kimage_alloc_control_pages(
3583 +#else /* !CONFIG_XEN */
3584 +struct page *kimage_alloc_control_pages(struct kimage *image,
3585 + unsigned int order)
3587 + return kimage_alloc_normal_control_pages(image, order);
3591 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
3593 @@ -561,7 +585,7 @@ static int kimage_add_entry(struct kimag
3596 ind_page = page_address(page);
3597 - *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
3598 + *image->entry = kexec_virt_to_phys(ind_page) | IND_INDIRECTION;
3599 image->entry = ind_page;
3600 image->last_entry = ind_page +
3601 ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
3602 @@ -620,13 +644,13 @@ static void kimage_terminate(struct kima
3603 #define for_each_kimage_entry(image, ptr, entry) \
3604 for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
3605 ptr = (entry & IND_INDIRECTION)? \
3606 - phys_to_virt((entry & PAGE_MASK)): ptr +1)
3607 + kexec_phys_to_virt((entry & PAGE_MASK)): ptr +1)
3609 static void kimage_free_entry(kimage_entry_t entry)
3613 - page = pfn_to_page(entry >> PAGE_SHIFT);
3614 + page = kexec_pfn_to_page(entry >> PAGE_SHIFT);
3615 kimage_free_pages(page);
3618 @@ -638,6 +662,10 @@ static void kimage_free(struct kimage *i
3623 + xen_machine_kexec_unload(image);
3626 kimage_free_extra_pages(image);
3627 for_each_kimage_entry(image, ptr, entry) {
3628 if (entry & IND_INDIRECTION) {
3629 @@ -713,7 +741,7 @@ static struct page *kimage_alloc_page(st
3632 list_for_each_entry(page, &image->dest_pages, lru) {
3633 - addr = page_to_pfn(page) << PAGE_SHIFT;
3634 + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
3635 if (addr == destination) {
3636 list_del(&page->lru);
3638 @@ -724,16 +752,16 @@ static struct page *kimage_alloc_page(st
3639 kimage_entry_t *old;
3641 /* Allocate a page, if we run out of memory give up */
3642 - page = kimage_alloc_pages(gfp_mask, 0);
3643 + page = kimage_alloc_pages(gfp_mask, 0, KEXEC_SOURCE_MEMORY_LIMIT);
3646 /* If the page cannot be used file it away */
3647 - if (page_to_pfn(page) >
3648 + if (kexec_page_to_pfn(page) >
3649 (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
3650 list_add(&page->lru, &image->unuseable_pages);
3653 - addr = page_to_pfn(page) << PAGE_SHIFT;
3654 + addr = kexec_page_to_pfn(page) << PAGE_SHIFT;
3656 /* If it is the destination page we want use it */
3657 if (addr == destination)
3658 @@ -756,7 +784,7 @@ static struct page *kimage_alloc_page(st
3659 struct page *old_page;
3661 old_addr = *old & PAGE_MASK;
3662 - old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
3663 + old_page = kexec_pfn_to_page(old_addr >> PAGE_SHIFT);
3664 copy_highpage(page, old_page);
3665 *old = addr | (*old & ~PAGE_MASK);
3667 @@ -812,7 +840,7 @@ static int kimage_load_normal_segment(st
3671 - result = kimage_add_page(image, page_to_pfn(page)
3672 + result = kimage_add_page(image, kexec_page_to_pfn(page)
3676 @@ -844,6 +872,7 @@ out:
3681 static int kimage_load_crash_segment(struct kimage *image,
3682 struct kexec_segment *segment)
3684 @@ -866,7 +895,7 @@ static int kimage_load_crash_segment(str
3686 size_t uchunk, mchunk;
3688 - page = pfn_to_page(maddr >> PAGE_SHIFT);
3689 + page = kexec_pfn_to_page(maddr >> PAGE_SHIFT);
3693 @@ -915,6 +944,13 @@ static int kimage_load_segment(struct ki
3697 +#else /* CONFIG_XEN */
3698 +static int kimage_load_segment(struct kimage *image,
3699 + struct kexec_segment *segment)
3701 + return kimage_load_normal_segment(image, segment);
3706 * Exec Kernel system call: for obvious reasons only root may call it.
3707 @@ -1018,6 +1054,13 @@ SYSCALL_DEFINE4(kexec_load, unsigned lon
3709 kimage_terminate(image);
3713 + result = xen_machine_kexec_load(image);
3718 /* Install the new kernel, and Uninstall the old */
3719 image = xchg(dest_image, image);
3721 --- sle11-2009-10-16.orig/kernel/sysctl.c 2009-10-16 14:48:16.000000000 +0200
3722 +++ sle11-2009-10-16/kernel/sysctl.c 2009-08-26 11:52:33.000000000 +0200
3723 @@ -751,7 +751,7 @@ static struct ctl_table kern_table[] = {
3724 .proc_handler = &proc_dointvec,
3727 -#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
3728 +#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) && !defined(CONFIG_ACPI_PV_SLEEP)
3730 .procname = "acpi_video_flags",
3731 .data = &acpi_realmode_flags,
3732 --- sle11-2009-10-16.orig/mm/memory.c 2009-10-16 14:48:16.000000000 +0200
3733 +++ sle11-2009-10-16/mm/memory.c 2009-08-26 11:52:33.000000000 +0200
3734 @@ -446,6 +446,12 @@ struct page *vm_normal_page(struct vm_ar
3738 +#if defined(CONFIG_XEN) && defined(CONFIG_X86)
3739 + /* XEN: Covers user-space grant mappings (even of local pages). */
3740 + if (unlikely(vma->vm_flags & VM_FOREIGN))
3744 if (HAVE_PTE_SPECIAL) {
3745 if (likely(!pte_special(pte))) {
3746 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
3747 @@ -474,7 +480,14 @@ struct page *vm_normal_page(struct vm_ar
3752 VM_BUG_ON(!pfn_valid(pfn));
3754 + if (unlikely(!pfn_valid(pfn))) {
3755 + VM_BUG_ON(!(vma->vm_flags & VM_RESERVED));
3761 * NOTE! We still have PageReserved() pages in the page tables.
3762 @@ -745,8 +758,12 @@ static unsigned long zap_pte_range(struc
3763 page->index > details->last_index))
3766 - ptent = ptep_get_and_clear_full(mm, addr, pte,
3768 + if (unlikely(vma->vm_ops && vma->vm_ops->zap_pte))
3769 + ptent = vma->vm_ops->zap_pte(vma, addr, pte,
3772 + ptent = ptep_get_and_clear_full(mm, addr, pte,
3774 tlb_remove_tlb_entry(tlb, pte, addr);
3775 if (unlikely(!page))
3777 @@ -996,6 +1013,7 @@ unsigned long zap_page_range(struct vm_a
3778 tlb_finish_mmu(tlb, address, end);
3781 +EXPORT_SYMBOL(zap_page_range);
3784 * zap_vma_ptes - remove ptes mapping the vma
3785 @@ -1193,6 +1211,26 @@ int get_user_pages(struct task_struct *t
3790 + if (vma && (vma->vm_flags & VM_FOREIGN)) {
3791 + struct page **map = vma->vm_private_data;
3792 + int offset = (start - vma->vm_start) >> PAGE_SHIFT;
3793 + if (map[offset] != NULL) {
3795 + struct page *page = map[offset];
3803 + start += PAGE_SIZE;
3809 if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
3810 || !(vm_flags & vma->vm_flags))
3811 return i ? : -EFAULT;
3812 --- sle11-2009-10-16.orig/mm/mprotect.c 2009-10-16 14:48:16.000000000 +0200
3813 +++ sle11-2009-10-16/mm/mprotect.c 2009-08-26 11:52:33.000000000 +0200
3814 @@ -92,6 +92,8 @@ static inline void change_pmd_range(stru
3815 next = pmd_addr_end(addr, end);
3816 if (pmd_none_or_clear_bad(pmd))
3818 + if (arch_change_pte_range(mm, pmd, addr, next, newprot))
3820 change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
3821 } while (pmd++, addr = next, addr != end);
3823 --- sle11-2009-10-16.orig/mm/page_alloc.c 2009-10-16 14:48:16.000000000 +0200
3824 +++ sle11-2009-10-16/mm/page_alloc.c 2009-08-26 11:52:33.000000000 +0200
3825 @@ -533,6 +533,12 @@ static void __free_pages_ok(struct page
3830 + if (PageForeign(page)) {
3831 + PageForeignDestructor(page);
3835 trace_page_free(page, order);
3837 for (i = 0 ; i < (1 << order) ; ++i)
3838 @@ -998,6 +1004,12 @@ static void free_hot_cold_page(struct pa
3839 struct per_cpu_pages *pcp;
3840 unsigned long flags;
3843 + if (PageForeign(page)) {
3844 + PageForeignDestructor(page);
3848 trace_page_free(page, 0);
3851 --- sle11-2009-10-16.orig/net/core/dev.c 2009-10-16 14:48:16.000000000 +0200
3852 +++ sle11-2009-10-16/net/core/dev.c 2009-08-26 11:52:33.000000000 +0200
3853 @@ -131,6 +131,12 @@
3855 #include "net-sysfs.h"
3858 +#include <net/ip.h>
3859 +#include <linux/tcp.h>
3860 +#include <linux/udp.h>
3864 * The list of packet types we will receive (as opposed to discard)
3865 * and the routines to invoke.
3866 @@ -1734,6 +1740,42 @@ static struct netdev_queue *dev_pick_tx(
3867 return netdev_get_tx_queue(dev, queue_index);
3871 +inline int skb_checksum_setup(struct sk_buff *skb)
3873 + if (skb->proto_csum_blank) {
3874 + if (skb->protocol != htons(ETH_P_IP))
3876 + skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
3877 + if (skb->h.raw >= skb->tail)
3879 + switch (skb->nh.iph->protocol) {
3881 + skb->csum = offsetof(struct tcphdr, check);
3884 + skb->csum = offsetof(struct udphdr, check);
3887 + if (net_ratelimit())
3888 + printk(KERN_ERR "Attempting to checksum a non-"
3889 + "TCP/UDP packet, dropping a protocol"
3890 + " %d packet", skb->nh.iph->protocol);
3893 + if ((skb->h.raw + skb->csum + 2) > skb->tail)
3895 + skb->ip_summed = CHECKSUM_HW;
3896 + skb->proto_csum_blank = 0;
3903 +inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
3907 * dev_queue_xmit - transmit a buffer
3908 * @skb: buffer to transmit
3909 @@ -1766,6 +1808,12 @@ int dev_queue_xmit(struct sk_buff *skb)
3913 + /* If a checksum-deferred packet is forwarded to a device that needs a
3914 + * checksum, correct the pointers and force checksumming.
3916 + if (skb_checksum_setup(skb))
3917 + goto out_kfree_skb;
3919 /* GSO will handle the following emulations directly. */
3920 if (netif_needs_gso(dev, skb))
3922 @@ -2274,6 +2322,19 @@ int netif_receive_skb(struct sk_buff *sk
3927 + switch (skb->ip_summed) {
3928 + case CHECKSUM_UNNECESSARY:
3929 + skb->proto_data_valid = 1;
3932 + /* XXX Implement me. */
3934 + skb->proto_data_valid = 0;
3939 if (skb_emergency(skb))
3942 @@ -4928,6 +4989,7 @@ EXPORT_SYMBOL(unregister_netdevice_notif
3943 EXPORT_SYMBOL(net_enable_timestamp);
3944 EXPORT_SYMBOL(net_disable_timestamp);
3945 EXPORT_SYMBOL(dev_get_flags);
3946 +EXPORT_SYMBOL(skb_checksum_setup);
3948 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3949 EXPORT_SYMBOL(br_handle_frame_hook);
3950 --- sle11-2009-10-16.orig/net/core/skbuff.c 2009-10-16 14:48:16.000000000 +0200
3951 +++ sle11-2009-10-16/net/core/skbuff.c 2009-08-26 11:52:33.000000000 +0200
3952 @@ -555,6 +555,10 @@ static struct sk_buff *__skb_clone(struc
3953 n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
3957 + C(proto_data_valid);
3958 + C(proto_csum_blank);
3960 n->destructor = NULL;
3963 --- sle11-2009-10-16.orig/net/ipv4/netfilter/nf_nat_proto_tcp.c 2009-10-16 14:48:16.000000000 +0200
3964 +++ sle11-2009-10-16/net/ipv4/netfilter/nf_nat_proto_tcp.c 2009-08-26 11:52:33.000000000 +0200
3965 @@ -75,6 +75,9 @@ tcp_manip_pkt(struct sk_buff *skb,
3966 if (hdrsize < sizeof(*hdr))
3969 + if (skb_checksum_setup(skb))
3972 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3973 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
3975 --- sle11-2009-10-16.orig/net/ipv4/netfilter/nf_nat_proto_udp.c 2009-10-16 14:48:16.000000000 +0200
3976 +++ sle11-2009-10-16/net/ipv4/netfilter/nf_nat_proto_udp.c 2009-08-26 11:52:33.000000000 +0200
3977 @@ -60,6 +60,10 @@ udp_manip_pkt(struct sk_buff *skb,
3978 newport = tuple->dst.u.udp.port;
3979 portptr = &hdr->dest;
3982 + if (skb_checksum_setup(skb))
3985 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3986 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
3987 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
3988 --- sle11-2009-10-16.orig/net/ipv4/xfrm4_output.c 2009-10-16 14:48:16.000000000 +0200
3989 +++ sle11-2009-10-16/net/ipv4/xfrm4_output.c 2009-08-26 11:52:33.000000000 +0200
3990 @@ -81,7 +81,7 @@ static int xfrm4_output_finish(struct sk
3993 skb->protocol = htons(ETH_P_IP);
3994 - return xfrm_output(skb);
3995 + return skb_checksum_setup(skb) ?: xfrm_output(skb);
3998 int xfrm4_output(struct sk_buff *skb)
3999 --- sle11-2009-10-16.orig/scripts/Makefile.build 2009-10-16 14:48:16.000000000 +0200
4000 +++ sle11-2009-10-16/scripts/Makefile.build 2009-08-26 11:52:33.000000000 +0200
4001 @@ -73,6 +73,20 @@ ifndef obj
4002 $(warning kbuild: Makefile.build is included improperly)
4005 +ifeq ($(CONFIG_XEN),y)
4006 +$(objtree)/scripts/Makefile.xen: $(srctree)/scripts/Makefile.xen.awk $(srctree)/scripts/Makefile.build
4007 + @echo ' Updating $@'
4008 + $(if $(shell echo a | $(AWK) '{ print gensub(/a/, "AA", "g"); }'),\
4009 + ,$(error 'Your awk program does not define gensub. Use gawk or another awk with gensub'))
4010 + @$(AWK) -f $< $(filter-out $<,$^) >$@
4012 +xen-src-single-used-m := $(patsubst $(srctree)/%,%,$(wildcard $(addprefix $(srctree)/,$(single-used-m:.o=-xen.c))))
4013 +xen-single-used-m := $(xen-src-single-used-m:-xen.c=.o)
4014 +single-used-m := $(filter-out $(xen-single-used-m),$(single-used-m))
4016 +-include $(objtree)/scripts/Makefile.xen
4019 # ===========================================================================
4021 ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),)
4022 --- sle11-2009-10-16.orig/scripts/Makefile.lib 2009-10-16 14:48:16.000000000 +0200
4023 +++ sle11-2009-10-16/scripts/Makefile.lib 2009-08-26 11:52:33.000000000 +0200
4024 @@ -17,6 +17,12 @@ obj-m := $(filter-out $(obj-y),$(obj-m))
4026 lib-y := $(filter-out $(obj-y), $(sort $(lib-y) $(lib-m)))
4028 +# Remove objects forcibly disabled
4030 +obj-y := $(filter-out $(disabled-obj-y),$(obj-y))
4031 +obj-m := $(filter-out $(disabled-obj-y),$(obj-m))
4032 +lib-y := $(filter-out $(disabled-obj-y),$(lib-y))
4035 # Handle objects in subdirs
4036 # ---------------------------------------------------------------------------