// SPDX-License-Identifier: GPL-2.0-only
/*
 * Xen models interrupts with abstract event channels. Because each
 * domain gets 1024 event channels, but NR_IRQS is not that large, we
 * must dynamically map irqs <-> event channels. The event channels
 * interface with the rest of the kernel by defining a xen interrupt
 * chip. When an event is received, it is mapped to an irq and sent
 * through the normal interrupt processing path.
 *
 * There are four kinds of events which can be mapped to an event
 * channel:
 *
 * 1. Inter-domain notifications. This includes all the virtual
 *    device events, since they're driven by front-ends in another domain
 *    (typically dom0).
 * 2. VIRQs, typically used for timers. These are per-cpu events.
 * 3. IPIs.
 * 4. PIRQs - Hardware interrupts.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/linkage.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/cpuhotplug.h>
#include <linux/atomic.h>
#include <linux/ktime.h>

#include <asm/ptrace.h>
#include <asm/idtentry.h>
#include <asm/io_apic.h>
#include <asm/i8259.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/pci.h>
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/hvm/params.h>
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
#include <xen/xenbus.h>
#include <asm/hw_irq.h>

#include "events_internal.h"

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "xen."
/* Interrupt types. */
enum xen_irq_type {
        IRQT_UNBOUND = 0,
        IRQT_PIRQ,
        IRQT_VIRQ,
        IRQT_IPI,
        IRQT_EVTCHN
};

/*
 * Packed IRQ information:
 * type - enum xen_irq_type
 * event channel - irq->event channel mapping
 * cpu - cpu this event channel is bound to
 * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 */
struct irq_info {
        struct list_head list;
        struct list_head eoi_list;
        struct rcu_work rwork;
        short refcnt;
        u8 spurious_cnt;
        u8 is_accounted;
        short type;             /* type: IRQT_* */
        u8 mask_reason;         /* Why is event channel masked */
#define EVT_MASK_REASON_EXPLICIT        0x01
#define EVT_MASK_REASON_TEMPORARY       0x02
#define EVT_MASK_REASON_EOI_PENDING     0x04
        u8 is_active;           /* Is event just being handled? */
        unsigned irq;
        evtchn_port_t evtchn;   /* event channel */
        unsigned short cpu;     /* cpu bound */
        unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
        unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
        u64 eoi_time;           /* Time in jiffies when to EOI. */
        raw_spinlock_t lock;
        bool is_static;         /* Is event channel static */

        union {
                unsigned short virq;
                enum ipi_vector ipi;
                struct {
                        unsigned short pirq;
                        unsigned short gsi;
                        unsigned char vector;
                        unsigned char flags;
                        uint16_t domid;
                } pirq;
                struct xenbus_device *interdomain;
        } u;
};
#define PIRQ_NEEDS_EOI  (1 << 0)
#define PIRQ_SHAREABLE  (1 << 1)
#define PIRQ_MSI_GROUP  (1 << 2)

static uint __read_mostly event_loop_timeout = 2;
module_param(event_loop_timeout, uint, 0644);

static uint __read_mostly event_eoi_delay = 10;
module_param(event_eoi_delay, uint, 0644);
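/*
 * Illustrative note (not part of the original file): with the "xen."
 * MODULE_PARAM_PREFIX defined above, both parameters can be tuned on the
 * kernel command line, e.g.:
 *
 *	xen.event_loop_timeout=4 xen.event_eoi_delay=20
 *
 * Both values are in jiffies, and mode 0644 means they should also be
 * writable at runtime, e.g. under /sys/module/xen/parameters/.
 */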
const struct evtchn_ops *evtchn_ops;

/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 */
static DEFINE_MUTEX(irq_mapping_update_lock);

/*
 * Lock hierarchy:
 *
 * irq_mapping_update_lock
 *   IRQ-desc lock
 *     percpu eoi_list_lock
 *       irq_info->lock
 */

static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};

/* Event channel distribution data */
static atomic_t channels_on_cpu[NR_CPUS];

static int **evtchn_to_irq;
static unsigned long *pirq_eoi_map;
static bool (*pirq_needs_eoi)(unsigned irq);

#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn)       ((chn) != 0)

static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];

static struct irq_chip xen_dynamic_chip;
static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);

static DEFINE_PER_CPU(unsigned int, irq_epoch);
static void clear_evtchn_to_irq_row(int *evtchn_row)
{
        unsigned col;

        for (col = 0; col < EVTCHN_PER_ROW; col++)
                WRITE_ONCE(evtchn_row[col], -1);
}

static void clear_evtchn_to_irq_all(void)
{
        unsigned row;

        for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
                if (evtchn_to_irq[row] == NULL)
                        continue;
                clear_evtchn_to_irq_row(evtchn_to_irq[row]);
        }
}
static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
{
        unsigned row;
        unsigned col;
        int *evtchn_row;

        if (evtchn >= xen_evtchn_max_channels())
                return -EINVAL;

        row = EVTCHN_ROW(evtchn);
        col = EVTCHN_COL(evtchn);

        if (evtchn_to_irq[row] == NULL) {
                /* Unallocated irq entries return -1 anyway */
                if (irq == -1)
                        return 0;

                evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
                if (evtchn_row == NULL)
                        return -ENOMEM;

                clear_evtchn_to_irq_row(evtchn_row);

                /*
                 * We've prepared an empty row for the mapping. If a different
                 * thread was faster inserting it, we can drop ours.
                 */
                if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL)
                        free_page((unsigned long) evtchn_row);
        }

        WRITE_ONCE(evtchn_to_irq[row][col], irq);

        return 0;
}
int get_evtchn_to_irq(evtchn_port_t evtchn)
{
        if (evtchn >= xen_evtchn_max_channels())
                return -1;
        if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
                return -1;
        return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
static struct irq_info *info_for_irq(unsigned irq)
{
        if (irq < nr_legacy_irqs())
                return legacy_info_ptrs[irq];
        else
                return irq_get_chip_data(irq);
}

static void set_info_for_irq(unsigned int irq, struct irq_info *info)
{
        if (irq < nr_legacy_irqs())
                legacy_info_ptrs[irq] = info;
        else
                irq_set_chip_data(irq, info);
}
/* Per CPU channel accounting */
static void channels_on_cpu_dec(struct irq_info *info)
{
        if (!info->is_accounted)
                return;

        info->is_accounted = 0;

        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
                return;

        WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
}

static void channels_on_cpu_inc(struct irq_info *info)
{
        if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
                return;

        if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
                                            INT_MAX)))
                return;

        info->is_accounted = 1;
}
static void delayed_free_irq(struct work_struct *work)
{
        struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
                                             rwork);
        unsigned int irq = info->irq;

        /* Remove the info pointer only now, with no potential users left. */
        set_info_for_irq(irq, NULL);

        kfree(info);

        /* Legacy IRQ descriptors are managed by the arch. */
        if (irq >= nr_legacy_irqs())
                irq_free_desc(irq);
}
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
                                     unsigned irq,
                                     enum xen_irq_type type,
                                     evtchn_port_t evtchn,
                                     unsigned short cpu)
{
        int ret;

        BUG_ON(info->type != IRQT_UNBOUND && info->type != type);

        info->type = type;
        info->irq = irq;
        info->evtchn = evtchn;
        info->cpu = cpu;
        info->mask_reason = EVT_MASK_REASON_EXPLICIT;
        raw_spin_lock_init(&info->lock);

        ret = set_evtchn_to_irq(evtchn, irq);
        if (ret < 0)
                return ret;

        irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);

        return xen_evtchn_port_setup(evtchn);
}
static int xen_irq_info_evtchn_setup(unsigned irq,
                                     evtchn_port_t evtchn,
                                     struct xenbus_device *dev)
{
        struct irq_info *info = info_for_irq(irq);
        int ret;

        ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
        info->u.interdomain = dev;
        if (dev)
                atomic_inc(&dev->event_channels);

        return ret;
}
static int xen_irq_info_ipi_setup(unsigned cpu,
                                  unsigned irq,
                                  evtchn_port_t evtchn,
                                  enum ipi_vector ipi)
{
        struct irq_info *info = info_for_irq(irq);

        info->u.ipi = ipi;

        per_cpu(ipi_to_irq, cpu)[ipi] = irq;

        return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
}

static int xen_irq_info_virq_setup(unsigned cpu,
                                   unsigned irq,
                                   evtchn_port_t evtchn,
                                   unsigned virq)
{
        struct irq_info *info = info_for_irq(irq);

        info->u.virq = virq;

        per_cpu(virq_to_irq, cpu)[virq] = irq;

        return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
}
static int xen_irq_info_pirq_setup(unsigned irq,
                                   evtchn_port_t evtchn,
                                   unsigned pirq,
                                   unsigned gsi,
                                   uint16_t domid,
                                   unsigned char flags)
{
        struct irq_info *info = info_for_irq(irq);

        info->u.pirq.pirq = pirq;
        info->u.pirq.gsi = gsi;
        info->u.pirq.domid = domid;
        info->u.pirq.flags = flags;

        return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
}
static void xen_irq_info_cleanup(struct irq_info *info)
{
        set_evtchn_to_irq(info->evtchn, -1);
        xen_evtchn_port_remove(info->evtchn, info->cpu);
        info->evtchn = 0;
        channels_on_cpu_dec(info);
}
/*
 * Accessors for packed IRQ information.
 */
evtchn_port_t evtchn_from_irq(unsigned irq)
{
        const struct irq_info *info = NULL;

        if (likely(irq < nr_irqs))
                info = info_for_irq(irq);

        return info ? info->evtchn : 0;
}

unsigned int irq_from_evtchn(evtchn_port_t evtchn)
{
        return get_evtchn_to_irq(evtchn);
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
int irq_from_virq(unsigned int cpu, unsigned int virq)
{
        return per_cpu(virq_to_irq, cpu)[virq];
}
static enum ipi_vector ipi_from_irq(unsigned irq)
{
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_IPI);

        return info->u.ipi;
}

static unsigned virq_from_irq(unsigned irq)
{
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_VIRQ);

        return info->u.virq;
}

static unsigned pirq_from_irq(unsigned irq)
{
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(info == NULL);
        BUG_ON(info->type != IRQT_PIRQ);

        return info->u.pirq.pirq;
}
static enum xen_irq_type type_from_irq(unsigned irq)
{
        return info_for_irq(irq)->type;
}

static unsigned cpu_from_irq(unsigned irq)
{
        return info_for_irq(irq)->cpu;
}

unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
{
        int irq = get_evtchn_to_irq(evtchn);
        unsigned ret = 0;

        if (irq != -1)
                ret = cpu_from_irq(irq);

        return ret;
}
static void do_mask(struct irq_info *info, u8 reason)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&info->lock, flags);

        if (!info->mask_reason)
                mask_evtchn(info->evtchn);

        info->mask_reason |= reason;

        raw_spin_unlock_irqrestore(&info->lock, flags);
}

static void do_unmask(struct irq_info *info, u8 reason)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&info->lock, flags);

        info->mask_reason &= ~reason;

        if (!info->mask_reason)
                unmask_evtchn(info->evtchn);

        raw_spin_unlock_irqrestore(&info->lock, flags);
}
static bool pirq_check_eoi_map(unsigned irq)
{
        return test_bit(pirq_from_irq(irq), pirq_eoi_map);
}

static bool pirq_needs_eoi_flag(unsigned irq)
{
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(info->type != IRQT_PIRQ);

        return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
                               bool force_affinity)
{
        int irq = get_evtchn_to_irq(evtchn);
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(irq == -1);

        if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
                struct irq_data *data = irq_get_irq_data(irq);

                irq_data_update_affinity(data, cpumask_of(cpu));
                irq_data_update_effective_affinity(data, cpumask_of(cpu));
        }

        xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);

        channels_on_cpu_dec(info);
        info->cpu = cpu;
        channels_on_cpu_inc(info);
}
/**
 * notify_remote_via_irq - send event to remote end of event channel via irq
 * @irq: irq of event channel to send event to
 *
 * Unlike notify_remote_via_evtchn(), this is safe to use across
 * save/restore. Notifications on a broken connection are silently
 * dropped.
 */
void notify_remote_via_irq(int irq)
{
        evtchn_port_t evtchn = evtchn_from_irq(irq);

        if (VALID_EVTCHN(evtchn))
                notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
struct lateeoi_work {
        struct delayed_work delayed;
        spinlock_t eoi_list_lock;
        struct list_head eoi_list;
};

static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
static void lateeoi_list_del(struct irq_info *info)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
        unsigned long flags;

        spin_lock_irqsave(&eoi->eoi_list_lock, flags);
        list_del_init(&info->eoi_list);
        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}
static void lateeoi_list_add(struct irq_info *info)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
        struct irq_info *elem;
        u64 now = get_jiffies_64();
        unsigned long delay;
        unsigned long flags;

        if (now < info->eoi_time)
                delay = info->eoi_time - now;
        else
                delay = 1;

        spin_lock_irqsave(&eoi->eoi_list_lock, flags);

        if (list_empty(&eoi->eoi_list)) {
                list_add(&info->eoi_list, &eoi->eoi_list);
                mod_delayed_work_on(info->eoi_cpu, system_wq,
                                    &eoi->delayed, delay);
        } else {
                list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
                        if (elem->eoi_time <= info->eoi_time)
                                break;
                }
                list_add(&info->eoi_list, &elem->eoi_list);
        }

        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
}
static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
{
        evtchn_port_t evtchn;
        unsigned int cpu;
        unsigned int delay = 0;

        evtchn = info->evtchn;
        if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
                return;

        if (spurious) {
                struct xenbus_device *dev = info->u.interdomain;
                unsigned int threshold = 1;

                if (dev && dev->spurious_threshold)
                        threshold = dev->spurious_threshold;

                if ((1 << info->spurious_cnt) < (HZ << 2)) {
                        if (info->spurious_cnt != 0xFF)
                                info->spurious_cnt++;
                }
                if (info->spurious_cnt > threshold) {
                        delay = 1 << (info->spurious_cnt - 1 - threshold);
                        if (delay > HZ)
                                delay = HZ;
                        if (delay)
                                info->eoi_cpu = smp_processor_id();
                        info->eoi_time = get_jiffies_64() + delay;
                        if (dev)
                                atomic_add(delay, &dev->jiffies_eoi_delayed);
                }
                if (dev)
                        atomic_inc(&dev->spurious_events);
        } else {
                info->spurious_cnt = 0;
        }

        cpu = info->eoi_cpu;
        if (info->eoi_time &&
            (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
                lateeoi_list_add(info);
                return;
        }

        info->eoi_time = 0;

        /* is_active hasn't been reset yet, do it now. */
        smp_store_release(&info->is_active, 0);
        do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}
static void xen_irq_lateeoi_worker(struct work_struct *work)
{
        struct lateeoi_work *eoi;
        struct irq_info *info;
        u64 now = get_jiffies_64();
        unsigned long flags;

        eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);

        rcu_read_lock();

        while (true) {
                spin_lock_irqsave(&eoi->eoi_list_lock, flags);

                info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
                                                eoi_list);

                if (info == NULL)
                        break;

                if (now < info->eoi_time) {
                        mod_delayed_work_on(info->eoi_cpu, system_wq,
                                            &eoi->delayed,
                                            info->eoi_time - now);
                        break;
                }

                list_del_init(&info->eoi_list);

                spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);

                info->eoi_time = 0;

                xen_irq_lateeoi_locked(info, false);
        }

        spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);

        rcu_read_unlock();
}
static void xen_cpu_init_eoi(unsigned int cpu)
{
        struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);

        INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
        spin_lock_init(&eoi->eoi_list_lock);
        INIT_LIST_HEAD(&eoi->eoi_list);
}
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{
        struct irq_info *info;

        rcu_read_lock();

        info = info_for_irq(irq);

        if (info)
                xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);

        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
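/*
 * Usage sketch (illustrative only; my_handler()/my_dev are hypothetical):
 * a handler bound through the lateeoi chip must signal end-of-interrupt
 * itself once the event has really been processed:
 *
 *	static irqreturn_t my_handler(int irq, void *my_dev)
 *	{
 *		bool ok = process_event(my_dev);	// hypothetical work
 *
 *		xen_irq_lateeoi(irq, ok ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 *
 * Until xen_irq_lateeoi() runs, the event channel stays masked, so a
 * misbehaving remote end cannot storm this CPU with events.
 */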
static void xen_irq_init(unsigned irq)
{
        struct irq_info *info;

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (info == NULL)
                panic("Unable to allocate metadata for IRQ%d\n", irq);

        info->type = IRQT_UNBOUND;
        info->refcnt = -1;
        INIT_RCU_WORK(&info->rwork, delayed_free_irq);

        set_info_for_irq(irq, info);
        /*
         * Interrupt affinity setting can be immediate. No point
         * in delaying it until an interrupt is handled.
         */
        irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);

        INIT_LIST_HEAD(&info->eoi_list);
        list_add_tail(&info->list, &xen_irq_list_head);
}
static int __must_check xen_allocate_irqs_dynamic(int nvec)
{
        int i, irq = irq_alloc_descs(-1, 0, nvec, -1);

        if (irq >= 0) {
                for (i = 0; i < nvec; i++)
                        xen_irq_init(irq + i);
        }

        return irq;
}

static inline int __must_check xen_allocate_irq_dynamic(void)
{
        return xen_allocate_irqs_dynamic(1);
}
static int __must_check xen_allocate_irq_gsi(unsigned gsi)
{
        int irq;

        /*
         * A PV guest has no concept of a GSI (since it has no ACPI
         * nor access to/knowledge of the physical APICs). Therefore
         * all IRQs are dynamically allocated from the entire IRQ
         * space.
         */
        if (xen_pv_domain() && !xen_initial_domain())
                return xen_allocate_irq_dynamic();

        /* Legacy IRQ descriptors are already allocated by the arch. */
        if (gsi < nr_legacy_irqs())
                irq = gsi;
        else
                irq = irq_alloc_desc_at(gsi, -1);

        xen_irq_init(irq);

        return irq;
}
static void xen_free_irq(unsigned irq)
{
        struct irq_info *info = info_for_irq(irq);

        if (WARN_ON(!info))
                return;

        if (!list_empty(&info->eoi_list))
                lateeoi_list_del(info);

        list_del(&info->list);

        WARN_ON(info->refcnt > 0);

        queue_rcu_work(system_wq, &info->rwork);
}
/* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info)
{
        smp_store_release(&info->is_active, 0);
        clear_evtchn(info->evtchn);
}
static void pirq_query_unmask(int irq)
{
        struct physdev_irq_status_query irq_status;
        struct irq_info *info = info_for_irq(irq);

        BUG_ON(info->type != IRQT_PIRQ);

        irq_status.irq = pirq_from_irq(irq);
        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
                irq_status.flags = 0;

        info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
        if (irq_status.flags & XENIRQSTAT_needs_eoi)
                info->u.pirq.flags |= PIRQ_NEEDS_EOI;
}
static void eoi_pirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;
        struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
        int rc = 0;

        if (!VALID_EVTCHN(evtchn))
                return;

        event_handler_exit(info);

        if (pirq_needs_eoi(data->irq)) {
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
                WARN_ON(rc);
        }
}
static void mask_ack_pirq(struct irq_data *data)
{
        disable_dynirq(data);
        eoi_pirq(data);
}
static unsigned int __startup_pirq(unsigned int irq)
{
        struct evtchn_bind_pirq bind_pirq;
        struct irq_info *info = info_for_irq(irq);
        evtchn_port_t evtchn = evtchn_from_irq(irq);
        int rc;

        BUG_ON(info->type != IRQT_PIRQ);

        if (VALID_EVTCHN(evtchn))
                goto out;

        bind_pirq.pirq = pirq_from_irq(irq);
        /* NB. We are happy to share unless we are probing. */
        bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
                                        BIND_PIRQ__WILL_SHARE : 0;
        rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
        if (rc != 0) {
                pr_warn("Failed to obtain physical IRQ %d\n", irq);
                return 0;
        }
        evtchn = bind_pirq.port;

        pirq_query_unmask(irq);

        rc = set_evtchn_to_irq(evtchn, irq);
        if (rc)
                goto err;

        info->evtchn = evtchn;
        bind_evtchn_to_cpu(evtchn, 0, false);

        rc = xen_evtchn_port_setup(evtchn);
        if (rc)
                goto err;

out:
        do_unmask(info, EVT_MASK_REASON_EXPLICIT);

        eoi_pirq(irq_get_irq_data(irq));

        return 0;

err:
        pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
        xen_evtchn_close(evtchn);
        return 0;
}
static unsigned int startup_pirq(struct irq_data *data)
{
        return __startup_pirq(data->irq);
}

static void shutdown_pirq(struct irq_data *data)
{
        unsigned int irq = data->irq;
        struct irq_info *info = info_for_irq(irq);
        evtchn_port_t evtchn = evtchn_from_irq(irq);

        BUG_ON(info->type != IRQT_PIRQ);

        if (!VALID_EVTCHN(evtchn))
                return;

        do_mask(info, EVT_MASK_REASON_EXPLICIT);
        xen_evtchn_close(evtchn);
        xen_irq_info_cleanup(info);
}
static void enable_pirq(struct irq_data *data)
{
        enable_dynirq(data);
}

static void disable_pirq(struct irq_data *data)
{
        disable_dynirq(data);
}
int xen_irq_from_gsi(unsigned gsi)
{
        struct irq_info *info;

        list_for_each_entry(info, &xen_irq_list_head, list) {
                if (info->type != IRQT_PIRQ)
                        continue;

                if (info->u.pirq.gsi == gsi)
                        return info->irq;
        }

        return -1;
}
EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
static void __unbind_from_irq(unsigned int irq)
{
        evtchn_port_t evtchn = evtchn_from_irq(irq);
        struct irq_info *info = info_for_irq(irq);

        if (info->refcnt > 0) {
                info->refcnt--;
                if (info->refcnt != 0)
                        return;
        }

        if (VALID_EVTCHN(evtchn)) {
                unsigned int cpu = cpu_from_irq(irq);
                struct xenbus_device *dev;

                if (!info->is_static)
                        xen_evtchn_close(evtchn);

                switch (type_from_irq(irq)) {
                case IRQT_VIRQ:
                        per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
                        break;
                case IRQT_IPI:
                        per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
                        break;
                case IRQT_EVTCHN:
                        dev = info->u.interdomain;
                        if (dev)
                                atomic_dec(&dev->event_channels);
                        break;
                default:
                        break;
                }

                xen_irq_info_cleanup(info);
        }

        xen_free_irq(irq);
}
/*
 * Do not make any assumptions regarding the relationship between the
 * IRQ number returned here and the Xen pirq argument.
 *
 * Note: We don't assign an event channel until the irq actually started
 * up. Return an existing irq if we've already got one for the gsi.
 *
 * Shareable implies level triggered, not shareable implies edge
 * triggered here.
 */
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
                             unsigned pirq, int shareable, char *name)
{
        int irq;
        struct physdev_irq irq_op;
        int ret;

        mutex_lock(&irq_mapping_update_lock);

        irq = xen_irq_from_gsi(gsi);
        if (irq != -1) {
                pr_info("%s: returning irq %d for gsi %u\n",
                        __func__, irq, gsi);
                goto out;
        }

        irq = xen_allocate_irq_gsi(gsi);
        if (irq < 0)
                goto out;

        irq_op.irq = irq;
        irq_op.vector = 0;

        /* Only the privileged domain can do this. For non-priv, the pcifront
         * driver provides a PCI bus that does the call to do exactly
         * this in the priv domain. */
        if (xen_initial_domain() &&
            HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
                xen_free_irq(irq);
                irq = -ENOSPC;
                goto out;
        }

        ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
                                      shareable ? PIRQ_SHAREABLE : 0);
        if (ret < 0) {
                __unbind_from_irq(irq);
                irq = ret;
                goto out;
        }

        pirq_query_unmask(irq);
        /* We try to use the handler with the appropriate semantic for the
         * type of interrupt: if the interrupt is an edge triggered
         * interrupt we use handle_edge_irq.
         *
         * On the other hand if the interrupt is level triggered we use
         * handle_fasteoi_irq like the native code does for this kind of
         * interrupts.
         *
         * Depending on the Xen version, pirq_needs_eoi might return true
         * not only for level triggered interrupts but for edge triggered
         * interrupts too. In any case Xen always honors the eoi mechanism,
         * not injecting any more pirqs of the same kind if the first one
         * hasn't received an eoi yet. Therefore using the fasteoi handler
         * is the right choice either way.
         */
        if (shareable)
                irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
                                              handle_fasteoi_irq, name);
        else
                irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
                                              handle_edge_irq, name);

out:
        mutex_unlock(&irq_mapping_update_lock);

        return irq;
}
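/*
 * Usage sketch (illustrative; the GSI/PIRQ values are made up): dom0
 * typically maps a legacy device interrupt with matching gsi and pirq,
 * shareable and therefore level triggered:
 *
 *	irq = xen_bind_pirq_gsi_to_irq(9, 9, 1, "acpi");
 *	if (irq < 0)
 *		return irq;	// hypothetical error handling
 */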
#ifdef CONFIG_PCI_MSI
int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
        int rc;
        struct physdev_get_free_pirq op_get_free_pirq;

        op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
        rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);

        WARN_ONCE(rc == -ENOSYS,
                  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");

        return rc ? -1 : op_get_free_pirq.pirq;
}
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
                             int pirq, int nvec, const char *name, domid_t domid)
{
        int i, irq, ret;

        mutex_lock(&irq_mapping_update_lock);

        irq = xen_allocate_irqs_dynamic(nvec);
        if (irq < 0)
                goto out;

        for (i = 0; i < nvec; i++) {
                irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);

                ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
                                              i == 0 ? 0 : PIRQ_MSI_GROUP);
                if (ret < 0)
                        goto error_irq;
        }

        ret = irq_set_msi_desc(irq, msidesc);
        if (ret < 0)
                goto error_irq;
out:
        mutex_unlock(&irq_mapping_update_lock);
        return irq;
error_irq:
        while (nvec--)
                __unbind_from_irq(irq + nvec);
        mutex_unlock(&irq_mapping_update_lock);
        return ret;
}
#endif
int xen_destroy_irq(int irq)
{
        struct physdev_unmap_pirq unmap_irq;
        struct irq_info *info = info_for_irq(irq);
        int rc = -ENOENT;

        mutex_lock(&irq_mapping_update_lock);

        /*
         * If trying to remove a vector in a MSI group different
         * than the first one skip the PIRQ unmap unless this vector
         * is the first one in the group.
         */
        if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
                unmap_irq.pirq = info->u.pirq.pirq;
                unmap_irq.domid = info->u.pirq.domid;
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
                /* If another domain quits without making the pci_disable_msix
                 * call, the Xen hypervisor takes care of freeing the PIRQs
                 * (free_domain_pirqs).
                 */
                if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
                        pr_info("domain %d does not have %d anymore\n",
                                info->u.pirq.domid, info->u.pirq.pirq);
                else if (rc) {
                        pr_warn("unmap irq failed %d\n", rc);
                        goto out;
                }
        }

        xen_free_irq(irq);

out:
        mutex_unlock(&irq_mapping_update_lock);
        return rc;
}
int xen_irq_from_pirq(unsigned pirq)
{
        int irq;
        struct irq_info *info;

        mutex_lock(&irq_mapping_update_lock);

        list_for_each_entry(info, &xen_irq_list_head, list) {
                if (info->type != IRQT_PIRQ)
                        continue;
                irq = info->irq;
                if (info->u.pirq.pirq == pirq)
                        goto out;
        }
        irq = -1;
out:
        mutex_unlock(&irq_mapping_update_lock);

        return irq;
}
int xen_pirq_from_irq(unsigned irq)
{
        return pirq_from_irq(irq);
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
                                   struct xenbus_device *dev)
{
        int irq;
        int ret;

        if (evtchn >= xen_evtchn_max_channels())
                return -ENOMEM;

        mutex_lock(&irq_mapping_update_lock);

        irq = get_evtchn_to_irq(evtchn);

        if (irq == -1) {
                irq = xen_allocate_irq_dynamic();
                if (irq < 0)
                        goto out;

                irq_set_chip_and_handler_name(irq, chip,
                                              handle_edge_irq, "event");

                ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
                if (ret < 0) {
                        __unbind_from_irq(irq);
                        irq = ret;
                        goto out;
                }
                /*
                 * New interdomain events are initially bound to vCPU0. This
                 * is required to setup the event channel in the first
                 * place and also important for UP guests because the
                 * affinity setting is not invoked on them so nothing would
                 * block the interrupt.
                 */
                bind_evtchn_to_cpu(evtchn, 0, false);
        } else {
                struct irq_info *info = info_for_irq(irq);
                WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
        }

out:
        mutex_unlock(&irq_mapping_update_lock);

        return irq;
}
int bind_evtchn_to_irq(evtchn_port_t evtchn)
{
        return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);

int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
{
        return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
        int ret, irq;

        mutex_lock(&irq_mapping_update_lock);

        irq = per_cpu(ipi_to_irq, cpu)[ipi];

        if (irq == -1) {
                irq = xen_allocate_irq_dynamic();
                if (irq < 0)
                        goto out;

                irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
                                              handle_percpu_irq, "ipi");

                bind_ipi.vcpu = xen_vcpu_nr(cpu);
                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
                                                &bind_ipi) != 0)
                        BUG();
                evtchn = bind_ipi.port;

                ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
                if (ret < 0) {
                        __unbind_from_irq(irq);
                        irq = ret;
                        goto out;
                }
                /*
                 * Force the affinity mask to the target CPU so proc shows
                 * the correct target.
                 */
                bind_evtchn_to_cpu(evtchn, cpu, true);
        } else {
                struct irq_info *info = info_for_irq(irq);
                WARN_ON(info == NULL || info->type != IRQT_IPI);
        }

out:
        mutex_unlock(&irq_mapping_update_lock);
        return irq;
}
static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
                                               evtchn_port_t remote_port,
                                               struct irq_chip *chip)
{
        struct evtchn_bind_interdomain bind_interdomain;
        int err;

        bind_interdomain.remote_dom  = dev->otherend_id;
        bind_interdomain.remote_port = remote_port;

        err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
                                          &bind_interdomain);

        return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
                                               chip, dev);
}

int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
                                           evtchn_port_t remote_port)
{
        return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
                                                   &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
{
        struct evtchn_status status;
        evtchn_port_t port;
        int rc = -ENOENT;

        memset(&status, 0, sizeof(status));
        for (port = 0; port < xen_evtchn_max_channels(); port++) {
                status.dom = DOMID_SELF;
                status.port = port;
                rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
                if (rc < 0)
                        continue;
                if (status.status != EVTCHNSTAT_virq)
                        continue;
                if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
                        *evtchn = port;
                        break;
                }
        }
        return rc;
}
/**
 * xen_evtchn_nr_channels - number of usable event channel ports
 *
 * This may be less than the maximum supported by the current
 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
 * supported.
 */
unsigned xen_evtchn_nr_channels(void)
{
        return evtchn_ops->nr_channels();
}
EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
{
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn = 0;
        int irq, ret;

        mutex_lock(&irq_mapping_update_lock);

        irq = per_cpu(virq_to_irq, cpu)[virq];

        if (irq == -1) {
                irq = xen_allocate_irq_dynamic();
                if (irq < 0)
                        goto out;

                if (percpu)
                        irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
                                                      handle_percpu_irq, "virq");
                else
                        irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
                                                      handle_edge_irq, "virq");

                bind_virq.virq = virq;
                bind_virq.vcpu = xen_vcpu_nr(cpu);
                ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
                                                  &bind_virq);
                if (ret == 0)
                        evtchn = bind_virq.port;
                else {
                        if (ret == -EEXIST)
                                ret = find_virq(virq, cpu, &evtchn);
                        BUG_ON(ret < 0);
                }

                ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
                if (ret < 0) {
                        __unbind_from_irq(irq);
                        irq = ret;
                        goto out;
                }

                /*
                 * Force the affinity mask for percpu interrupts so proc
                 * shows the correct target.
                 */
                bind_evtchn_to_cpu(evtchn, cpu, percpu);
        } else {
                struct irq_info *info = info_for_irq(irq);
                WARN_ON(info == NULL || info->type != IRQT_VIRQ);
        }

out:
        mutex_unlock(&irq_mapping_update_lock);

        return irq;
}
static void unbind_from_irq(unsigned int irq)
{
        mutex_lock(&irq_mapping_update_lock);
        __unbind_from_irq(irq);
        mutex_unlock(&irq_mapping_update_lock);
}
static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
                                          irq_handler_t handler,
                                          unsigned long irqflags,
                                          const char *devname, void *dev_id,
                                          struct irq_chip *chip)
{
        int irq, retval;

        irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
        if (irq < 0)
                return irq;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
                return retval;
        }

        return irq;
}

int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
                              irq_handler_t handler,
                              unsigned long irqflags,
                              const char *devname, void *dev_id)
{
        return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
                                              devname, dev_id,
                                              &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
                                      irq_handler_t handler,
                                      unsigned long irqflags,
                                      const char *devname, void *dev_id)
{
        return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
                                              devname, dev_id,
                                              &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
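/*
 * Usage sketch (illustrative; evtchn, my_isr and my_dev are hypothetical):
 * a driver that learned a port number, e.g. via xenbus, binds it and gets
 * back an ordinary Linux irq:
 *
 *	irq = bind_evtchn_to_irqhandler_lateeoi(evtchn, my_isr, 0,
 *						"my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	unbind_from_irqhandler(irq, my_dev);	// tears down the binding
 */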
static int bind_interdomain_evtchn_to_irqhandler_chip(
                struct xenbus_device *dev, evtchn_port_t remote_port,
                irq_handler_t handler, unsigned long irqflags,
                const char *devname, void *dev_id, struct irq_chip *chip)
{
        int irq, retval;

        irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
        if (irq < 0)
                return irq;

        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
                return retval;
        }

        return irq;
}

int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
                                                  evtchn_port_t remote_port,
                                                  irq_handler_t handler,
                                                  unsigned long irqflags,
                                                  const char *devname,
                                                  void *dev_id)
{
        return bind_interdomain_evtchn_to_irqhandler_chip(dev,
                                remote_port, handler, irqflags, devname,
                                dev_id, &xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
                            irq_handler_t handler,
                            unsigned long irqflags, const char *devname, void *dev_id)
{
        int irq, retval;

        irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
        if (irq < 0)
                return irq;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
                return retval;
        }

        return irq;
}
EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
int bind_ipi_to_irqhandler(enum ipi_vector ipi,
                           unsigned int cpu,
                           irq_handler_t handler,
                           unsigned long irqflags,
                           const char *devname,
                           void *dev_id)
{
        int irq, retval;

        irq = bind_ipi_to_irq(ipi, cpu);
        if (irq < 0)
                return irq;

        irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
        retval = request_irq(irq, handler, irqflags, devname, dev_id);
        if (retval != 0) {
                unbind_from_irq(irq);
                return retval;
        }

        return irq;
}
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
        struct irq_info *info = info_for_irq(irq);

        if (WARN_ON(!info))
                return;
        free_irq(irq, dev_id);
        unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
/**
 * xen_set_irq_priority() - set an event channel priority.
 * @irq: irq bound to an event channel.
 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
 */
int xen_set_irq_priority(unsigned irq, unsigned priority)
{
        struct evtchn_set_priority set_priority;

        set_priority.port = evtchn_from_irq(irq);
        set_priority.priority = priority;

        return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
                                           &set_priority);
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
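/*
 * Illustrative note (not part of the original file): priorities only have
 * an effect on the FIFO-based event channel ABI; on the 2-level ABI the
 * hypercall is expected to fail. E.g. boosting a timer interrupt:
 *
 *	ret = xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
 *	if (ret)
 *		pr_warn("event channel priority not supported\n");
 */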
int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
        int irq = get_evtchn_to_irq(evtchn);
        struct irq_info *info;

        if (irq == -1)
                return -ENOENT;

        info = info_for_irq(irq);

        if (!info)
                return -ENOENT;

        WARN_ON(info->refcnt != -1);

        info->refcnt = 1;
        info->is_static = is_static;

        return 0;
}
EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
int evtchn_get(evtchn_port_t evtchn)
{
        int irq;
        struct irq_info *info;
        int err = -ENOENT;

        if (evtchn >= xen_evtchn_max_channels())
                return -EINVAL;

        mutex_lock(&irq_mapping_update_lock);

        irq = get_evtchn_to_irq(evtchn);
        if (irq == -1)
                goto done;

        info = info_for_irq(irq);

        if (!info)
                goto done;

        err = -EINVAL;
        if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
                goto done;

        info->refcnt++;
        err = 0;
done:
        mutex_unlock(&irq_mapping_update_lock);

        return err;
}
EXPORT_SYMBOL_GPL(evtchn_get);
void evtchn_put(evtchn_port_t evtchn)
{
        int irq = get_evtchn_to_irq(evtchn);

        if (WARN_ON(irq == -1))
                return;
        unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(evtchn_put);
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
        int irq;

        if (unlikely(vector == XEN_NMI_VECTOR)) {
                int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
                                            NULL);
                if (rc < 0)
                        printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
                return;
        }

        irq = per_cpu(ipi_to_irq, cpu)[vector];
        BUG_ON(irq < 0);
        notify_remote_via_irq(irq);
}
struct evtchn_loop_ctrl {
        ktime_t timeout;
        unsigned count;
        bool defer_eoi;
};

void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
        int irq;
        struct irq_info *info;
        struct xenbus_device *dev;

        irq = get_evtchn_to_irq(port);
        if (irq == -1)
                return;

        /*
         * Check for timeout every 256 events.
         * We are setting the timeout value only after the first 256
         * events in order to not hurt the common case of few loop
         * iterations. The 256 is basically an arbitrary value.
         *
         * In case we are hitting the timeout we need to defer all further
         * EOIs in order to ensure to leave the event handling loop rather
         * sooner than later.
         */
        if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
                ktime_t kt = ktime_get();

                if (!ctrl->timeout) {
                        kt = ktime_add_ms(kt,
                                          jiffies_to_msecs(event_loop_timeout));
                        ctrl->timeout = kt;
                } else if (kt > ctrl->timeout) {
                        ctrl->defer_eoi = true;
                }
        }

        info = info_for_irq(irq);
        if (xchg_acquire(&info->is_active, 1))
                return;

        dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
        if (dev)
                atomic_inc(&dev->events);

        if (ctrl->defer_eoi) {
                info->eoi_cpu = smp_processor_id();
                info->irq_epoch = __this_cpu_read(irq_epoch);
                info->eoi_time = get_jiffies_64() + event_eoi_delay;
        }

        generic_handle_irq(irq);
}
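/*
 * Illustrative note (not part of the original file): assuming HZ == 250
 * and the default event_loop_timeout of 2 jiffies, the deadline set after
 * the first 256 events is ktime_get() + 8 ms. Every further 256th event
 * re-reads the clock, and once the deadline has passed, all remaining
 * events of this upcall get their EOI deferred through the lateeoi list
 * instead of being EOI'd inline.
 */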
int xen_evtchn_do_upcall(void)
{
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
        int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE;
        int cpu = smp_processor_id();
        struct evtchn_loop_ctrl ctrl = { 0 };

        /*
         * When closing an event channel the associated IRQ must not be freed
         * until all cpus have left the event handling loop. This is ensured
         * by taking the rcu_read_lock() while handling events, as freeing of
         * the IRQ is handled via queue_rcu_work() _after_ closing the event
         * channel.
         */
        rcu_read_lock();

        do {
                vcpu_info->evtchn_upcall_pending = 0;

                xen_evtchn_handle_events(cpu, &ctrl);

                BUG_ON(!irqs_disabled());

                virt_rmb(); /* Hypervisor can set upcall pending. */

        } while (vcpu_info->evtchn_upcall_pending);

        rcu_read_unlock();

        /*
         * Increment irq_epoch only now to defer EOIs only for
         * xen_irq_lateeoi() invocations occurring from inside the loop
         * above.
         */
        __this_cpu_inc(irq_epoch);

        return ret;
}
EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall);
/* Rebind a new event channel to an existing irq. */
void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
{
        struct irq_info *info = info_for_irq(irq);

        if (WARN_ON(!info))
                return;

        /* Make sure the irq is masked, since the new event channel
           will also be masked. */
        disable_irq(irq);

        mutex_lock(&irq_mapping_update_lock);

        /* After resume the irq<->evtchn mappings are all cleared out */
        BUG_ON(get_evtchn_to_irq(evtchn) != -1);
        /* Expect irq to have been bound before,
           so there should be a proper type */
        BUG_ON(info->type == IRQT_UNBOUND);

        (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);

        mutex_unlock(&irq_mapping_update_lock);

        bind_evtchn_to_cpu(evtchn, info->cpu, false);

        /* Unmask the event channel. */
        enable_irq(irq);
}
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
{
        struct evtchn_bind_vcpu bind_vcpu;
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (!VALID_EVTCHN(evtchn))
                return -1;

        if (!xen_support_evtchn_rebind())
                return -1;

        /* Send future instances of this interrupt to other vcpu. */
        bind_vcpu.port = evtchn;
        bind_vcpu.vcpu = xen_vcpu_nr(tcpu);

        /*
         * Mask the event while changing the VCPU binding to prevent
         * it being delivered on an unexpected VCPU.
         */
        do_mask(info, EVT_MASK_REASON_TEMPORARY);

        /*
         * If this fails, it usually just indicates that we're dealing with a
         * virq or IPI channel, which don't actually need to be rebound. Ignore
         * it, but don't do the xenlinux-level rebind in that case.
         */
        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
                bind_evtchn_to_cpu(evtchn, tcpu, false);

        do_unmask(info, EVT_MASK_REASON_TEMPORARY);

        return 0;
}
/*
 * Find the CPU within @dest mask which has the least number of channels
 * assigned. This is not precise as the per cpu counts can be modified
 * concurrently.
 */
static unsigned int select_target_cpu(const struct cpumask *dest)
{
        unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;

        for_each_cpu_and(cpu, dest, cpu_online_mask) {
                unsigned int curch = atomic_read(&channels_on_cpu[cpu]);

                if (curch < minch) {
                        minch = curch;
                        best_cpu = cpu;
                }
        }

        /*
         * Catch the unlikely case that dest contains no online CPUs. Can't
         * recurse.
         */
        if (best_cpu == UINT_MAX)
                return select_target_cpu(cpu_online_mask);

        return best_cpu;
}
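/*
 * Illustrative example (not part of the original file): with per-cpu
 * channel counts {cpu0: 4, cpu1: 1, cpu2: 3} and dest = {cpu0, cpu2},
 * cpu2 is picked (3 < 4); cpu1 is never considered because it is not in
 * dest, even though it is the least loaded CPU overall.
 */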
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
                            bool force)
{
        unsigned int tcpu = select_target_cpu(dest);
        int ret;

        ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
        if (!ret)
                irq_data_update_effective_affinity(data, cpumask_of(tcpu));

        return ret;
}
static void enable_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn))
                do_unmask(info, EVT_MASK_REASON_EXPLICIT);
}

static void disable_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn))
                do_mask(info, EVT_MASK_REASON_EXPLICIT);
}

static void ack_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn))
                event_handler_exit(info);
}
static void mask_ack_dynirq(struct irq_data *data)
{
        disable_dynirq(data);
        ack_dynirq(data);
}
static void lateeoi_ack_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn)) {
                do_mask(info, EVT_MASK_REASON_EOI_PENDING);
                /*
                 * Don't call event_handler_exit().
                 * Need to keep is_active non-zero in order to ignore re-raised
                 * events after cpu affinity changes while a lateeoi is pending.
                 */
                clear_evtchn(evtchn);
        }
}
static void lateeoi_mask_ack_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn)) {
                do_mask(info, EVT_MASK_REASON_EXPLICIT);
                event_handler_exit(info);
        }
}
static int retrigger_dynirq(struct irq_data *data)
{
        struct irq_info *info = info_for_irq(data->irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (!VALID_EVTCHN(evtchn))
                return 0;

        do_mask(info, EVT_MASK_REASON_TEMPORARY);
        set_evtchn(evtchn);
        do_unmask(info, EVT_MASK_REASON_TEMPORARY);

        return 1;
}
static void restore_pirqs(void)
{
        int pirq, rc, irq, gsi;
        struct physdev_map_pirq map_irq;
        struct irq_info *info;

        list_for_each_entry(info, &xen_irq_list_head, list) {
                if (info->type != IRQT_PIRQ)
                        continue;

                pirq = info->u.pirq.pirq;
                gsi = info->u.pirq.gsi;
                irq = info->irq;

                /* save/restore of PT devices doesn't work, so at this point the
                 * only devices present are GSI based emulated devices */
                if (!gsi)
                        continue;

                map_irq.domid = DOMID_SELF;
                map_irq.type = MAP_PIRQ_TYPE_GSI;
                map_irq.index = gsi;
                map_irq.pirq = pirq;

                rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
                if (rc) {
                        pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
                                gsi, irq, pirq, rc);
                        xen_free_irq(irq);
                        continue;
                }

                printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);

                __startup_pirq(irq);
        }
}
static void restore_cpu_virqs(unsigned int cpu)
{
        struct evtchn_bind_virq bind_virq;
        evtchn_port_t evtchn;
        int virq, irq;

        for (virq = 0; virq < NR_VIRQS; virq++) {
                if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
                        continue;

                BUG_ON(virq_from_irq(irq) != virq);

                /* Get a new binding from Xen. */
                bind_virq.virq = virq;
                bind_virq.vcpu = xen_vcpu_nr(cpu);
                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
                                                &bind_virq) != 0)
                        BUG();
                evtchn = bind_virq.port;

                /* Record the new mapping. */
                (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
                /* The affinity mask is still valid */
                bind_evtchn_to_cpu(evtchn, cpu, false);
        }
}
static void restore_cpu_ipis(unsigned int cpu)
{
        struct evtchn_bind_ipi bind_ipi;
        evtchn_port_t evtchn;
        int ipi, irq;

        for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
                if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
                        continue;

                BUG_ON(ipi_from_irq(irq) != ipi);

                /* Get a new binding from Xen. */
                bind_ipi.vcpu = xen_vcpu_nr(cpu);
                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
                                                &bind_ipi) != 0)
                        BUG();
                evtchn = bind_ipi.port;

                /* Record the new mapping. */
                (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
                /* The affinity mask is still valid */
                bind_evtchn_to_cpu(evtchn, cpu, false);
        }
}
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq)
{
        struct irq_info *info = info_for_irq(irq);
        evtchn_port_t evtchn = info ? info->evtchn : 0;

        if (VALID_EVTCHN(evtchn))
                event_handler_exit(info);
}
EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
{
        evtchn_port_t evtchn = evtchn_from_irq(irq);

        if (VALID_EVTCHN(evtchn))
                set_evtchn(evtchn);
}

bool xen_test_irq_pending(int irq)
{
        evtchn_port_t evtchn = evtchn_from_irq(irq);
        bool ret = false;

        if (VALID_EVTCHN(evtchn))
                ret = test_evtchn(evtchn);

        return ret;
}
/* Poll waiting for an irq to become pending with timeout. In the usual case,
 * the irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq_timeout(int irq, u64 timeout)
{
        evtchn_port_t evtchn = evtchn_from_irq(irq);

        if (VALID_EVTCHN(evtchn)) {
                struct sched_poll poll;

                poll.nr_ports = 1;
                poll.timeout = timeout;
                set_xen_guest_handle(poll.ports, &evtchn);

                if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
                        BUG();
        }
}
EXPORT_SYMBOL(xen_poll_irq_timeout);

/* Poll waiting for an irq to become pending. In the usual case, the
 * irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq)
{
        xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
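/*
 * Usage sketch (illustrative; lock_is_free() is hypothetical): a pv
 * spinlock slow path can block in the hypervisor until its wakeup irq
 * is re-raised:
 *
 *	xen_clear_irq_pending(irq);
 *	if (!lock_is_free(lock))	// re-check after clearing
 *		xen_poll_irq(irq);	// SCHEDOP_poll, no timeout
 */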
/* Check whether the IRQ line is shared with other guests. */
int xen_test_irq_shared(int irq)
{
        struct irq_info *info = info_for_irq(irq);
        struct physdev_irq_status_query irq_status;

        if (WARN_ON(!info))
                return -ENOENT;

        irq_status.irq = info->u.pirq.pirq;

        if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
                return 0;
        return !(irq_status.flags & XENIRQSTAT_shared);
}
EXPORT_SYMBOL_GPL(xen_test_irq_shared);
void xen_irq_resume(void)
{
        unsigned int cpu;
        struct irq_info *info;

        /* New event-channel space is not 'live' yet. */
        xen_evtchn_resume();

        /* No IRQ <-> event-channel mappings. */
        list_for_each_entry(info, &xen_irq_list_head, list) {
                /* Zap event-channel binding */
                info->evtchn = 0;
                /* Adjust accounting */
                channels_on_cpu_dec(info);
        }

        clear_evtchn_to_irq_all();

        for_each_possible_cpu(cpu) {
                restore_cpu_virqs(cpu);
                restore_cpu_ipis(cpu);
        }

        restore_pirqs();
}
static struct irq_chip xen_dynamic_chip __read_mostly = {
        .name                   = "xen-dyn",

        .irq_disable            = disable_dynirq,
        .irq_mask               = disable_dynirq,
        .irq_unmask             = enable_dynirq,

        .irq_ack                = ack_dynirq,
        .irq_mask_ack           = mask_ack_dynirq,

        .irq_set_affinity       = set_affinity_irq,
        .irq_retrigger          = retrigger_dynirq,
};
static struct irq_chip xen_lateeoi_chip __read_mostly = {
        /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
        .name                   = "xen-dyn-lateeoi",

        .irq_disable            = disable_dynirq,
        .irq_mask               = disable_dynirq,
        .irq_unmask             = enable_dynirq,

        .irq_ack                = lateeoi_ack_dynirq,
        .irq_mask_ack           = lateeoi_mask_ack_dynirq,

        .irq_set_affinity       = set_affinity_irq,
        .irq_retrigger          = retrigger_dynirq,
};
static struct irq_chip xen_pirq_chip __read_mostly = {
        .name                   = "xen-pirq",

        .irq_startup            = startup_pirq,
        .irq_shutdown           = shutdown_pirq,
        .irq_enable             = enable_pirq,
        .irq_disable            = disable_pirq,

        .irq_mask               = disable_dynirq,
        .irq_unmask             = enable_dynirq,

        .irq_ack                = eoi_pirq,
        .irq_eoi                = eoi_pirq,
        .irq_mask_ack           = mask_ack_pirq,

        .irq_set_affinity       = set_affinity_irq,

        .irq_retrigger          = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
        .name                   = "xen-percpu",

        .irq_disable            = disable_dynirq,
        .irq_mask               = disable_dynirq,
        .irq_unmask             = enable_dynirq,

        .irq_ack                = ack_dynirq,
};
#ifdef CONFIG_X86
#ifdef CONFIG_XEN_PVHVM
/* Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions. */
void xen_setup_callback_vector(void)
{
        uint64_t callback_via;

        if (xen_have_vector_callback) {
                callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
                if (xen_set_callback_via(callback_via)) {
                        pr_err("Request for Xen HVM callback vector failed\n");
                        xen_have_vector_callback = false;
                }
        }
}
/*
 * Setup per-vCPU vector-type callbacks. If this setup is unavailable,
 * fallback to the global vector-type callback.
 */
static __init void xen_init_setup_upcall_vector(void)
{
        if (!xen_have_vector_callback)
                return;

        if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
            !xen_set_upcall_vector(0))
                xen_percpu_upcall = true;
        else if (xen_feature(XENFEAT_hvm_callback_vector))
                xen_setup_callback_vector();
        else
                xen_have_vector_callback = false;
}
int xen_set_upcall_vector(unsigned int cpu)
{
        int rc;
        xen_hvm_evtchn_upcall_vector_t op = {
                .vector = HYPERVISOR_CALLBACK_VECTOR,
                .vcpu = per_cpu(xen_vcpu_id, cpu),
        };

        rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
        if (rc)
                return rc;

        /* Trick toolstack to think we are enlightened. */
        if (!cpu)
                rc = xen_set_callback_via(1);

        return rc;
}
static __init void xen_alloc_callback_vector(void)
{
        if (!xen_have_vector_callback)
                return;

        pr_info("Xen HVM callback vector for event delivery is enabled\n");
        alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
}
#else
void xen_setup_callback_vector(void) {}
static inline void xen_init_setup_upcall_vector(void) {}
int xen_set_upcall_vector(unsigned int cpu) {}
static inline void xen_alloc_callback_vector(void) {}
#endif /* CONFIG_XEN_PVHVM */
#endif /* CONFIG_X86 */
bool xen_fifo_events = true;
module_param_named(fifo_events, xen_fifo_events, bool, 0);
static int xen_evtchn_cpu_prepare(unsigned int cpu)
{
        int ret = 0;

        xen_cpu_init_eoi(cpu);

        if (evtchn_ops->percpu_init)
                ret = evtchn_ops->percpu_init(cpu);

        return ret;
}

static int xen_evtchn_cpu_dead(unsigned int cpu)
{
        int ret = 0;

        if (evtchn_ops->percpu_deinit)
                ret = evtchn_ops->percpu_deinit(cpu);

        return ret;
}
void __init xen_init_IRQ(void)
{
        int ret = -EINVAL;
        evtchn_port_t evtchn;

        if (xen_fifo_events)
                ret = xen_evtchn_fifo_init();
        if (ret < 0) {
                xen_evtchn_2l_init();
                xen_fifo_events = false;
        }

        xen_cpu_init_eoi(smp_processor_id());

        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
                                  "xen/evtchn:prepare",
                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);

        evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
                                sizeof(*evtchn_to_irq), GFP_KERNEL);
        BUG_ON(!evtchn_to_irq);

        /* No event channels are 'live' right now. */
        for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
                mask_evtchn(evtchn);

        pirq_needs_eoi = pirq_needs_eoi_flag;

        if (xen_pv_domain()) {
                if (xen_initial_domain())
                        pci_xen_initial_domain();
        }

        xen_init_setup_upcall_vector();
        xen_alloc_callback_vector();

        if (xen_hvm_domain()) {
                native_init_IRQ();
                /* pci_xen_hvm_init must be called after native_init_IRQ so that
                 * __acpi_register_gsi can point at the right function */
                pci_xen_hvm_init();
        } else {
                int rc;
                struct physdev_pirq_eoi_gmfn eoi_gmfn;

                pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
                eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
                if (rc != 0) {
                        free_page((unsigned long) pirq_eoi_map);
                        pirq_eoi_map = NULL;
                } else
                        pirq_needs_eoi = pirq_check_eoi_map;
        }
}