--- /dev/null
+From: Mike Travis <travis@sgi.com>
+Subject: x86 cpumask: Updates to support NR_CPUS=4096
+References: bnc#425240 FATE304266
+Patch-mainline: 2.6.28
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+ * Add for_each_cpu_mask_and() function to eliminate a common need for
+ the use of a temporary cpumask_t variable.
+
+ * Change genapic interfaces to accept cpumask_t pointers where possible.
+ Modify external callers to use cpumask_t pointers in function calls.
+
+ * Create new send_IPI_mask_allbutself which is the same as the
+ send_IPI_mask functions but removes smp_processor_id() from the list.
+ This removes another common need for a temporary cpumask_t variable.
+
+ * Use node_to_cpumask_ptr in place of node_to_cpumask to reduce stack
+ requirements in sched.c.
+
+ * Modify arch/x86/Kconfig to enable MAXSMP and 4096 cpus.
+
+Signed-off-by: Mike Travis <travis@sgi.com>
+Acked-by: Rusty Russell <rusty@rustcorp.com.au>
+Signed-off-by: Jiri Slaby <jslaby@suse.de> [bigsmp cpu_mask_to_apicid fix]
+---
+ arch/x86/Kconfig | 11 +--
+ arch/x86/kernel/apic_32.c | 2
+ arch/x86/kernel/apic_64.c | 2
+ arch/x86/kernel/crash.c | 5 -
+ arch/x86/kernel/genapic_flat_64.c | 76 ++++++++++++++++--------
+ arch/x86/kernel/genx2apic_cluster.c | 60 +++++++++++++------
+ arch/x86/kernel/genx2apic_phys.c | 55 ++++++++++++-----
+ arch/x86/kernel/genx2apic_uv_x.c | 43 ++++++++------
+ arch/x86/kernel/io_apic_32.c | 16 ++---
+ arch/x86/kernel/io_apic_64.c | 95 +++++++++++++++----------------
+ arch/x86/kernel/ipi.c | 26 ++++++--
+ arch/x86/kernel/smp.c | 15 ----
+ arch/x86/kernel/tlb_32.c | 2
+ arch/x86/kernel/tlb_64.c | 2
+ arch/x86/xen/smp.c | 15 ++--
+ include/asm-x86/genapic_32.h | 8 +-
+ include/asm-x86/genapic_64.h | 11 ++-
+ include/asm-x86/ipi.h | 22 ++++++-
+ include/asm-x86/mach-bigsmp/mach_apic.h | 8 +-
+ include/asm-x86/mach-bigsmp/mach_ipi.h | 21 ++++--
+ include/asm-x86/mach-default/mach_apic.h | 12 +--
+ include/asm-x86/mach-default/mach_ipi.h | 18 ++---
+ include/asm-x86/mach-es7000/mach_apic.h | 8 +-
+ include/asm-x86/mach-es7000/mach_ipi.h | 20 ++++--
+ include/asm-x86/mach-generic/mach_ipi.h | 1
+ include/asm-x86/mach-numaq/mach_apic.h | 6 -
+ include/asm-x86/mach-numaq/mach_ipi.h | 22 ++++---
+ include/asm-x86/mach-summit/mach_apic.h | 6 -
+ include/asm-x86/mach-summit/mach_ipi.h | 22 ++++---
+ 29 files changed, 363 insertions(+), 247 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -584,15 +584,15 @@ config IOMMU_HELPER
+
+ config MAXSMP
+ bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+- depends on X86_64 && SMP && BROKEN
++ depends on X86_64 && SMP
+ default n
+ help
+ Configure maximum number of CPUS and NUMA Nodes for this architecture.
+ If unsure, say N.
+
+ config NR_CPUS
+- int "Maximum number of CPUs (2-512)" if !MAXSMP
+- range 2 512
++ int "Maximum number of CPUs (2-4096)"
++ range 2 4096
+ depends on SMP
+ default "4096" if MAXSMP
+ default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
+@@ -603,7 +603,7 @@ config NR_CPUS
+ minimum value which makes sense is 2.
+
+ This is purely to save memory - each supported CPU adds
+- approximately eight kilobytes to the kernel image.
++ approximately one kilobyte to the kernel image.
+
+ config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+@@ -1019,7 +1019,8 @@ config NUMA_EMU
+ number of nodes. This is only useful for debugging.
+
+ config NODES_SHIFT
+- int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
++ int "Maximum NUMA Nodes (as a power of 2)"
++ range 9 9 if MAXSMP
+ range 1 9 if X86_64
+ default "9" if MAXSMP
+ default "6" if X86_64
+--- a/arch/x86/kernel/apic_32.c
++++ b/arch/x86/kernel/apic_32.c
+@@ -319,7 +319,7 @@ static void lapic_timer_setup(enum clock
+ static void lapic_timer_broadcast(cpumask_t mask)
+ {
+ #ifdef CONFIG_SMP
+- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
++ send_IPI_mask(&mask, LOCAL_TIMER_VECTOR);
+ #endif
+ }
+
+--- a/arch/x86/kernel/apic_64.c
++++ b/arch/x86/kernel/apic_64.c
+@@ -351,7 +351,7 @@ static void lapic_timer_setup(enum clock
+ static void lapic_timer_broadcast(cpumask_t mask)
+ {
+ #ifdef CONFIG_SMP
+- send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
++ send_IPI_mask(&mask, LOCAL_TIMER_VECTOR);
+ #endif
+ }
+
+--- a/arch/x86/kernel/crash.c
++++ b/arch/x86/kernel/crash.c
+@@ -77,10 +77,7 @@ static int crash_nmi_callback(struct not
+
+ static void smp_send_nmi_allbutself(void)
+ {
+- cpumask_t mask = cpu_online_map;
+- cpu_clear(safe_smp_processor_id(), mask);
+- if (!cpus_empty(mask))
+- send_IPI_mask(mask, NMI_VECTOR);
++ send_IPI_allbutself(NMI_VECTOR);
+ }
+
+ static struct notifier_block crash_nmi_nb = {
+--- a/arch/x86/kernel/genapic_flat_64.c
++++ b/arch/x86/kernel/genapic_flat_64.c
+@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char
+ return 1;
+ }
+
+-static cpumask_t flat_target_cpus(void)
++static const cpumask_t *flat_target_cpus(void)
+ {
+- return cpu_online_map;
++ return &cpu_online_map;
+ }
+
+-static cpumask_t flat_vector_allocation_domain(int cpu)
++static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+ /* Careful. Some cpus do not strictly honor the set of cpus
+ * specified in the interrupt destination when using lowest
+@@ -45,8 +45,7 @@ static cpumask_t flat_vector_allocation_
+ * deliver interrupts to the wrong hyperthread when only one
+ * hyperthread was specified in the interrupt desitination.
+ */
+- cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+- return domain;
++ *retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
+ }
+
+ /*
+@@ -69,9 +68,8 @@ static void flat_init_apic_ldr(void)
+ apic_write(APIC_LDR, val);
+ }
+
+-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
++static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+ {
+- unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long flags;
+
+ local_irq_save(flags);
+@@ -79,20 +77,40 @@ static void flat_send_IPI_mask(cpumask_t
+ local_irq_restore(flags);
+ }
+
++static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
++{
++ unsigned long mask = cpus_addr(*cpumask)[0];
++
++ _flat_send_IPI_mask(mask, vector);
++}
++
++static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
++{
++ unsigned long mask = cpus_addr(*cpumask)[0];
++ int cpu = smp_processor_id();
++
++ if (cpu < BITS_PER_LONG)
++ clear_bit(cpu, &mask);
++ _flat_send_IPI_mask(mask, vector);
++}
++
+ static void flat_send_IPI_allbutself(int vector)
+ {
++ int cpu = smp_processor_id();
+ #ifdef CONFIG_HOTPLUG_CPU
+ int hotplug = 1;
+ #else
+ int hotplug = 0;
+ #endif
+ if (hotplug || vector == NMI_VECTOR) {
+- cpumask_t allbutme = cpu_online_map;
++ if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
++ unsigned long mask = cpus_addr(cpu_online_map)[0];
+
+- cpu_clear(smp_processor_id(), allbutme);
++ if (cpu < BITS_PER_LONG)
++ clear_bit(cpu, &mask);
+
+- if (!cpus_empty(allbutme))
+- flat_send_IPI_mask(allbutme, vector);
++ _flat_send_IPI_mask(mask, vector);
++ }
+ } else if (num_online_cpus() > 1) {
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+ }
+@@ -101,7 +119,7 @@ static void flat_send_IPI_allbutself(int
+ static void flat_send_IPI_all(int vector)
+ {
+ if (vector == NMI_VECTOR)
+- flat_send_IPI_mask(cpu_online_map, vector);
++ flat_send_IPI_mask(&cpu_online_map, vector);
+ else
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+ }
+@@ -135,9 +153,9 @@ static int flat_apic_id_registered(void)
+ return physid_isset(read_xapic_id(), phys_cpu_present_map);
+ }
+
+-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+- return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
++ return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+ }
+
+ static unsigned int phys_pkg_id(int index_msb)
+@@ -157,6 +175,7 @@ struct genapic apic_flat = {
+ .send_IPI_all = flat_send_IPI_all,
+ .send_IPI_allbutself = flat_send_IPI_allbutself,
+ .send_IPI_mask = flat_send_IPI_mask,
++ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
+ .send_IPI_self = apic_send_IPI_self,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+@@ -186,35 +205,39 @@ static int physflat_acpi_madt_oem_check(
+ return 0;
+ }
+
+-static cpumask_t physflat_target_cpus(void)
++static const cpumask_t *physflat_target_cpus(void)
+ {
+- return cpu_online_map;
++ return &cpu_online_map;
+ }
+
+-static cpumask_t physflat_vector_allocation_domain(int cpu)
++static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+- return cpumask_of_cpu(cpu);
++ cpus_clear(*retmask);
++ cpu_set(cpu, *retmask);
+ }
+
+-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
++static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+ {
+ send_IPI_mask_sequence(cpumask, vector);
+ }
+
+-static void physflat_send_IPI_allbutself(int vector)
++static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
++ int vector)
+ {
+- cpumask_t allbutme = cpu_online_map;
++ send_IPI_mask_allbutself(cpumask, vector);
++}
+
+- cpu_clear(smp_processor_id(), allbutme);
+- physflat_send_IPI_mask(allbutme, vector);
++static void physflat_send_IPI_allbutself(int vector)
++{
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+
+ static void physflat_send_IPI_all(int vector)
+ {
+- physflat_send_IPI_mask(cpu_online_map, vector);
++ physflat_send_IPI_mask(&cpu_online_map, vector);
+ }
+
+-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int cpu;
+
+@@ -222,7 +245,7 @@ static unsigned int physflat_cpu_mask_to
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+- cpu = first_cpu(cpumask);
++ cpu = first_cpu(*cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+@@ -241,6 +264,7 @@ struct genapic apic_physflat = {
+ .send_IPI_all = physflat_send_IPI_all,
+ .send_IPI_allbutself = physflat_send_IPI_allbutself,
+ .send_IPI_mask = physflat_send_IPI_mask,
++ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+ .send_IPI_self = apic_send_IPI_self,
+ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_cluster.c
++++ b/arch/x86/kernel/genx2apic_cluster.c
+@@ -19,19 +19,18 @@ static int x2apic_acpi_madt_oem_check(ch
+
+ /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+-static cpumask_t x2apic_target_cpus(void)
++static const cpumask_t *x2apic_target_cpus(void)
+ {
+- return cpumask_of_cpu(0);
++ return &cpumask_of_cpu(0);
+ }
+
+ /*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+-static cpumask_t x2apic_vector_allocation_domain(int cpu)
++static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+- cpumask_t domain = CPU_MASK_NONE;
+- cpu_set(cpu, domain);
+- return domain;
++ cpus_clear(*retmask);
++ cpu_set(cpu, *retmask);
+ }
+
+ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+@@ -53,32 +52,52 @@ static void __x2apic_send_IPI_dest(unsig
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
++static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+- for_each_cpu_mask(query_cpu, mask) {
+- __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+- vector, APIC_DEST_LOGICAL);
+- }
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++ __x2apic_send_IPI_dest(
++ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++ vector, APIC_DEST_LOGICAL);
+ local_irq_restore(flags);
+ }
+
+-static void x2apic_send_IPI_allbutself(int vector)
++static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+- cpumask_t mask = cpu_online_map;
++ unsigned long flags;
++ unsigned long query_cpu;
++ unsigned long this_cpu = smp_processor_id();
+
+- cpu_clear(smp_processor_id(), mask);
++ local_irq_save(flags);
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++ if (query_cpu != this_cpu)
++ __x2apic_send_IPI_dest(
++ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++ vector, APIC_DEST_LOGICAL);
++ local_irq_restore(flags);
++}
+
+- if (!cpus_empty(mask))
+- x2apic_send_IPI_mask(mask, vector);
++static void x2apic_send_IPI_allbutself(int vector)
++{
++ unsigned long flags;
++ unsigned long query_cpu;
++ unsigned long this_cpu = smp_processor_id();
++
++ local_irq_save(flags);
++ for_each_online_cpu(query_cpu)
++ if (query_cpu != this_cpu)
++ __x2apic_send_IPI_dest(
++ per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++ vector, APIC_DEST_LOGICAL);
++ local_irq_restore(flags);
+ }
+
+ static void x2apic_send_IPI_all(int vector)
+ {
+- x2apic_send_IPI_mask(cpu_online_map, vector);
++ x2apic_send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ static int x2apic_apic_id_registered(void)
+@@ -86,7 +105,7 @@ static int x2apic_apic_id_registered(voi
+ return 1;
+ }
+
+-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int cpu;
+
+@@ -94,8 +113,8 @@ static unsigned int x2apic_cpu_mask_to_a
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+- cpu = first_cpu(cpumask);
+- if ((unsigned)cpu < NR_CPUS)
++ cpu = first_cpu(*cpumask);
++ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ else
+ return BAD_APICID;
+@@ -147,6 +166,7 @@ struct genapic apic_x2apic_cluster = {
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
++ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_phys.c
++++ b/arch/x86/kernel/genx2apic_phys.c
+@@ -31,16 +31,15 @@ static int x2apic_acpi_madt_oem_check(ch
+
+ /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+-static cpumask_t x2apic_target_cpus(void)
++static const cpumask_t *x2apic_target_cpus(void)
+ {
+- return cpumask_of_cpu(0);
++ return &cpumask_of_cpu(0);
+ }
+
+-static cpumask_t x2apic_vector_allocation_domain(int cpu)
++static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+- cpumask_t domain = CPU_MASK_NONE;
+- cpu_set(cpu, domain);
+- return domain;
++ cpus_clear(*retmask);
++ cpu_set(cpu, *retmask);
+ }
+
+ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsig
+ x2apic_icr_write(cfg, apicid);
+ }
+
+-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
++static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ unsigned long flags;
+ unsigned long query_cpu;
+
+ local_irq_save(flags);
+- for_each_cpu_mask(query_cpu, mask) {
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map) {
+ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+ }
+ local_irq_restore(flags);
+ }
+
+-static void x2apic_send_IPI_allbutself(int vector)
++static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+- cpumask_t mask = cpu_online_map;
++ unsigned long flags;
++ unsigned long query_cpu;
++ unsigned long this_cpu = smp_processor_id();
++
++ local_irq_save(flags);
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map) {
++ if (query_cpu != this_cpu)
++ __x2apic_send_IPI_dest(
++ per_cpu(x86_cpu_to_apicid, query_cpu),
++ vector, APIC_DEST_PHYSICAL);
++ }
++ local_irq_restore(flags);
++}
+
+- cpu_clear(smp_processor_id(), mask);
++static void x2apic_send_IPI_allbutself(int vector)
++{
++ unsigned long flags;
++ unsigned long query_cpu;
++ unsigned long this_cpu = smp_processor_id();
+
+- if (!cpus_empty(mask))
+- x2apic_send_IPI_mask(mask, vector);
++ local_irq_save(flags);
++ for_each_online_cpu(query_cpu)
++ if (query_cpu != this_cpu)
++ __x2apic_send_IPI_dest(
++ per_cpu(x86_cpu_to_apicid, query_cpu),
++ vector, APIC_DEST_PHYSICAL);
++ local_irq_restore(flags);
+ }
+
+ static void x2apic_send_IPI_all(int vector)
+ {
+- x2apic_send_IPI_mask(cpu_online_map, vector);
++ x2apic_send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ static int x2apic_apic_id_registered(void)
+@@ -89,7 +109,7 @@ static int x2apic_apic_id_registered(voi
+ return 1;
+ }
+
+-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int cpu;
+
+@@ -97,8 +117,8 @@ static unsigned int x2apic_cpu_mask_to_a
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+- cpu = first_cpu(cpumask);
+- if ((unsigned)cpu < NR_CPUS)
++ cpu = first_cpu(*cpumask);
++ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+@@ -147,6 +167,7 @@ struct genapic apic_x2apic_phys = {
+ .send_IPI_all = x2apic_send_IPI_all,
+ .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+ .send_IPI_mask = x2apic_send_IPI_mask,
++ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+ .send_IPI_self = x2apic_send_IPI_self,
+ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_uv_x.c
++++ b/arch/x86/kernel/genx2apic_uv_x.c
+@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+ /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+-static cpumask_t uv_target_cpus(void)
++static const cpumask_t *uv_target_cpus(void)
+ {
+- return cpumask_of_cpu(0);
++ return &cpumask_of_cpu(0);
+ }
+
+-static cpumask_t uv_vector_allocation_domain(int cpu)
++static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+- cpumask_t domain = CPU_MASK_NONE;
+- cpu_set(cpu, domain);
+- return domain;
++ cpus_clear(*retmask);
++ cpu_set(cpu, *retmask);
+ }
+
+ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int
+ uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+ }
+
+-static void uv_send_IPI_mask(cpumask_t mask, int vector)
++static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ unsigned int cpu;
+
+- for_each_possible_cpu(cpu)
+- if (cpu_isset(cpu, mask))
++ for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
++ uv_send_IPI_one(cpu, vector);
++}
++
++static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++ unsigned int cpu;
++ unsigned int this_cpu = smp_processor_id();
++
++ for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
++ if (cpu != this_cpu)
+ uv_send_IPI_one(cpu, vector);
+ }
+
+ static void uv_send_IPI_allbutself(int vector)
+ {
+- cpumask_t mask = cpu_online_map;
+-
+- cpu_clear(smp_processor_id(), mask);
++ unsigned int cpu;
++ unsigned int this_cpu = smp_processor_id();
+
+- if (!cpus_empty(mask))
+- uv_send_IPI_mask(mask, vector);
++ for_each_online_cpu(cpu)
++ if (cpu != this_cpu)
++ uv_send_IPI_one(cpu, vector);
+ }
+
+ static void uv_send_IPI_all(int vector)
+ {
+- uv_send_IPI_mask(cpu_online_map, vector);
++ uv_send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ static int uv_apic_id_registered(void)
+@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
+ {
+ }
+
+-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int cpu;
+
+@@ -164,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apici
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+- cpu = first_cpu(cpumask);
++ cpu = first_cpu(*cpumask);
+ if ((unsigned)cpu < nr_cpu_ids)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+@@ -219,6 +227,7 @@ struct genapic apic_x2apic_uv_x = {
+ .init_apic_ldr = uv_init_apic_ldr,
+ .send_IPI_all = uv_send_IPI_all,
+ .send_IPI_allbutself = uv_send_IPI_allbutself,
++ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
+ .send_IPI_mask = uv_send_IPI_mask,
+ /* ZZZ.send_IPI_self = uv_send_IPI_self, */
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+--- a/arch/x86/kernel/io_apic_32.c
++++ b/arch/x86/kernel/io_apic_32.c
+@@ -344,11 +344,11 @@ static void set_ioapic_affinity_irq(unsi
+
+ cpus_and(tmp, cpumask, cpu_online_map);
+ if (cpus_empty(tmp))
+- tmp = TARGET_CPUS;
++ tmp = *TARGET_CPUS;
+
+ cpus_and(cpumask, tmp, CPU_MASK_ALL);
+
+- apicid_value = cpu_mask_to_apicid(cpumask);
++ apicid_value = cpu_mask_to_apicid(&cpumask);
+ /* Prepare to do the io_apic_write */
+ apicid_value = apicid_value << 24;
+ spin_lock_irqsave(&ioapic_lock, flags);
+@@ -926,7 +926,7 @@ void __init setup_ioapic_dest(void)
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+- set_ioapic_affinity_irq(irq, TARGET_CPUS);
++ set_ioapic_affinity_irq(irq, *TARGET_CPUS);
+ }
+
+ }
+@@ -2522,13 +2522,13 @@ static void set_msi_irq_affinity(unsigne
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+- tmp = TARGET_CPUS;
++ tmp = *TARGET_CPUS;
+
+ vector = assign_irq_vector(irq);
+ if (vector < 0)
+ return;
+
+- dest = cpu_mask_to_apicid(mask);
++ dest = cpu_mask_to_apicid(&mask);
+
+ read_msi_msg(irq, &msg);
+
+@@ -2615,11 +2615,11 @@ static void set_ht_irq_affinity(unsigned
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+- tmp = TARGET_CPUS;
++ tmp = *TARGET_CPUS;
+
+ cpus_and(mask, tmp, CPU_MASK_ALL);
+
+- dest = cpu_mask_to_apicid(mask);
++ dest = cpu_mask_to_apicid(&mask);
+
+ target_ht_irq(irq, dest);
+ irq_desc[irq].affinity = mask;
+@@ -2649,7 +2649,7 @@ int arch_setup_ht_irq(unsigned int irq,
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+--- a/arch/x86/kernel/io_apic_64.c
++++ b/arch/x86/kernel/io_apic_64.c
+@@ -83,7 +83,7 @@ static struct irq_cfg irq_cfg[NR_IRQS] _
+ [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ };
+
+-static int assign_irq_vector(int irq, cpumask_t mask);
++static int assign_irq_vector(int irq, const cpumask_t *mask);
+
+ int first_system_vector = 0xfe;
+
+@@ -335,11 +335,11 @@ static void set_ioapic_affinity_irq(unsi
+ if (cpus_empty(tmp))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ /*
+ * Only the high 8 bits are valid.
+@@ -798,7 +798,7 @@ void unlock_vector_lock(void)
+ spin_unlock(&vector_lock);
+ }
+
+-static int __assign_irq_vector(int irq, cpumask_t mask)
++static int __assign_irq_vector(int irq, const cpumask_t *mask)
+ {
+ /*
+ * NOTE! The local APIC isn't very good at handling
+@@ -815,31 +815,28 @@ static int __assign_irq_vector(int irq,
+ unsigned int old_vector;
+ int cpu;
+ struct irq_cfg *cfg;
++ cpumask_t tmp_mask;
+
+ BUG_ON((unsigned)irq >= NR_IRQS);
+ cfg = &irq_cfg[irq];
+
+- /* Only try and allocate irqs on cpus that are present */
+- cpus_and(mask, mask, cpu_online_map);
+-
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ return -EBUSY;
+
+ old_vector = cfg->vector;
+ if (old_vector) {
+- cpumask_t tmp;
+- cpus_and(tmp, cfg->domain, mask);
+- if (!cpus_empty(tmp))
++ cpus_and(tmp_mask, *mask, cpu_online_map);
++ cpus_and(tmp_mask, cfg->domain, tmp_mask);
++ if (!cpus_empty(tmp_mask))
+ return 0;
+ }
+
+- for_each_cpu_mask_nr(cpu, mask) {
+- cpumask_t domain, new_mask;
++ /* Only try and allocate irqs on cpus that are present */
++ for_each_cpu_mask_and(cpu, *mask, cpu_online_map) {
+ int new_cpu;
+ int vector, offset;
+
+- domain = vector_allocation_domain(cpu);
+- cpus_and(new_mask, domain, cpu_online_map);
++ vector_allocation_domain(cpu, &tmp_mask);
+
+ vector = current_vector;
+ offset = current_offset;
+@@ -854,7 +851,7 @@ next:
+ continue;
+ if (vector == IA32_SYSCALL_VECTOR)
+ goto next;
+- for_each_cpu_mask_nr(new_cpu, new_mask)
++ for_each_cpu_mask_and(new_cpu, tmp_mask, cpu_online_map)
+ if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+ goto next;
+ /* Found one! */
+@@ -864,16 +861,16 @@ next:
+ cfg->move_in_progress = 1;
+ cfg->old_domain = cfg->domain;
+ }
+- for_each_cpu_mask_nr(new_cpu, new_mask)
++ for_each_cpu_mask_and(new_cpu, tmp_mask, cpu_online_map)
+ per_cpu(vector_irq, new_cpu)[vector] = irq;
+ cfg->vector = vector;
+- cfg->domain = domain;
++ cfg->domain = tmp_mask;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+
+-static int assign_irq_vector(int irq, cpumask_t mask)
++static int assign_irq_vector(int irq, const cpumask_t *mask)
+ {
+ int err;
+ unsigned long flags;
+@@ -1031,8 +1028,8 @@ static void setup_IO_APIC_irq(int apic,
+ if (!IO_APIC_IRQ(irq))
+ return;
+
+- mask = TARGET_CPUS;
+- if (assign_irq_vector(irq, mask))
++ mask = *TARGET_CPUS;
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(mask, cfg->domain, mask);
+@@ -1045,7 +1042,7 @@ static void setup_IO_APIC_irq(int apic,
+
+
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+- cpu_mask_to_apicid(mask), trigger, polarity,
++ cpu_mask_to_apicid(&mask), trigger, polarity,
+ cfg->vector)) {
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ mp_ioapics[apic].mp_apicid, pin);
+@@ -1543,7 +1540,7 @@ static int ioapic_retrigger_irq(unsigned
+ unsigned long flags;
+
+ spin_lock_irqsave(&vector_lock, flags);
+- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
++ send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return 1;
+@@ -1588,7 +1585,7 @@ static void migrate_ioapic_irq(int irq,
+ {
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct irq_desc *desc = irq_desc + irq;
+- cpumask_t tmp, cleanup_mask;
++ cpumask_t tmp;
+ struct irte irte;
+ int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+ unsigned int dest;
+@@ -1601,11 +1598,11 @@ static void migrate_ioapic_irq(int irq,
+ if (get_irte(irq, &irte))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ if (modify_ioapic_rte) {
+ spin_lock_irqsave(&ioapic_lock, flags);
+@@ -1622,9 +1619,9 @@ static void migrate_ioapic_irq(int irq,
+ modify_irte(irq, &irte);
+
+ if (cfg->move_in_progress) {
+- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++ cpus_and(tmp, cfg->old_domain, cpu_online_map);
++ cfg->move_cleanup_count = cpus_weight(tmp);
++ send_IPI_mask(&tmp, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+@@ -1749,7 +1746,7 @@ static void irq_complete_move(unsigned i
+
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++ send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+ }
+@@ -2329,13 +2326,13 @@ static int msi_compose_msg(struct pci_de
+ unsigned dest;
+ cpumask_t tmp;
+
+- tmp = TARGET_CPUS;
+- err = assign_irq_vector(irq, tmp);
++ tmp = *TARGET_CPUS;
++ err = assign_irq_vector(irq, &tmp);
+ if (err)
+ return err;
+
+ cpus_and(tmp, cfg->domain, tmp);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ #ifdef CONFIG_INTR_REMAP
+ if (irq_remapped(irq)) {
+@@ -2400,11 +2397,11 @@ static void set_msi_irq_affinity(unsigne
+ if (cpus_empty(tmp))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ read_msi_msg(irq, &msg);
+
+@@ -2426,7 +2423,7 @@ static void ir_set_msi_irq_affinity(unsi
+ {
+ struct irq_cfg *cfg = irq_cfg + irq;
+ unsigned int dest;
+- cpumask_t tmp, cleanup_mask;
++ cpumask_t tmp;
+ struct irte irte;
+
+ cpus_and(tmp, mask, cpu_online_map);
+@@ -2436,11 +2433,11 @@ static void ir_set_msi_irq_affinity(unsi
+ if (get_irte(irq, &irte))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ irte.vector = cfg->vector;
+ irte.dest_id = IRTE_DEST(dest);
+@@ -2456,9 +2453,9 @@ static void ir_set_msi_irq_affinity(unsi
+ * vector allocation.
+ */
+ if (cfg->move_in_progress) {
+- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++ cpus_and(tmp, cfg->old_domain, cpu_online_map);
++ cfg->move_cleanup_count = cpus_weight(tmp);
++ send_IPI_mask(&tmp, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+
+@@ -2653,11 +2650,11 @@ static void dmar_msi_set_affinity(unsign
+ if (cpus_empty(tmp))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ dmar_msi_read(irq, &msg);
+
+@@ -2729,11 +2726,11 @@ static void set_ht_irq_affinity(unsigned
+ if (cpus_empty(tmp))
+ return;
+
+- if (assign_irq_vector(irq, mask))
++ if (assign_irq_vector(irq, &mask))
+ return;
+
+ cpus_and(tmp, cfg->domain, mask);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ target_ht_irq(irq, dest, cfg->vector);
+ irq_desc[irq].affinity = mask;
+@@ -2757,14 +2754,14 @@ int arch_setup_ht_irq(unsigned int irq,
+ int err;
+ cpumask_t tmp;
+
+- tmp = TARGET_CPUS;
+- err = assign_irq_vector(irq, tmp);
++ tmp = *TARGET_CPUS;
++ err = assign_irq_vector(irq, &tmp);
+ if (!err) {
+ struct ht_irq_msg msg;
+ unsigned dest;
+
+ cpus_and(tmp, cfg->domain, tmp);
+- dest = cpu_mask_to_apicid(tmp);
++ dest = cpu_mask_to_apicid(&tmp);
+
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+@@ -2882,10 +2879,10 @@ void __init setup_ioapic_dest(void)
+ irq_polarity(irq_entry));
+ #ifdef CONFIG_INTR_REMAP
+ else if (intr_remapping_enabled)
+- set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
++ set_ir_ioapic_affinity_irq(irq, *TARGET_CPUS);
+ #endif
+ else
+- set_ioapic_affinity_irq(irq, TARGET_CPUS);
++ set_ioapic_affinity_irq(irq, *TARGET_CPUS);
+ }
+
+ }
+--- a/arch/x86/kernel/ipi.c
++++ b/arch/x86/kernel/ipi.c
+@@ -114,9 +114,9 @@ static inline void __send_IPI_dest_field
+ /*
+ * This is only used on smaller machines.
+ */
+-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
++void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
+ {
+- unsigned long mask = cpus_addr(cpumask)[0];
++ unsigned long mask = cpus_addr(*cpumask)[0];
+ unsigned long flags;
+
+ local_irq_save(flags);
+@@ -125,7 +125,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
+ local_irq_restore(flags);
+ }
+
+-void send_IPI_mask_sequence(cpumask_t mask, int vector)
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+ {
+ unsigned long flags;
+ unsigned int query_cpu;
+@@ -137,12 +137,24 @@ void send_IPI_mask_sequence(cpumask_t ma
+ */
+
+ local_irq_save(flags);
+- for_each_possible_cpu(query_cpu) {
+- if (cpu_isset(query_cpu, mask)) {
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
++ local_irq_restore(flags);
++}
++
++void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++ unsigned long flags;
++ unsigned int query_cpu;
++ unsigned int this_cpu = smp_processor_id();
++
++ /* See Hack comment above */
++
++ local_irq_save(flags);
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++ if (query_cpu != this_cpu)
+ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
+ vector);
+- }
+- }
+ local_irq_restore(flags);
+ }
+
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -118,26 +118,17 @@ static void native_smp_send_reschedule(i
+ WARN_ON(1);
+ return;
+ }
+- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
++ send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ }
+
+ void native_send_call_func_single_ipi(int cpu)
+ {
+- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
++ send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ }
+
+ void native_send_call_func_ipi(const cpumask_t *mask)
+ {
+- cpumask_t allbutself;
+-
+- allbutself = cpu_online_map;
+- cpu_clear(smp_processor_id(), allbutself);
+-
+- if (cpus_equal(*mask, allbutself) &&
+- cpus_equal(cpu_online_map, cpu_callout_map))
+- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+- else
+- send_IPI_mask(*mask, CALL_FUNCTION_VECTOR);
++ send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
+ }
+
+ static void stop_this_cpu(void *dummy)
+--- a/arch/x86/kernel/tlb_32.c
++++ b/arch/x86/kernel/tlb_32.c
+@@ -158,7 +158,7 @@ void native_flush_tlb_others(const cpuma
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
++ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (!cpus_empty(flush_cpumask))
+ /* nothing. lockup detection does not belong here */
+--- a/arch/x86/kernel/tlb_64.c
++++ b/arch/x86/kernel/tlb_64.c
+@@ -186,7 +186,7 @@ void native_flush_tlb_others(const cpuma
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
++ send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+
+ while (!cpus_empty(f->flush_cpumask))
+ cpu_relax();
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -157,7 +157,7 @@ static void __init xen_fill_possible_map
+ {
+ int i, rc;
+
+- for (i = 0; i < NR_CPUS; i++) {
++ for (i = 0; i < nr_cpu_ids; i++) {
+ rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
+ if (rc >= 0) {
+ num_processors++;
+@@ -195,7 +195,7 @@ static void __init xen_smp_prepare_cpus(
+
+ /* Restrict the possible_map according to max_cpus. */
+ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
+- for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
++ for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
+ continue;
+ cpu_clear(cpu, cpu_possible_map);
+ }
+@@ -361,13 +361,11 @@ static void xen_smp_send_reschedule(int
+ xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
+ }
+
+-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
++static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
+ {
+ unsigned cpu;
+
+- cpus_and(mask, mask, cpu_online_map);
+-
+- for_each_cpu_mask_nr(cpu, mask)
++ for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
+ xen_send_IPI_one(cpu, vector);
+ }
+
+@@ -375,7 +373,7 @@ static void xen_smp_send_call_function_i
+ {
+ int cpu;
+
+- xen_send_IPI_mask(*mask, XEN_CALL_FUNCTION_VECTOR);
++ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask_nr(cpu, *mask) {
+@@ -388,7 +386,8 @@ static void xen_smp_send_call_function_i
+
+ static void xen_smp_send_call_function_single_ipi(int cpu)
+ {
+- xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
++ xen_send_IPI_mask(&cpumask_of_cpu(cpu),
++ XEN_CALL_FUNCTION_SINGLE_VECTOR);
+ }
+
+ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
+--- a/include/asm-x86/genapic_32.h
++++ b/include/asm-x86/genapic_32.h
+@@ -23,7 +23,7 @@ struct genapic {
+ int (*probe)(void);
+
+ int (*apic_id_registered)(void);
+- cpumask_t (*target_cpus)(void);
++ const cpumask_t *(*target_cpus)(void);
+ int int_delivery_mode;
+ int int_dest_mode;
+ int ESR_DISABLE;
+@@ -56,11 +56,12 @@ struct genapic {
+
+ unsigned (*get_apic_id)(unsigned long x);
+ unsigned long apic_id_mask;
+- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
++ unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+
+ #ifdef CONFIG_SMP
+ /* ipi */
+- void (*send_IPI_mask)(cpumask_t mask, int vector);
++ void (*send_IPI_mask)(const cpumask_t *mask, int vector);
++ void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+ #endif
+@@ -106,6 +107,7 @@ struct genapic {
+ APICFUNC(cpu_mask_to_apicid) \
+ APICFUNC(acpi_madt_oem_check) \
+ IPIFUNC(send_IPI_mask) \
++ IPIFUNC(send_IPI_mask_allbutself) \
+ IPIFUNC(send_IPI_allbutself) \
+ IPIFUNC(send_IPI_all) \
+ APICFUNC(enable_apic_mode) \
+--- a/include/asm-x86/genapic_64.h
++++ b/include/asm-x86/genapic_64.h
+@@ -1,6 +1,8 @@
+ #ifndef _ASM_GENAPIC_H
+ #define _ASM_GENAPIC_H 1
+
++#include <linux/cpumask.h>
++
+ /*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+@@ -18,16 +20,17 @@ struct genapic {
+ u32 int_delivery_mode;
+ u32 int_dest_mode;
+ int (*apic_id_registered)(void);
+- cpumask_t (*target_cpus)(void);
+- cpumask_t (*vector_allocation_domain)(int cpu);
++ const cpumask_t *(*target_cpus)(void);
++ void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+ void (*init_apic_ldr)(void);
+ /* ipi */
+- void (*send_IPI_mask)(cpumask_t mask, int vector);
++ void (*send_IPI_mask)(const cpumask_t *mask, int vector);
++ void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+ void (*send_IPI_allbutself)(int vector);
+ void (*send_IPI_all)(int vector);
+ void (*send_IPI_self)(int vector);
+ /* */
+- unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
++ unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+ unsigned int (*phys_pkg_id)(int index_msb);
+ unsigned int (*get_apic_id)(unsigned long x);
+ unsigned long (*set_apic_id)(unsigned int id);
+--- a/include/asm-x86/ipi.h
++++ b/include/asm-x86/ipi.h
+@@ -117,7 +117,7 @@ static inline void __send_IPI_dest_field
+ native_apic_mem_write(APIC_ICR, cfg);
+ }
+
+-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
++static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+ {
+ unsigned long flags;
+ unsigned long query_cpu;
+@@ -128,10 +128,26 @@ static inline void send_IPI_mask_sequenc
+ * - mbligh
+ */
+ local_irq_save(flags);
+- for_each_cpu_mask_nr(query_cpu, mask) {
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
+ __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
+ vector, APIC_DEST_PHYSICAL);
+- }
++ local_irq_restore(flags);
++}
++
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++ unsigned long flags;
++ unsigned int query_cpu;
++ unsigned int this_cpu = smp_processor_id();
++
++ /* See Hack comment above */
++
++ local_irq_save(flags);
++ for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++ if (query_cpu != this_cpu)
++ __send_IPI_dest_field(
++ per_cpu(x86_cpu_to_apicid, query_cpu),
++ vector, APIC_DEST_PHYSICAL);
+ local_irq_restore(flags);
+ }
+
+--- a/include/asm-x86/mach-bigsmp/mach_apic.h
++++ b/include/asm-x86/mach-bigsmp/mach_apic.h
+@@ -10,7 +10,7 @@ static inline int apic_id_registered(voi
+ }
+
+ /* Round robin the irqs amoung the online cpus */
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+ static unsigned long cpu = NR_CPUS;
+ do {
+@@ -19,7 +19,7 @@ static inline cpumask_t target_cpus(void
+ else
+ cpu = next_cpu(cpu, cpu_online_map);
+ } while (cpu >= NR_CPUS);
+- return cpumask_of_cpu(cpu);
++ return &cpumask_of_cpu(cpu);
+ }
+
+ #undef APIC_DEST_LOGICAL
+@@ -126,12 +126,12 @@ static inline int check_phys_apicid_pres
+ }
+
+ /* As we are using single CPU as destination, pick only one CPU here */
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int cpu;
+ int apicid;
+
+- cpu = first_cpu(cpumask);
++ cpu = first_cpu(*cpumask);
+ apicid = cpu_to_logical_apicid(cpu);
+ return apicid;
+ }
+--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
++++ b/include/asm-x86/mach-bigsmp/mach_ipi.h
+@@ -1,25 +1,30 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ send_IPI_mask_sequence(mask, vector);
+ }
+
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+- cpumask_t mask = cpu_online_map;
+- cpu_clear(smp_processor_id(), mask);
++ cpumask_t allbutself = *mask;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ if (!cpus_empty(allbutself))
++ send_IPI_mask_sequence(&allbutself, vector);
++}
+
+- if (!cpus_empty(mask))
+- send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+
+ static inline void send_IPI_all(int vector)
+ {
+- send_IPI_mask(cpu_online_map, vector);
++ send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ #endif /* __ASM_MACH_IPI_H */
+--- a/include/asm-x86/mach-default/mach_apic.h
++++ b/include/asm-x86/mach-default/mach_apic.h
+@@ -8,12 +8,12 @@
+
+ #define APIC_DFR_VALUE (APIC_DFR_FLAT)
+
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+ #ifdef CONFIG_SMP
+- return cpu_online_map;
++ return &cpu_online_map;
+ #else
+- return cpumask_of_cpu(0);
++ return &cpumask_of_cpu(0);
+ #endif
+ }
+
+@@ -59,9 +59,9 @@ static inline int apic_id_registered(voi
+ return physid_isset(read_apic_id(), phys_cpu_present_map);
+ }
+
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+- return cpus_addr(cpumask)[0];
++ return cpus_addr(*cpumask)[0];
+ }
+
+ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+@@ -115,7 +115,7 @@ static inline int cpu_to_logical_apicid(
+
+ static inline int cpu_present_to_apicid(int mps_cpu)
+ {
+- if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
++ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+ else
+ return BAD_APICID;
+--- a/include/asm-x86/mach-default/mach_ipi.h
++++ b/include/asm-x86/mach-default/mach_ipi.h
+@@ -4,7 +4,8 @@
+ /* Avoid include hell */
+ #define NMI_VECTOR 0x02
+
+-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
++void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
++void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+ void __send_IPI_shortcut(unsigned int shortcut, int vector);
+
+ extern int no_broadcast;
+@@ -12,28 +13,26 @@ extern int no_broadcast;
+ #ifdef CONFIG_X86_64
+ #include <asm/genapic.h>
+ #define send_IPI_mask (genapic->send_IPI_mask)
++#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
+ #else
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ send_IPI_mask_bitmask(mask, vector);
+ }
+ #endif
+
+ static inline void __local_send_IPI_allbutself(int vector)
+ {
+- if (no_broadcast || vector == NMI_VECTOR) {
+- cpumask_t mask = cpu_online_map;
+-
+- cpu_clear(smp_processor_id(), mask);
+- send_IPI_mask(mask, vector);
+- } else
++ if (no_broadcast || vector == NMI_VECTOR)
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
++ else
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+ }
+
+ static inline void __local_send_IPI_all(int vector)
+ {
+ if (no_broadcast || vector == NMI_VECTOR)
+- send_IPI_mask(cpu_online_map, vector);
++ send_IPI_mask(&cpu_online_map, vector);
+ else
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+ }
+--- a/include/asm-x86/mach-es7000/mach_apic.h
++++ b/include/asm-x86/mach-es7000/mach_apic.h
+@@ -9,12 +9,12 @@ static inline int apic_id_registered(voi
+ return (1);
+ }
+
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+ #if defined CONFIG_ES7000_CLUSTERED_APIC
+- return CPU_MASK_ALL;
++ return &CPU_MASK_ALL;
+ #else
+- return cpumask_of_cpu(smp_processor_id());
++ return &cpumask_of_cpu(smp_processor_id());
+ #endif
+ }
+ #define TARGET_CPUS (target_cpus())
+@@ -145,7 +145,7 @@ static inline int check_phys_apicid_pres
+ return (1);
+ }
+
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int num_bits_set;
+ int cpus_found = 0;
+--- a/include/asm-x86/mach-es7000/mach_ipi.h
++++ b/include/asm-x86/mach-es7000/mach_ipi.h
+@@ -1,24 +1,30 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ send_IPI_mask_sequence(mask, vector);
+ }
+
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++ cpumask_t allbutself = *mask;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ if (!cpus_empty(allbutself))
++ send_IPI_mask_sequence(&allbutself, vector);
++}
++
+ static inline void send_IPI_allbutself(int vector)
+ {
+- cpumask_t mask = cpu_online_map;
+- cpu_clear(smp_processor_id(), mask);
+- if (!cpus_empty(mask))
+- send_IPI_mask(mask, vector);
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+
+ static inline void send_IPI_all(int vector)
+ {
+- send_IPI_mask(cpu_online_map, vector);
++ send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ #endif /* __ASM_MACH_IPI_H */
+--- a/include/asm-x86/mach-generic/mach_ipi.h
++++ b/include/asm-x86/mach-generic/mach_ipi.h
+@@ -4,6 +4,7 @@
+ #include <asm/genapic.h>
+
+ #define send_IPI_mask (genapic->send_IPI_mask)
++#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
+ #define send_IPI_allbutself (genapic->send_IPI_allbutself)
+ #define send_IPI_all (genapic->send_IPI_all)
+
+--- a/include/asm-x86/mach-numaq/mach_apic.h
++++ b/include/asm-x86/mach-numaq/mach_apic.h
+@@ -7,9 +7,9 @@
+
+ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+- return CPU_MASK_ALL;
++ return &CPU_MASK_ALL;
+ }
+
+ #define TARGET_CPUS (target_cpus())
+@@ -124,7 +124,7 @@ static inline void enable_apic_mode(void
+ * We use physical apicids here, not logical, so just return the default
+ * physical broadcast to stop people from breaking us
+ */
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ return (int) 0xF;
+ }
+--- a/include/asm-x86/mach-numaq/mach_ipi.h
++++ b/include/asm-x86/mach-numaq/mach_ipi.h
+@@ -1,25 +1,31 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+
+-void send_IPI_mask_sequence(cpumask_t, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ send_IPI_mask_sequence(mask, vector);
+ }
+
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+- cpumask_t mask = cpu_online_map;
+- cpu_clear(smp_processor_id(), mask);
++ cpumask_t allbutself = *mask;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ if (!cpus_empty(allbutself))
++ send_IPI_mask_sequence(&allbutself, vector);
++}
+
+- if (!cpus_empty(mask))
+- send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+
+ static inline void send_IPI_all(int vector)
+ {
+- send_IPI_mask(cpu_online_map, vector);
++ send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ #endif /* __ASM_MACH_IPI_H */
++
+--- a/include/asm-x86/mach-summit/mach_apic.h
++++ b/include/asm-x86/mach-summit/mach_apic.h
+@@ -14,13 +14,13 @@
+
+ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
+
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+ /* CPU_MASK_ALL (0xff) has undefined behaviour with
+ * dest_LowestPrio mode logical clustered apic interrupt routing
+ * Just start on cpu 0. IRQ balancing will spread load
+ */
+- return cpumask_of_cpu(0);
++ return &cpumask_of_cpu(0);
+ }
+ #define TARGET_CPUS (target_cpus())
+
+@@ -138,7 +138,7 @@ static inline void enable_apic_mode(void
+ {
+ }
+
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+ int num_bits_set;
+ int cpus_found = 0;
+--- a/include/asm-x86/mach-summit/mach_ipi.h
++++ b/include/asm-x86/mach-summit/mach_ipi.h
+@@ -1,25 +1,31 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+ send_IPI_mask_sequence(mask, vector);
+ }
+
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+- cpumask_t mask = cpu_online_map;
+- cpu_clear(smp_processor_id(), mask);
++ cpumask_t allbutself = *mask;
++ cpu_clear(smp_processor_id(), allbutself);
++
++ if (!cpus_empty(allbutself))
++ send_IPI_mask_sequence(&allbutself, vector);
++}
+
+- if (!cpus_empty(mask))
+- send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++ send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+
+ static inline void send_IPI_all(int vector)
+ {
+- send_IPI_mask(cpu_online_map, vector);
++ send_IPI_mask(&cpu_online_map, vector);
+ }
+
+ #endif /* __ASM_MACH_IPI_H */
++