Move xen patchset to new version's subdir.
diff --git a/src/patches/suse-2.6.27.31/patches.arch/x86_sgi_cpus4096-05-update-send_IPI_mask.patch b/src/patches/suse-2.6.27.31/patches.arch/x86_sgi_cpus4096-05-update-send_IPI_mask.patch
new file mode 100644 (file)
index 0000000..107c06f
--- /dev/null
@@ -0,0 +1,1652 @@
+From: Mike Travis <travis@sgi.com>
+Subject: x86 cpumask: Updates to support NR_CPUS=4096
+References: bnc#425240 FATE304266
+Patch-mainline: 2.6.28
+
+Signed-off-by: Thomas Renninger <trenn@suse.de>
+
+  * Add for_each_cpu_mask_and() function to eliminate a common need for a
+    temporary cpumask_t variable (see the usage sketch after this list).
+
+  * Change genapic interfaces to accept cpumask_t pointers where possible.
+    Modify external callers to use cpumask_t pointers in function calls.
+
+  * Create a new send_IPI_mask_allbutself which is the same as the
+    send_IPI_mask functions but removes smp_processor_id() from the list.
+    This removes another common need for a temporary cpumask_t variable.
+
+  * Use node_to_cpumask_ptr in place of node_to_cpumask to reduce stack
+    requirements in sched.c.
+
+  * Modify arch/x86/Kconfig to enable MAXSMP and 4096 cpus.
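+
+  A minimal usage sketch of the new pattern (illustrative only, not part of
+  the changes below; it assumes the kernel's <linux/cpumask.h> plus the
+  send_IPI_mask_allbutself helper introduced by this patch, and the
+  notify_others_old/notify_others_new callers are hypothetical names used
+  purely for illustration):
+
+      /* Before (hypothetical caller): temporary cpumask_t on the stack,
+       * 512 bytes when NR_CPUS=4096 */
+      static void notify_others_old(int vector)
+      {
+              cpumask_t tmp = cpu_online_map;
+
+              cpu_clear(smp_processor_id(), tmp);
+              if (!cpus_empty(tmp))
+                      send_IPI_mask(tmp, vector);     /* mask passed by value */
+      }
+
+      /* After (hypothetical caller): no temporary, cpumask_t passed by pointer */
+      static void notify_others_new(const cpumask_t *mask, int vector)
+      {
+              unsigned int cpu;
+
+              /* walks (*mask & cpu_online_map) without building a scratch mask */
+              for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
+                      pr_debug("IPI target: cpu %u\n", cpu);
+
+              send_IPI_mask_allbutself(mask, vector); /* skips smp_processor_id() */
+      }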
+
+Signed-off-by: Mike Travis <travis@sgi.com>
+Acked-by: Rusty Russell <rusty@rustcorp.com.au>
+Signed-off-by: Jiri Slaby <jslaby@suse.de> [bigsmp cpu_mask_to_apicid fix]
+---
+ arch/x86/Kconfig                         |   11 +--
+ arch/x86/kernel/apic_32.c                |    2 
+ arch/x86/kernel/apic_64.c                |    2 
+ arch/x86/kernel/crash.c                  |    5 -
+ arch/x86/kernel/genapic_flat_64.c        |   76 ++++++++++++++++--------
+ arch/x86/kernel/genx2apic_cluster.c      |   60 +++++++++++++------
+ arch/x86/kernel/genx2apic_phys.c         |   55 ++++++++++++-----
+ arch/x86/kernel/genx2apic_uv_x.c         |   43 ++++++++------
+ arch/x86/kernel/io_apic_32.c             |   16 ++---
+ arch/x86/kernel/io_apic_64.c             |   95 +++++++++++++++----------------
+ arch/x86/kernel/ipi.c                    |   26 ++++++--
+ arch/x86/kernel/smp.c                    |   15 ----
+ arch/x86/kernel/tlb_32.c                 |    2 
+ arch/x86/kernel/tlb_64.c                 |    2 
+ arch/x86/xen/smp.c                       |   15 ++--
+ include/asm-x86/genapic_32.h             |    8 +-
+ include/asm-x86/genapic_64.h             |   11 ++-
+ include/asm-x86/ipi.h                    |   22 ++++++-
+ include/asm-x86/mach-bigsmp/mach_apic.h  |    8 +-
+ include/asm-x86/mach-bigsmp/mach_ipi.h   |   21 ++++--
+ include/asm-x86/mach-default/mach_apic.h |   12 +--
+ include/asm-x86/mach-default/mach_ipi.h  |   18 ++---
+ include/asm-x86/mach-es7000/mach_apic.h  |    8 +-
+ include/asm-x86/mach-es7000/mach_ipi.h   |   20 ++++--
+ include/asm-x86/mach-generic/mach_ipi.h  |    1 
+ include/asm-x86/mach-numaq/mach_apic.h   |    6 -
+ include/asm-x86/mach-numaq/mach_ipi.h    |   22 ++++---
+ include/asm-x86/mach-summit/mach_apic.h  |    6 -
+ include/asm-x86/mach-summit/mach_ipi.h   |   22 ++++---
+ 29 files changed, 363 insertions(+), 247 deletions(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -584,15 +584,15 @@ config IOMMU_HELPER
+ config MAXSMP
+       bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+-      depends on X86_64 && SMP && BROKEN
++      depends on X86_64 && SMP
+       default n
+       help
+         Configure maximum number of CPUS and NUMA Nodes for this architecture.
+         If unsure, say N.
+ config NR_CPUS
+-      int "Maximum number of CPUs (2-512)" if !MAXSMP
+-      range 2 512
++      int "Maximum number of CPUs (2-4096)"
++      range 2 4096
+       depends on SMP
+       default "4096" if MAXSMP
+       default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
+@@ -603,7 +603,7 @@ config NR_CPUS
+         minimum value which makes sense is 2.
+         This is purely to save memory - each supported CPU adds
+-        approximately eight kilobytes to the kernel image.
++        approximately one kilobyte to the kernel image.
+ config SCHED_SMT
+       bool "SMT (Hyperthreading) scheduler support"
+@@ -1019,7 +1019,8 @@ config NUMA_EMU
+         number of nodes. This is only useful for debugging.
+ config NODES_SHIFT
+-      int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
++      int "Maximum NUMA Nodes (as a power of 2)"
++      range 9 9   if MAXSMP
+       range 1 9   if X86_64
+       default "9" if MAXSMP
+       default "6" if X86_64
+--- a/arch/x86/kernel/apic_32.c
++++ b/arch/x86/kernel/apic_32.c
+@@ -319,7 +319,7 @@ static void lapic_timer_setup(enum clock
+ static void lapic_timer_broadcast(cpumask_t mask)
+ {
+ #ifdef CONFIG_SMP
+-      send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
++      send_IPI_mask(&mask, LOCAL_TIMER_VECTOR);
+ #endif
+ }
+--- a/arch/x86/kernel/apic_64.c
++++ b/arch/x86/kernel/apic_64.c
+@@ -351,7 +351,7 @@ static void lapic_timer_setup(enum clock
+ static void lapic_timer_broadcast(cpumask_t mask)
+ {
+ #ifdef CONFIG_SMP
+-      send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
++      send_IPI_mask(&mask, LOCAL_TIMER_VECTOR);
+ #endif
+ }
+--- a/arch/x86/kernel/crash.c
++++ b/arch/x86/kernel/crash.c
+@@ -77,10 +77,7 @@ static int crash_nmi_callback(struct not
+ static void smp_send_nmi_allbutself(void)
+ {
+-      cpumask_t mask = cpu_online_map;
+-      cpu_clear(safe_smp_processor_id(), mask);
+-      if (!cpus_empty(mask))
+-              send_IPI_mask(mask, NMI_VECTOR);
++      send_IPI_allbutself(NMI_VECTOR);
+ }
+ static struct notifier_block crash_nmi_nb = {
+--- a/arch/x86/kernel/genapic_flat_64.c
++++ b/arch/x86/kernel/genapic_flat_64.c
+@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char
+       return 1;
+ }
+-static cpumask_t flat_target_cpus(void)
++static const cpumask_t *flat_target_cpus(void)
+ {
+-      return cpu_online_map;
++      return &cpu_online_map;
+ }
+-static cpumask_t flat_vector_allocation_domain(int cpu)
++static void flat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+       /* Careful. Some cpus do not strictly honor the set of cpus
+        * specified in the interrupt destination when using lowest
+@@ -45,8 +45,7 @@ static cpumask_t flat_vector_allocation_
+        * deliver interrupts to the wrong hyperthread when only one
+        * hyperthread was specified in the interrupt desitination.
+        */
+-      cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+-      return domain;
++      *retmask = (cpumask_t) { {[0] = APIC_ALL_CPUS, } };
+ }
+ /*
+@@ -69,9 +68,8 @@ static void flat_init_apic_ldr(void)
+       apic_write(APIC_LDR, val);
+ }
+-static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
++static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+ {
+-      unsigned long mask = cpus_addr(cpumask)[0];
+       unsigned long flags;
+       local_irq_save(flags);
+@@ -79,20 +77,40 @@ static void flat_send_IPI_mask(cpumask_t
+       local_irq_restore(flags);
+ }
++static void flat_send_IPI_mask(const cpumask_t *cpumask, int vector)
++{
++      unsigned long mask = cpus_addr(*cpumask)[0];
++
++      _flat_send_IPI_mask(mask, vector);
++}
++
++static void flat_send_IPI_mask_allbutself(const cpumask_t *cpumask, int vector)
++{
++      unsigned long mask = cpus_addr(*cpumask)[0];
++      int cpu = smp_processor_id();
++
++      if (cpu < BITS_PER_LONG)
++              clear_bit(cpu, &mask);
++      _flat_send_IPI_mask(mask, vector);
++}
++
+ static void flat_send_IPI_allbutself(int vector)
+ {
++      int cpu = smp_processor_id();
+ #ifdef        CONFIG_HOTPLUG_CPU
+       int hotplug = 1;
+ #else
+       int hotplug = 0;
+ #endif
+       if (hotplug || vector == NMI_VECTOR) {
+-              cpumask_t allbutme = cpu_online_map;
++              if (!cpus_equal(cpu_online_map, cpumask_of_cpu(cpu))) {
++                      unsigned long mask = cpus_addr(cpu_online_map)[0];
+-              cpu_clear(smp_processor_id(), allbutme);
++                      if (cpu < BITS_PER_LONG)
++                              clear_bit(cpu, &mask);
+-              if (!cpus_empty(allbutme))
+-                      flat_send_IPI_mask(allbutme, vector);
++                      _flat_send_IPI_mask(mask, vector);
++              }
+       } else if (num_online_cpus() > 1) {
+               __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+       }
+@@ -101,7 +119,7 @@ static void flat_send_IPI_allbutself(int
+ static void flat_send_IPI_all(int vector)
+ {
+       if (vector == NMI_VECTOR)
+-              flat_send_IPI_mask(cpu_online_map, vector);
++              flat_send_IPI_mask(&cpu_online_map, vector);
+       else
+               __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+ }
+@@ -135,9 +153,9 @@ static int flat_apic_id_registered(void)
+       return physid_isset(read_xapic_id(), phys_cpu_present_map);
+ }
+-static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int flat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+-      return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
++      return cpus_addr(*cpumask)[0] & APIC_ALL_CPUS;
+ }
+ static unsigned int phys_pkg_id(int index_msb)
+@@ -157,6 +175,7 @@ struct genapic apic_flat =  {
+       .send_IPI_all = flat_send_IPI_all,
+       .send_IPI_allbutself = flat_send_IPI_allbutself,
+       .send_IPI_mask = flat_send_IPI_mask,
++      .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
+       .send_IPI_self = apic_send_IPI_self,
+       .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+@@ -186,35 +205,39 @@ static int physflat_acpi_madt_oem_check(
+       return 0;
+ }
+-static cpumask_t physflat_target_cpus(void)
++static const cpumask_t *physflat_target_cpus(void)
+ {
+-      return cpu_online_map;
++      return &cpu_online_map;
+ }
+-static cpumask_t physflat_vector_allocation_domain(int cpu)
++static void physflat_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+-      return cpumask_of_cpu(cpu);
++      cpus_clear(*retmask);
++      cpu_set(cpu, *retmask);
+ }
+-static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
++static void physflat_send_IPI_mask(const cpumask_t *cpumask, int vector)
+ {
+       send_IPI_mask_sequence(cpumask, vector);
+ }
+-static void physflat_send_IPI_allbutself(int vector)
++static void physflat_send_IPI_mask_allbutself(const cpumask_t *cpumask,
++                                            int vector)
+ {
+-      cpumask_t allbutme = cpu_online_map;
++      send_IPI_mask_allbutself(cpumask, vector);
++}
+-      cpu_clear(smp_processor_id(), allbutme);
+-      physflat_send_IPI_mask(allbutme, vector);
++static void physflat_send_IPI_allbutself(int vector)
++{
++      send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+ static void physflat_send_IPI_all(int vector)
+ {
+-      physflat_send_IPI_mask(cpu_online_map, vector);
++      physflat_send_IPI_mask(&cpu_online_map, vector);
+ }
+-static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int physflat_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int cpu;
+@@ -222,7 +245,7 @@ static unsigned int physflat_cpu_mask_to
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+-      cpu = first_cpu(cpumask);
++      cpu = first_cpu(*cpumask);
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+@@ -241,6 +264,7 @@ struct genapic apic_physflat =  {
+       .send_IPI_all = physflat_send_IPI_all,
+       .send_IPI_allbutself = physflat_send_IPI_allbutself,
+       .send_IPI_mask = physflat_send_IPI_mask,
++      .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
+       .send_IPI_self = apic_send_IPI_self,
+       .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_cluster.c
++++ b/arch/x86/kernel/genx2apic_cluster.c
+@@ -19,19 +19,18 @@ static int x2apic_acpi_madt_oem_check(ch
+ /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+-static cpumask_t x2apic_target_cpus(void)
++static const cpumask_t *x2apic_target_cpus(void)
+ {
+-      return cpumask_of_cpu(0);
++      return &cpumask_of_cpu(0);
+ }
+ /*
+  * for now each logical cpu is in its own vector allocation domain.
+  */
+-static cpumask_t x2apic_vector_allocation_domain(int cpu)
++static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+-      cpumask_t domain = CPU_MASK_NONE;
+-      cpu_set(cpu, domain);
+-      return domain;
++      cpus_clear(*retmask);
++      cpu_set(cpu, *retmask);
+ }
+ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+@@ -53,32 +52,52 @@ static void __x2apic_send_IPI_dest(unsig
+  * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+  * writes.
+  */
+-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
++static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       unsigned long flags;
+       unsigned long query_cpu;
+       local_irq_save(flags);
+-      for_each_cpu_mask(query_cpu, mask) {
+-              __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+-                                     vector, APIC_DEST_LOGICAL);
+-      }
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++              __x2apic_send_IPI_dest(
++                      per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++                      vector, APIC_DEST_LOGICAL);
+       local_irq_restore(flags);
+ }
+-static void x2apic_send_IPI_allbutself(int vector)
++static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
++      unsigned long flags;
++      unsigned long query_cpu;
++      unsigned long this_cpu = smp_processor_id();
+-      cpu_clear(smp_processor_id(), mask);
++      local_irq_save(flags);
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++              if (query_cpu != this_cpu)
++                      __x2apic_send_IPI_dest(
++                              per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++                              vector, APIC_DEST_LOGICAL);
++      local_irq_restore(flags);
++}
+-      if (!cpus_empty(mask))
+-              x2apic_send_IPI_mask(mask, vector);
++static void x2apic_send_IPI_allbutself(int vector)
++{
++      unsigned long flags;
++      unsigned long query_cpu;
++      unsigned long this_cpu = smp_processor_id();
++
++      local_irq_save(flags);
++      for_each_online_cpu(query_cpu)
++              if (query_cpu != this_cpu)
++                      __x2apic_send_IPI_dest(
++                              per_cpu(x86_cpu_to_logical_apicid, query_cpu),
++                              vector, APIC_DEST_LOGICAL);
++      local_irq_restore(flags);
+ }
+ static void x2apic_send_IPI_all(int vector)
+ {
+-      x2apic_send_IPI_mask(cpu_online_map, vector);
++      x2apic_send_IPI_mask(&cpu_online_map, vector);
+ }
+ static int x2apic_apic_id_registered(void)
+@@ -86,7 +105,7 @@ static int x2apic_apic_id_registered(voi
+       return 1;
+ }
+-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int cpu;
+@@ -94,8 +113,8 @@ static unsigned int x2apic_cpu_mask_to_a
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+-      cpu = first_cpu(cpumask);
+-      if ((unsigned)cpu < NR_CPUS)
++      cpu = first_cpu(*cpumask);
++      if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_logical_apicid, cpu);
+       else
+               return BAD_APICID;
+@@ -147,6 +166,7 @@ struct genapic apic_x2apic_cluster = {
+       .send_IPI_all = x2apic_send_IPI_all,
+       .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+       .send_IPI_mask = x2apic_send_IPI_mask,
++      .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+       .send_IPI_self = x2apic_send_IPI_self,
+       .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_phys.c
++++ b/arch/x86/kernel/genx2apic_phys.c
+@@ -31,16 +31,15 @@ static int x2apic_acpi_madt_oem_check(ch
+ /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+-static cpumask_t x2apic_target_cpus(void)
++static const cpumask_t *x2apic_target_cpus(void)
+ {
+-      return cpumask_of_cpu(0);
++      return &cpumask_of_cpu(0);
+ }
+-static cpumask_t x2apic_vector_allocation_domain(int cpu)
++static void x2apic_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+-      cpumask_t domain = CPU_MASK_NONE;
+-      cpu_set(cpu, domain);
+-      return domain;
++      cpus_clear(*retmask);
++      cpu_set(cpu, *retmask);
+ }
+ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsig
+       x2apic_icr_write(cfg, apicid);
+ }
+-static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
++static void x2apic_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       unsigned long flags;
+       unsigned long query_cpu;
+       local_irq_save(flags);
+-      for_each_cpu_mask(query_cpu, mask) {
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map) {
+               __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+                                      vector, APIC_DEST_PHYSICAL);
+       }
+       local_irq_restore(flags);
+ }
+-static void x2apic_send_IPI_allbutself(int vector)
++static void x2apic_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
++      unsigned long flags;
++      unsigned long query_cpu;
++      unsigned long this_cpu = smp_processor_id();
++
++      local_irq_save(flags);
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map) {
++              if (query_cpu != this_cpu)
++                      __x2apic_send_IPI_dest(
++                              per_cpu(x86_cpu_to_apicid, query_cpu),
++                              vector, APIC_DEST_PHYSICAL);
++      }
++      local_irq_restore(flags);
++}
+-      cpu_clear(smp_processor_id(), mask);
++static void x2apic_send_IPI_allbutself(int vector)
++{
++      unsigned long flags;
++      unsigned long query_cpu;
++      unsigned long this_cpu = smp_processor_id();
+-      if (!cpus_empty(mask))
+-              x2apic_send_IPI_mask(mask, vector);
++      local_irq_save(flags);
++      for_each_online_cpu(query_cpu)
++              if (query_cpu != this_cpu)
++                      __x2apic_send_IPI_dest(
++                              per_cpu(x86_cpu_to_apicid, query_cpu),
++                              vector, APIC_DEST_PHYSICAL);
++      local_irq_restore(flags);
+ }
+ static void x2apic_send_IPI_all(int vector)
+ {
+-      x2apic_send_IPI_mask(cpu_online_map, vector);
++      x2apic_send_IPI_mask(&cpu_online_map, vector);
+ }
+ static int x2apic_apic_id_registered(void)
+@@ -89,7 +109,7 @@ static int x2apic_apic_id_registered(voi
+       return 1;
+ }
+-static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int x2apic_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int cpu;
+@@ -97,8 +117,8 @@ static unsigned int x2apic_cpu_mask_to_a
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+-      cpu = first_cpu(cpumask);
+-      if ((unsigned)cpu < NR_CPUS)
++      cpu = first_cpu(*cpumask);
++      if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+               return BAD_APICID;
+@@ -147,6 +167,7 @@ struct genapic apic_x2apic_phys = {
+       .send_IPI_all = x2apic_send_IPI_all,
+       .send_IPI_allbutself = x2apic_send_IPI_allbutself,
+       .send_IPI_mask = x2apic_send_IPI_mask,
++      .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
+       .send_IPI_self = x2apic_send_IPI_self,
+       .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+--- a/arch/x86/kernel/genx2apic_uv_x.c
++++ b/arch/x86/kernel/genx2apic_uv_x.c
+@@ -75,16 +75,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+ /* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+-static cpumask_t uv_target_cpus(void)
++static const cpumask_t *uv_target_cpus(void)
+ {
+-      return cpumask_of_cpu(0);
++      return &cpumask_of_cpu(0);
+ }
+-static cpumask_t uv_vector_allocation_domain(int cpu)
++static void uv_vector_allocation_domain(int cpu, cpumask_t *retmask)
+ {
+-      cpumask_t domain = CPU_MASK_NONE;
+-      cpu_set(cpu, domain);
+-      return domain;
++      cpus_clear(*retmask);
++      cpu_set(cpu, *retmask);
+ }
+ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+@@ -123,28 +122,37 @@ static void uv_send_IPI_one(int cpu, int
+       uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+ }
+-static void uv_send_IPI_mask(cpumask_t mask, int vector)
++static void uv_send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       unsigned int cpu;
+-      for_each_possible_cpu(cpu)
+-              if (cpu_isset(cpu, mask))
++      for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
++              uv_send_IPI_one(cpu, vector);
++}
++
++static void uv_send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++      unsigned int cpu;
++      unsigned int this_cpu = smp_processor_id();
++
++      for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
++              if (cpu != this_cpu)
+                       uv_send_IPI_one(cpu, vector);
+ }
+ static void uv_send_IPI_allbutself(int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
+-
+-      cpu_clear(smp_processor_id(), mask);
++      unsigned int cpu;
++      unsigned int this_cpu = smp_processor_id();
+-      if (!cpus_empty(mask))
+-              uv_send_IPI_mask(mask, vector);
++      for_each_online_cpu(cpu)
++              if (cpu != this_cpu)
++                      uv_send_IPI_one(cpu, vector);
+ }
+ static void uv_send_IPI_all(int vector)
+ {
+-      uv_send_IPI_mask(cpu_online_map, vector);
++      uv_send_IPI_mask(&cpu_online_map, vector);
+ }
+ static int uv_apic_id_registered(void)
+@@ -156,7 +164,7 @@ static void uv_init_apic_ldr(void)
+ {
+ }
+-static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
++static unsigned int uv_cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int cpu;
+@@ -164,7 +172,7 @@ static unsigned int uv_cpu_mask_to_apici
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+-      cpu = first_cpu(cpumask);
++      cpu = first_cpu(*cpumask);
+       if ((unsigned)cpu < nr_cpu_ids)
+               return per_cpu(x86_cpu_to_apicid, cpu);
+       else
+@@ -219,6 +227,7 @@ struct genapic apic_x2apic_uv_x = {
+       .init_apic_ldr = uv_init_apic_ldr,
+       .send_IPI_all = uv_send_IPI_all,
+       .send_IPI_allbutself = uv_send_IPI_allbutself,
++      .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
+       .send_IPI_mask = uv_send_IPI_mask,
+       /* ZZZ.send_IPI_self = uv_send_IPI_self, */
+       .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+--- a/arch/x86/kernel/io_apic_32.c
++++ b/arch/x86/kernel/io_apic_32.c
+@@ -344,11 +344,11 @@ static void set_ioapic_affinity_irq(unsi
+       cpus_and(tmp, cpumask, cpu_online_map);
+       if (cpus_empty(tmp))
+-              tmp = TARGET_CPUS;
++              tmp = *TARGET_CPUS;
+       cpus_and(cpumask, tmp, CPU_MASK_ALL);
+-      apicid_value = cpu_mask_to_apicid(cpumask);
++      apicid_value = cpu_mask_to_apicid(&cpumask);
+       /* Prepare to do the io_apic_write */
+       apicid_value = apicid_value << 24;
+       spin_lock_irqsave(&ioapic_lock, flags);
+@@ -926,7 +926,7 @@ void __init setup_ioapic_dest(void)
+                       if (irq_entry == -1)
+                               continue;
+                       irq = pin_2_irq(irq_entry, ioapic, pin);
+-                      set_ioapic_affinity_irq(irq, TARGET_CPUS);
++                      set_ioapic_affinity_irq(irq, *TARGET_CPUS);
+               }
+       }
+@@ -2522,13 +2522,13 @@ static void set_msi_irq_affinity(unsigne
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+-              tmp = TARGET_CPUS;
++              tmp = *TARGET_CPUS;
+       vector = assign_irq_vector(irq);
+       if (vector < 0)
+               return;
+-      dest = cpu_mask_to_apicid(mask);
++      dest = cpu_mask_to_apicid(&mask);
+       read_msi_msg(irq, &msg);
+@@ -2615,11 +2615,11 @@ static void set_ht_irq_affinity(unsigned
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+-              tmp = TARGET_CPUS;
++              tmp = *TARGET_CPUS;
+       cpus_and(mask, tmp, CPU_MASK_ALL);
+-      dest = cpu_mask_to_apicid(mask);
++      dest = cpu_mask_to_apicid(&mask);
+       target_ht_irq(irq, dest);
+       irq_desc[irq].affinity = mask;
+@@ -2649,7 +2649,7 @@ int arch_setup_ht_irq(unsigned int irq,
+               cpus_clear(tmp);
+               cpu_set(vector >> 8, tmp);
+-              dest = cpu_mask_to_apicid(tmp);
++              dest = cpu_mask_to_apicid(&tmp);
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+--- a/arch/x86/kernel/io_apic_64.c
++++ b/arch/x86/kernel/io_apic_64.c
+@@ -83,7 +83,7 @@ static struct irq_cfg irq_cfg[NR_IRQS] _
+       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+ };
+-static int assign_irq_vector(int irq, cpumask_t mask);
++static int assign_irq_vector(int irq, const cpumask_t *mask);
+ int first_system_vector = 0xfe;
+@@ -335,11 +335,11 @@ static void set_ioapic_affinity_irq(unsi
+       if (cpus_empty(tmp))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       /*
+        * Only the high 8 bits are valid.
+@@ -798,7 +798,7 @@ void unlock_vector_lock(void)
+       spin_unlock(&vector_lock);
+ }
+-static int __assign_irq_vector(int irq, cpumask_t mask)
++static int __assign_irq_vector(int irq, const cpumask_t *mask)
+ {
+       /*
+        * NOTE! The local APIC isn't very good at handling
+@@ -815,31 +815,28 @@ static int __assign_irq_vector(int irq,
+       unsigned int old_vector;
+       int cpu;
+       struct irq_cfg *cfg;
++      cpumask_t tmp_mask;
+       BUG_ON((unsigned)irq >= NR_IRQS);
+       cfg = &irq_cfg[irq];
+-      /* Only try and allocate irqs on cpus that are present */
+-      cpus_and(mask, mask, cpu_online_map);
+-
+       if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+               return -EBUSY;
+       old_vector = cfg->vector;
+       if (old_vector) {
+-              cpumask_t tmp;
+-              cpus_and(tmp, cfg->domain, mask);
+-              if (!cpus_empty(tmp))
++              cpus_and(tmp_mask, *mask, cpu_online_map);
++              cpus_and(tmp_mask, cfg->domain, tmp_mask);
++              if (!cpus_empty(tmp_mask))
+                       return 0;
+       }
+-      for_each_cpu_mask_nr(cpu, mask) {
+-              cpumask_t domain, new_mask;
++      /* Only try and allocate irqs on cpus that are present */
++      for_each_cpu_mask_and(cpu, *mask, cpu_online_map) {
+               int new_cpu;
+               int vector, offset;
+-              domain = vector_allocation_domain(cpu);
+-              cpus_and(new_mask, domain, cpu_online_map);
++              vector_allocation_domain(cpu, &tmp_mask);
+               vector = current_vector;
+               offset = current_offset;
+@@ -854,7 +851,7 @@ next:
+                       continue;
+               if (vector == IA32_SYSCALL_VECTOR)
+                       goto next;
+-              for_each_cpu_mask_nr(new_cpu, new_mask)
++              for_each_cpu_mask_and(new_cpu, tmp_mask, cpu_online_map)
+                       if (per_cpu(vector_irq, new_cpu)[vector] != -1)
+                               goto next;
+               /* Found one! */
+@@ -864,16 +861,16 @@ next:
+                       cfg->move_in_progress = 1;
+                       cfg->old_domain = cfg->domain;
+               }
+-              for_each_cpu_mask_nr(new_cpu, new_mask)
++              for_each_cpu_mask_and(new_cpu, tmp_mask, cpu_online_map)
+                       per_cpu(vector_irq, new_cpu)[vector] = irq;
+               cfg->vector = vector;
+-              cfg->domain = domain;
++              cfg->domain = tmp_mask;
+               return 0;
+       }
+       return -ENOSPC;
+ }
+-static int assign_irq_vector(int irq, cpumask_t mask)
++static int assign_irq_vector(int irq, const cpumask_t *mask)
+ {
+       int err;
+       unsigned long flags;
+@@ -1031,8 +1028,8 @@ static void setup_IO_APIC_irq(int apic,
+       if (!IO_APIC_IRQ(irq))
+               return;
+-      mask = TARGET_CPUS;
+-      if (assign_irq_vector(irq, mask))
++      mask = *TARGET_CPUS;
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(mask, cfg->domain, mask);
+@@ -1045,7 +1042,7 @@ static void setup_IO_APIC_irq(int apic,
+       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+-                             cpu_mask_to_apicid(mask), trigger, polarity,
++                             cpu_mask_to_apicid(&mask), trigger, polarity,
+                              cfg->vector)) {
+               printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+                      mp_ioapics[apic].mp_apicid, pin);
+@@ -1543,7 +1540,7 @@ static int ioapic_retrigger_irq(unsigned
+       unsigned long flags;
+       spin_lock_irqsave(&vector_lock, flags);
+-      send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
++      send_IPI_mask(&cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
+       spin_unlock_irqrestore(&vector_lock, flags);
+       return 1;
+@@ -1588,7 +1585,7 @@ static void migrate_ioapic_irq(int irq,
+ {
+       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_desc *desc = irq_desc + irq;
+-      cpumask_t tmp, cleanup_mask;
++      cpumask_t tmp;
+       struct irte irte;
+       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       unsigned int dest;
+@@ -1601,11 +1598,11 @@ static void migrate_ioapic_irq(int irq,
+       if (get_irte(irq, &irte))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       if (modify_ioapic_rte) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+@@ -1622,9 +1619,9 @@ static void migrate_ioapic_irq(int irq,
+       modify_irte(irq, &irte);
+       if (cfg->move_in_progress) {
+-              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+-              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+-              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++              cpus_and(tmp, cfg->old_domain, cpu_online_map);
++              cfg->move_cleanup_count = cpus_weight(tmp);
++              send_IPI_mask(&tmp, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+@@ -1749,7 +1746,7 @@ static void irq_complete_move(unsigned i
+               cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+               cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+-              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++              send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+ }
+@@ -2329,13 +2326,13 @@ static int msi_compose_msg(struct pci_de
+       unsigned dest;
+       cpumask_t tmp;
+-      tmp = TARGET_CPUS;
+-      err = assign_irq_vector(irq, tmp);
++      tmp = *TARGET_CPUS;
++      err = assign_irq_vector(irq, &tmp);
+       if (err)
+               return err;
+       cpus_and(tmp, cfg->domain, tmp);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+ #ifdef CONFIG_INTR_REMAP
+       if (irq_remapped(irq)) {
+@@ -2400,11 +2397,11 @@ static void set_msi_irq_affinity(unsigne
+       if (cpus_empty(tmp))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       read_msi_msg(irq, &msg);
+@@ -2426,7 +2423,7 @@ static void ir_set_msi_irq_affinity(unsi
+ {
+       struct irq_cfg *cfg = irq_cfg + irq;
+       unsigned int dest;
+-      cpumask_t tmp, cleanup_mask;
++      cpumask_t tmp;
+       struct irte irte;
+       cpus_and(tmp, mask, cpu_online_map);
+@@ -2436,11 +2433,11 @@ static void ir_set_msi_irq_affinity(unsi
+       if (get_irte(irq, &irte))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       irte.vector = cfg->vector;
+       irte.dest_id = IRTE_DEST(dest);
+@@ -2456,9 +2453,9 @@ static void ir_set_msi_irq_affinity(unsi
+        * vector allocation.
+        */
+       if (cfg->move_in_progress) {
+-              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+-              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+-              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
++              cpus_and(tmp, cfg->old_domain, cpu_online_map);
++              cfg->move_cleanup_count = cpus_weight(tmp);
++              send_IPI_mask(&tmp, IRQ_MOVE_CLEANUP_VECTOR);
+               cfg->move_in_progress = 0;
+       }
+@@ -2653,11 +2650,11 @@ static void dmar_msi_set_affinity(unsign
+       if (cpus_empty(tmp))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       dmar_msi_read(irq, &msg);
+@@ -2729,11 +2726,11 @@ static void set_ht_irq_affinity(unsigned
+       if (cpus_empty(tmp))
+               return;
+-      if (assign_irq_vector(irq, mask))
++      if (assign_irq_vector(irq, &mask))
+               return;
+       cpus_and(tmp, cfg->domain, mask);
+-      dest = cpu_mask_to_apicid(tmp);
++      dest = cpu_mask_to_apicid(&tmp);
+       target_ht_irq(irq, dest, cfg->vector);
+       irq_desc[irq].affinity = mask;
+@@ -2757,14 +2754,14 @@ int arch_setup_ht_irq(unsigned int irq,
+       int err;
+       cpumask_t tmp;
+-      tmp = TARGET_CPUS;
+-      err = assign_irq_vector(irq, tmp);
++      tmp = *TARGET_CPUS;
++      err = assign_irq_vector(irq, &tmp);
+       if (!err) {
+               struct ht_irq_msg msg;
+               unsigned dest;
+               cpus_and(tmp, cfg->domain, tmp);
+-              dest = cpu_mask_to_apicid(tmp);
++              dest = cpu_mask_to_apicid(&tmp);
+               msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+@@ -2882,10 +2879,10 @@ void __init setup_ioapic_dest(void)
+                                                 irq_polarity(irq_entry));
+ #ifdef CONFIG_INTR_REMAP
+                       else if (intr_remapping_enabled)
+-                              set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
++                              set_ir_ioapic_affinity_irq(irq, *TARGET_CPUS);
+ #endif
+                       else
+-                              set_ioapic_affinity_irq(irq, TARGET_CPUS);
++                              set_ioapic_affinity_irq(irq, *TARGET_CPUS);
+               }
+       }
+--- a/arch/x86/kernel/ipi.c
++++ b/arch/x86/kernel/ipi.c
+@@ -114,9 +114,9 @@ static inline void __send_IPI_dest_field
+ /*
+  * This is only used on smaller machines.
+  */
+-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
++void send_IPI_mask_bitmask(const cpumask_t *cpumask, int vector)
+ {
+-      unsigned long mask = cpus_addr(cpumask)[0];
++      unsigned long mask = cpus_addr(*cpumask)[0];
+       unsigned long flags;
+       local_irq_save(flags);
+@@ -125,7 +125,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
+       local_irq_restore(flags);
+ }
+-void send_IPI_mask_sequence(cpumask_t mask, int vector)
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+ {
+       unsigned long flags;
+       unsigned int query_cpu;
+@@ -137,12 +137,24 @@ void send_IPI_mask_sequence(cpumask_t ma
+        */
+       local_irq_save(flags);
+-      for_each_possible_cpu(query_cpu) {
+-              if (cpu_isset(query_cpu, mask)) {
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++              __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
++      local_irq_restore(flags);
++}
++
++void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++      unsigned long flags;
++      unsigned int query_cpu;
++      unsigned int this_cpu = smp_processor_id();
++
++      /* See Hack comment above */
++
++      local_irq_save(flags);
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++              if (query_cpu != this_cpu)
+                       __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
+                                             vector);
+-              }
+-      }
+       local_irq_restore(flags);
+ }
+--- a/arch/x86/kernel/smp.c
++++ b/arch/x86/kernel/smp.c
+@@ -118,26 +118,17 @@ static void native_smp_send_reschedule(i
+               WARN_ON(1);
+               return;
+       }
+-      send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
++      send_IPI_mask(&cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+ }
+ void native_send_call_func_single_ipi(int cpu)
+ {
+-      send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
++      send_IPI_mask(&cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
+ }
+ void native_send_call_func_ipi(const cpumask_t *mask)
+ {
+-      cpumask_t allbutself;
+-
+-      allbutself = cpu_online_map;
+-      cpu_clear(smp_processor_id(), allbutself);
+-
+-      if (cpus_equal(*mask, allbutself) &&
+-          cpus_equal(cpu_online_map, cpu_callout_map))
+-              send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+-      else
+-              send_IPI_mask(*mask, CALL_FUNCTION_VECTOR);
++      send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
+ }
+ static void stop_this_cpu(void *dummy)
+--- a/arch/x86/kernel/tlb_32.c
++++ b/arch/x86/kernel/tlb_32.c
+@@ -158,7 +158,7 @@ void native_flush_tlb_others(const cpuma
+        * We have to send the IPI only to
+        * CPUs affected.
+        */
+-      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
++      send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
+       while (!cpus_empty(flush_cpumask))
+               /* nothing. lockup detection does not belong here */
+--- a/arch/x86/kernel/tlb_64.c
++++ b/arch/x86/kernel/tlb_64.c
+@@ -186,7 +186,7 @@ void native_flush_tlb_others(const cpuma
+        * We have to send the IPI only to
+        * CPUs affected.
+        */
+-      send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
++      send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+       while (!cpus_empty(f->flush_cpumask))
+               cpu_relax();
+--- a/arch/x86/xen/smp.c
++++ b/arch/x86/xen/smp.c
+@@ -157,7 +157,7 @@ static void __init xen_fill_possible_map
+ {
+       int i, rc;
+-      for (i = 0; i < NR_CPUS; i++) {
++      for (i = 0; i < nr_cpu_ids; i++) {
+               rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
+               if (rc >= 0) {
+                       num_processors++;
+@@ -195,7 +195,7 @@ static void __init xen_smp_prepare_cpus(
+       /* Restrict the possible_map according to max_cpus. */
+       while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
+-              for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
++              for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
+                       continue;
+               cpu_clear(cpu, cpu_possible_map);
+       }
+@@ -361,13 +361,11 @@ static void xen_smp_send_reschedule(int
+       xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
+ }
+-static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
++static void xen_send_IPI_mask(const cpumask_t *mask, enum ipi_vector vector)
+ {
+       unsigned cpu;
+-      cpus_and(mask, mask, cpu_online_map);
+-
+-      for_each_cpu_mask_nr(cpu, mask)
++      for_each_cpu_mask_and(cpu, *mask, cpu_online_map)
+               xen_send_IPI_one(cpu, vector);
+ }
+@@ -375,7 +373,7 @@ static void xen_smp_send_call_function_i
+ {
+       int cpu;
+-      xen_send_IPI_mask(*mask, XEN_CALL_FUNCTION_VECTOR);
++      xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+       /* Make sure other vcpus get a chance to run if they need to. */
+       for_each_cpu_mask_nr(cpu, *mask) {
+@@ -388,7 +386,8 @@ static void xen_smp_send_call_function_i
+ static void xen_smp_send_call_function_single_ipi(int cpu)
+ {
+-      xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
++      xen_send_IPI_mask(&cpumask_of_cpu(cpu),
++                        XEN_CALL_FUNCTION_SINGLE_VECTOR);
+ }
+ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
+--- a/include/asm-x86/genapic_32.h
++++ b/include/asm-x86/genapic_32.h
+@@ -23,7 +23,7 @@ struct genapic {
+       int (*probe)(void);
+       int (*apic_id_registered)(void);
+-      cpumask_t (*target_cpus)(void);
++      const cpumask_t *(*target_cpus)(void);
+       int int_delivery_mode;
+       int int_dest_mode;
+       int ESR_DISABLE;
+@@ -56,11 +56,12 @@ struct genapic {
+       unsigned (*get_apic_id)(unsigned long x);
+       unsigned long apic_id_mask;
+-      unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
++      unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+ #ifdef CONFIG_SMP
+       /* ipi */
+-      void (*send_IPI_mask)(cpumask_t mask, int vector);
++      void (*send_IPI_mask)(const cpumask_t *mask, int vector);
++      void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+       void (*send_IPI_allbutself)(int vector);
+       void (*send_IPI_all)(int vector);
+ #endif
+@@ -106,6 +107,7 @@ struct genapic {
+       APICFUNC(cpu_mask_to_apicid)                    \
+       APICFUNC(acpi_madt_oem_check)                   \
+       IPIFUNC(send_IPI_mask)                          \
++      IPIFUNC(send_IPI_mask_allbutself)               \
+       IPIFUNC(send_IPI_allbutself)                    \
+       IPIFUNC(send_IPI_all)                           \
+       APICFUNC(enable_apic_mode)                      \
+--- a/include/asm-x86/genapic_64.h
++++ b/include/asm-x86/genapic_64.h
+@@ -1,6 +1,8 @@
+ #ifndef _ASM_GENAPIC_H
+ #define _ASM_GENAPIC_H 1
++#include <linux/cpumask.h>
++
+ /*
+  * Copyright 2004 James Cleverdon, IBM.
+  * Subject to the GNU Public License, v.2
+@@ -18,16 +20,17 @@ struct genapic {
+       u32 int_delivery_mode;
+       u32 int_dest_mode;
+       int (*apic_id_registered)(void);
+-      cpumask_t (*target_cpus)(void);
+-      cpumask_t (*vector_allocation_domain)(int cpu);
++      const cpumask_t *(*target_cpus)(void);
++      void (*vector_allocation_domain)(int cpu, cpumask_t *retmask);
+       void (*init_apic_ldr)(void);
+       /* ipi */
+-      void (*send_IPI_mask)(cpumask_t mask, int vector);
++      void (*send_IPI_mask)(const cpumask_t *mask, int vector);
++      void (*send_IPI_mask_allbutself)(const cpumask_t *mask, int vector);
+       void (*send_IPI_allbutself)(int vector);
+       void (*send_IPI_all)(int vector);
+       void (*send_IPI_self)(int vector);
+       /* */
+-      unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
++      unsigned int (*cpu_mask_to_apicid)(const cpumask_t *cpumask);
+       unsigned int (*phys_pkg_id)(int index_msb);
+       unsigned int (*get_apic_id)(unsigned long x);
+       unsigned long (*set_apic_id)(unsigned int id);
+--- a/include/asm-x86/ipi.h
++++ b/include/asm-x86/ipi.h
+@@ -117,7 +117,7 @@ static inline void __send_IPI_dest_field
+       native_apic_mem_write(APIC_ICR, cfg);
+ }
+-static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
++static inline void send_IPI_mask_sequence(const cpumask_t *mask, int vector)
+ {
+       unsigned long flags;
+       unsigned long query_cpu;
+@@ -128,10 +128,26 @@ static inline void send_IPI_mask_sequenc
+        * - mbligh
+        */
+       local_irq_save(flags);
+-      for_each_cpu_mask_nr(query_cpu, mask) {
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
+               __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
+                                     vector, APIC_DEST_PHYSICAL);
+-      }
++      local_irq_restore(flags);
++}
++
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++      unsigned long flags;
++      unsigned int query_cpu;
++      unsigned int this_cpu = smp_processor_id();
++
++      /* See Hack comment above */
++
++      local_irq_save(flags);
++      for_each_cpu_mask_and(query_cpu, *mask, cpu_online_map)
++              if (query_cpu != this_cpu)
++                      __send_IPI_dest_field(
++                              per_cpu(x86_cpu_to_apicid, query_cpu),
++                              vector, APIC_DEST_PHYSICAL);
+       local_irq_restore(flags);
+ }
+--- a/include/asm-x86/mach-bigsmp/mach_apic.h
++++ b/include/asm-x86/mach-bigsmp/mach_apic.h
+@@ -10,7 +10,7 @@ static inline int apic_id_registered(voi
+ }
+ /* Round robin the irqs amoung the online cpus */
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ { 
+       static unsigned long cpu = NR_CPUS;
+       do {
+@@ -19,7 +19,7 @@ static inline cpumask_t target_cpus(void
+               else
+                       cpu = next_cpu(cpu, cpu_online_map);
+       } while (cpu >= NR_CPUS);
+-      return cpumask_of_cpu(cpu);
++      return &cpumask_of_cpu(cpu);
+ }
+ #undef APIC_DEST_LOGICAL
+@@ -126,12 +126,12 @@ static inline int check_phys_apicid_pres
+ }
+ /* As we are using single CPU as destination, pick only one CPU here */
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int cpu;
+       int apicid;     
+-      cpu = first_cpu(cpumask);
++      cpu = first_cpu(*cpumask);
+       apicid = cpu_to_logical_apicid(cpu);
+       return apicid;
+ }
+--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
++++ b/include/asm-x86/mach-bigsmp/mach_ipi.h
+@@ -1,25 +1,30 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       send_IPI_mask_sequence(mask, vector);
+ }
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
+-      cpu_clear(smp_processor_id(), mask);
++      cpumask_t allbutself = *mask;
++      cpu_clear(smp_processor_id(), allbutself);
++
++      if (!cpus_empty(allbutself))
++              send_IPI_mask_sequence(&allbutself, vector);
++}
+-      if (!cpus_empty(mask))
+-              send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++      send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+ static inline void send_IPI_all(int vector)
+ {
+-      send_IPI_mask(cpu_online_map, vector);
++      send_IPI_mask(&cpu_online_map, vector);
+ }
+ #endif /* __ASM_MACH_IPI_H */
+--- a/include/asm-x86/mach-default/mach_apic.h
++++ b/include/asm-x86/mach-default/mach_apic.h
+@@ -8,12 +8,12 @@
+ #define APIC_DFR_VALUE        (APIC_DFR_FLAT)
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ { 
+ #ifdef CONFIG_SMP
+-      return cpu_online_map;
++      return &cpu_online_map;
+ #else
+-      return cpumask_of_cpu(0);
++      return &cpumask_of_cpu(0);
+ #endif
+ } 
+@@ -59,9 +59,9 @@ static inline int apic_id_registered(voi
+       return physid_isset(read_apic_id(), phys_cpu_present_map);
+ }
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+-      return cpus_addr(cpumask)[0];
++      return cpus_addr(*cpumask)[0];
+ }
+ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
+@@ -115,7 +115,7 @@ static inline int cpu_to_logical_apicid(
+ static inline int cpu_present_to_apicid(int mps_cpu)
+ {
+-      if (mps_cpu < NR_CPUS && cpu_present(mps_cpu))
++      if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+               return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+       else
+               return BAD_APICID;
+--- a/include/asm-x86/mach-default/mach_ipi.h
++++ b/include/asm-x86/mach-default/mach_ipi.h
+@@ -4,7 +4,8 @@
+ /* Avoid include hell */
+ #define NMI_VECTOR 0x02
+-void send_IPI_mask_bitmask(cpumask_t mask, int vector);
++void send_IPI_mask_bitmask(const cpumask_t *mask, int vector);
++void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+ void __send_IPI_shortcut(unsigned int shortcut, int vector);
+ extern int no_broadcast;
+@@ -12,28 +13,27 @@ extern int no_broadcast;
+ #ifdef CONFIG_X86_64
+ #include <asm/genapic.h>
+ #define send_IPI_mask (genapic->send_IPI_mask)
++#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
+ #else
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       send_IPI_mask_bitmask(mask, vector);
+ }
++void send_IPI_mask_allbutself(const cpumask_t *mask, int vector);
+ #endif
+ static inline void __local_send_IPI_allbutself(int vector)
+ {
+-      if (no_broadcast || vector == NMI_VECTOR) {
+-              cpumask_t mask = cpu_online_map;
+-
+-              cpu_clear(smp_processor_id(), mask);
+-              send_IPI_mask(mask, vector);
+-      } else
++      if (no_broadcast || vector == NMI_VECTOR)
++              send_IPI_mask_allbutself(&cpu_online_map, vector);
++      else
+               __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+ }
+ static inline void __local_send_IPI_all(int vector)
+ {
+       if (no_broadcast || vector == NMI_VECTOR)
+-              send_IPI_mask(cpu_online_map, vector);
++              send_IPI_mask(&cpu_online_map, vector);
+       else
+               __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+ }
+--- a/include/asm-x86/mach-es7000/mach_apic.h
++++ b/include/asm-x86/mach-es7000/mach_apic.h
+@@ -9,12 +9,12 @@ static inline int apic_id_registered(voi
+               return (1);
+ }
+-static inline cpumask_t target_cpus(void)
++static inline cpumask_t *target_cpus(void)
+ { 
+ #if defined CONFIG_ES7000_CLUSTERED_APIC
+-      return CPU_MASK_ALL;
++      return &CPU_MASK_ALL;
+ #else
+-      return cpumask_of_cpu(smp_processor_id());
++      return &cpumask_of_cpu(smp_processor_id());
+ #endif
+ }
+ #define TARGET_CPUS   (target_cpus())
+@@ -145,7 +145,7 @@ static inline int check_phys_apicid_pres
+       return (1);
+ }
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int num_bits_set;
+       int cpus_found = 0;
+--- a/include/asm-x86/mach-es7000/mach_ipi.h
++++ b/include/asm-x86/mach-es7000/mach_ipi.h
+@@ -1,24 +1,30 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       send_IPI_mask_sequence(mask, vector);
+ }
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
++{
++      cpumask_t allbutself = *mask;
++      cpu_clear(smp_processor_id(), allbutself);
++
++      if (!cpus_empty(allbutself))
++              send_IPI_mask_sequence(&allbutself, vector);
++}
++
+ static inline void send_IPI_allbutself(int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
+-      cpu_clear(smp_processor_id(), mask);
+-      if (!cpus_empty(mask))
+-              send_IPI_mask(mask, vector);
++      send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+ static inline void send_IPI_all(int vector)
+ {
+-      send_IPI_mask(cpu_online_map, vector);
++      send_IPI_mask(&cpu_online_map, vector);
+ }
+ #endif /* __ASM_MACH_IPI_H */
+--- a/include/asm-x86/mach-generic/mach_ipi.h
++++ b/include/asm-x86/mach-generic/mach_ipi.h
+@@ -4,6 +4,7 @@
+ #include <asm/genapic.h>
+ #define send_IPI_mask (genapic->send_IPI_mask)
++#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself)
+ #define send_IPI_allbutself (genapic->send_IPI_allbutself)
+ #define send_IPI_all (genapic->send_IPI_all)
+--- a/include/asm-x86/mach-numaq/mach_apic.h
++++ b/include/asm-x86/mach-numaq/mach_apic.h
+@@ -7,9 +7,9 @@
+ #define APIC_DFR_VALUE        (APIC_DFR_CLUSTER)
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+-      return CPU_MASK_ALL;
++      return &CPU_MASK_ALL;
+ }
+ #define TARGET_CPUS (target_cpus())
+@@ -124,7 +124,7 @@ static inline void enable_apic_mode(void
+  * We use physical apicids here, not logical, so just return the default
+  * physical broadcast to stop people from breaking us
+  */
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       return (int) 0xF;
+ }
+--- a/include/asm-x86/mach-numaq/mach_ipi.h
++++ b/include/asm-x86/mach-numaq/mach_ipi.h
+@@ -1,25 +1,31 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+-void send_IPI_mask_sequence(cpumask_t, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       send_IPI_mask_sequence(mask, vector);
+ }
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
+-      cpu_clear(smp_processor_id(), mask);
++      cpumask_t allbutself = *mask;
++      cpu_clear(smp_processor_id(), allbutself);
++
++      if (!cpus_empty(allbutself))
++              send_IPI_mask_sequence(&allbutself, vector);
++}
+-      if (!cpus_empty(mask))
+-              send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++      send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+ static inline void send_IPI_all(int vector)
+ {
+-      send_IPI_mask(cpu_online_map, vector);
++      send_IPI_mask(&cpu_online_map, vector);
+ }
+ #endif /* __ASM_MACH_IPI_H */
++
+--- a/include/asm-x86/mach-summit/mach_apic.h
++++ b/include/asm-x86/mach-summit/mach_apic.h
+@@ -14,13 +14,13 @@
+ #define APIC_DFR_VALUE        (APIC_DFR_CLUSTER)
+-static inline cpumask_t target_cpus(void)
++static inline const cpumask_t *target_cpus(void)
+ {
+       /* CPU_MASK_ALL (0xff) has undefined behaviour with
+        * dest_LowestPrio mode logical clustered apic interrupt routing
+        * Just start on cpu 0.  IRQ balancing will spread load
+        */
+-      return cpumask_of_cpu(0);
++      return &cpumask_of_cpu(0);
+ } 
+ #define TARGET_CPUS   (target_cpus())
+@@ -138,7 +138,7 @@ static inline void enable_apic_mode(void
+ {
+ }
+-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
++static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
+ {
+       int num_bits_set;
+       int cpus_found = 0;
+--- a/include/asm-x86/mach-summit/mach_ipi.h
++++ b/include/asm-x86/mach-summit/mach_ipi.h
+@@ -1,25 +1,31 @@
+ #ifndef __ASM_MACH_IPI_H
+ #define __ASM_MACH_IPI_H
+-void send_IPI_mask_sequence(cpumask_t mask, int vector);
++void send_IPI_mask_sequence(const cpumask_t *mask, int vector);
+-static inline void send_IPI_mask(cpumask_t mask, int vector)
++static inline void send_IPI_mask(const cpumask_t *mask, int vector)
+ {
+       send_IPI_mask_sequence(mask, vector);
+ }
+-static inline void send_IPI_allbutself(int vector)
++static inline void send_IPI_mask_allbutself(const cpumask_t *mask, int vector)
+ {
+-      cpumask_t mask = cpu_online_map;
+-      cpu_clear(smp_processor_id(), mask);
++      cpumask_t allbutself = *mask;
++      cpu_clear(smp_processor_id(), allbutself);
++
++      if (!cpus_empty(allbutself))
++              send_IPI_mask_sequence(&allbutself, vector);
++}
+-      if (!cpus_empty(mask))
+-              send_IPI_mask(mask, vector);
++static inline void send_IPI_allbutself(int vector)
++{
++      send_IPI_mask_allbutself(&cpu_online_map, vector);
+ }
+ static inline void send_IPI_all(int vector)
+ {
+-      send_IPI_mask(cpu_online_map, vector);
++      send_IPI_mask(&cpu_online_map, vector);
+ }
+ #endif /* __ASM_MACH_IPI_H */
++