]> git.ipfire.org Git - thirdparty/linux.git/blobdiff - arch/x86/kernel/apic/apic.c
Merge tag 'printk-for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/pmladek...
[thirdparty/linux.git] / arch / x86 / kernel / apic / apic.c
index 5be2c3bc9d9325cd008acd90e0fc2723420cc0cb..05feaec5d3d7811d6c818ada8e9d7b3bc38479c4 100644 (file)
@@ -65,10 +65,10 @@ unsigned int num_processors;
 unsigned disabled_cpus;
 
 /* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
 
-u8 boot_cpu_apic_version;
+u8 boot_cpu_apic_version __ro_after_init;
 
 /*
  * The highest APIC ID seen during enumeration.
@@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map;
  * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  * avoid undefined behaviour caused by sending INIT from AP to BSP.
  */
-static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
+static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
 
 /*
  * This variable controls which CPUs receive external NMIs.  By default,
  * external NMIs are delivered only to the BSP.
  */
-static int apic_extnmi = APIC_EXTNMI_BSP;
+static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
 
 /*
  * Map cpu index to physical APIC ID
@@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
+static int enabled_via_apicbase __ro_after_init;
 
 /*
  * Handle interrupt mode configuration register (IMCR).
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-unsigned long mp_lapic_addr;
-int disable_apic;
+unsigned long mp_lapic_addr __ro_after_init;
+int disable_apic __ro_after_init;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __initdata;
 /* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
+int local_apic_timer_c2_ok __ro_after_init;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 /*
  * Debug level, exported for io_apic.c
  */
-unsigned int apic_verbosity;
+int apic_verbosity __ro_after_init;
 
-int pic_mode;
+int pic_mode __ro_after_init;
 
 /* Have we found an MP table */
-int smp_found_config;
+int smp_found_config __ro_after_init;
 
 static struct resource lapic_resource = {
        .name = "Local APIC",
@@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0;
 
 static void apic_pm_activate(void);
 
-static unsigned long apic_phys;
+static unsigned long apic_phys __ro_after_init;
 
 /*
  * Get the LAPIC version
@@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void)
 static const struct x86_cpu_id deadline_match[] = {
        DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,        hsx_deadline_rev),
        DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,      0x0b000020),
-       DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
+       DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,      bdx_deadline_rev),
        DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,        skx_deadline_rev),
 
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,     0x22),
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,      0x20),
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E,     0x17),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,          0x22),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,        0x20),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,        0x17),
 
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE,   0x25),
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E,   0x17),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,        0x25),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,      0x17),
 
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE,   0xb2),
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP,  0xb2),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,        0xb2),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,          0xb2),
 
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE,  0x52),
-       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,       0x52),
+       DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,         0x52),
 
        {},
 };
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 
 /*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
  */
 static void __init lapic_cal_handler(struct clock_event_device *dev)
 {
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
        if (!boot_cpu_has(X86_FEATURE_APIC))
                return true;
 
+       /* Virt guests may lack ARAT, but still have DEADLINE */
+       if (!boot_cpu_has(X86_FEATURE_ARAT))
+               return true;
+
        /* Deadline timer is based on TSC so no further PIT action required */
        if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
                return false;
@@ -851,7 +855,8 @@ bool __init apic_needs_pit(void)
 static int __init calibrate_APIC_clock(void)
 {
        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
-       void (*real_handler)(struct clock_event_device *dev);
+       u64 tsc_perj = 0, tsc_start = 0;
+       unsigned long jif_start;
        unsigned long deltaj;
        long delta, deltatsc;
        int pm_referenced = 0;
@@ -878,28 +883,64 @@ static int __init calibrate_APIC_clock(void)
        apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
                    "calibrating APIC timer ...\n");
 
+       /*
+        * There are platforms w/o global clockevent devices. Instead of
+        * making the calibration conditional on that, use a polling based
+        * approach everywhere.
+        */
        local_irq_disable();
 
-       /* Replace the global interrupt handler */
-       real_handler = global_clock_event->event_handler;
-       global_clock_event->event_handler = lapic_cal_handler;
-
        /*
         * Setup the APIC counter to maximum. There is no way the lapic
         * can underflow in the 100ms detection time frame
         */
        __setup_APIC_LVTT(0xffffffff, 0, 0);
 
-       /* Let the interrupts run */
+       /*
+        * Methods to terminate the calibration loop:
+        *  1) Global clockevent if available (jiffies)
+        *  2) TSC if available and frequency is known
+        */
+       jif_start = READ_ONCE(jiffies);
+
+       if (tsc_khz) {
+               tsc_start = rdtsc();
+               tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+       }
+
+       /*
+        * Enable interrupts so the tick can fire, if a global
+        * clockevent device is available
+        */
        local_irq_enable();
 
-       while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-               cpu_relax();
+       while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+               /* Wait for a tick to elapse */
+               while (1) {
+                       if (tsc_khz) {
+                               u64 tsc_now = rdtsc();
+                               if ((tsc_now - tsc_start) >= tsc_perj) {
+                                       tsc_start += tsc_perj;
+                                       break;
+                               }
+                       } else {
+                               unsigned long jif_now = READ_ONCE(jiffies);
 
-       local_irq_disable();
+                               if (time_after(jif_now, jif_start)) {
+                                       jif_start = jif_now;
+                                       break;
+                               }
+                       }
+                       cpu_relax();
+               }
 
-       /* Restore the real event handler */
-       global_clock_event->event_handler = real_handler;
+               /* Invoke the calibration routine */
+               local_irq_disable();
+               lapic_cal_handler(NULL);
+               local_irq_enable();
+       }
+
+       local_irq_disable();
 
        /* Build delta t1-t2 as apic timer counts down */
        delta = lapic_cal_t1 - lapic_cal_t2;
@@ -943,10 +984,11 @@ static int __init calibrate_APIC_clock(void)
        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
        /*
-        * PM timer calibration failed or not turned on
-        * so lets try APIC timer based calibration
+        * PM timer calibration failed or not turned on so lets try APIC
+        * timer based calibration, if a global clockevent device is
+        * available.
         */
-       if (!pm_referenced) {
+       if (!pm_referenced && global_clock_event) {
                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
                /*
@@ -1182,25 +1224,38 @@ void clear_local_APIC(void)
 }
 
 /**
- * disable_local_APIC - clear and disable the local APIC
+ * apic_soft_disable - Clears and software disables the local APIC on hotplug
+ *
+ * Contrary to disable_local_APIC() this does not touch the enable bit in
+ * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
+ * bus would require a hardware reset as the APIC would lose track of bus
+ * arbitration. On systems with FSB delivery APICBASE could be disabled,
+ * but it has to be guaranteed that no interrupt is sent to the APIC while
+ * in that state and it's not clear from the SDM whether it still responds
+ * to INIT/SIPI messages. Stay on the safe side and use software disable.
  */
-void disable_local_APIC(void)
+void apic_soft_disable(void)
 {
-       unsigned int value;
-
-       /* APIC hasn't been mapped yet */
-       if (!x2apic_mode && !apic_phys)
-               return;
+       u32 value;
 
        clear_local_APIC();
 
-       /*
-        * Disable APIC (implies clearing of registers
-        * for 82489DX!).
-        */
+       /* Soft disable APIC (implies clearing of registers for 82489DX!). */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_SPIV_APIC_ENABLED;
        apic_write(APIC_SPIV, value);
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+       /* APIC hasn't been mapped yet */
+       if (!x2apic_mode && !apic_phys)
+               return;
+
+       apic_soft_disable();
 
 #ifdef CONFIG_X86_32
        /*
@@ -1265,7 +1320,7 @@ void __init sync_Arb_IDs(void)
                        APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
-enum apic_intr_mode_id apic_intr_mode;
+enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 
 static int __init apic_intr_mode_select(void)
 {
@@ -1453,54 +1508,72 @@ static void lapic_setup_esr(void)
                        oldvalue, value);
 }
 
-static void apic_pending_intr_clear(void)
+#define APIC_IR_REGS           APIC_ISR_NR
+#define APIC_IR_BITS           (APIC_IR_REGS * 32)
+#define APIC_IR_MAPSIZE                (APIC_IR_BITS / BITS_PER_LONG)
+
+union apic_ir {
+       unsigned long   map[APIC_IR_MAPSIZE];
+       u32             regs[APIC_IR_REGS];
+};
+
+static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
 {
-       long long max_loops = cpu_khz ? cpu_khz : 1000000;
-       unsigned long long tsc = 0, ntsc;
-       unsigned int queued;
-       unsigned long value;
-       int i, j, acked = 0;
+       int i, bit;
+
+       /* Read the IRRs */
+       for (i = 0; i < APIC_IR_REGS; i++)
+               irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+       /* Read the ISRs */
+       for (i = 0; i < APIC_IR_REGS; i++)
+               isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
 
-       if (boot_cpu_has(X86_FEATURE_TSC))
-               tsc = rdtsc();
        /*
-        * After a crash, we no longer service the interrupts and a pending
-        * interrupt from previous kernel might still have ISR bit set.
-        *
-        * Most probably by now CPU has serviced that pending interrupt and
-        * it might not have done the ack_APIC_irq() because it thought,
-        * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-        * does not clear the ISR bit and cpu thinks it has already serivced
-        * the interrupt. Hence a vector might get locked. It was noticed
-        * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+        * If the ISR map is not empty. ACK the APIC and run another round
+        * to verify whether a pending IRR has been unblocked and turned
+        * into a ISR.
         */
-       do {
-               queued = 0;
-               for (i = APIC_ISR_NR - 1; i >= 0; i--)
-                       queued |= apic_read(APIC_IRR + i*0x10);
-
-               for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-                       value = apic_read(APIC_ISR + i*0x10);
-                       for_each_set_bit(j, &value, 32) {
-                               ack_APIC_irq();
-                               acked++;
-                       }
-               }
-               if (acked > 256) {
-                       pr_err("LAPIC pending interrupts after %d EOI\n", acked);
-                       break;
-               }
-               if (queued) {
-                       if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
-                               ntsc = rdtsc();
-                               max_loops = (long long)cpu_khz << 10;
-                               max_loops -= ntsc - tsc;
-                       } else {
-                               max_loops--;
-                       }
-               }
-       } while (queued && max_loops > 0);
-       WARN_ON(max_loops <= 0);
+       if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
+               /*
+                * There can be multiple ISR bits set when a high priority
+                * interrupt preempted a lower priority one. Issue an ACK
+                * per set bit.
+                */
+               for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+                       ack_APIC_irq();
+               return true;
+       }
+
+       return !bitmap_empty(irr->map, APIC_IR_BITS);
+}
+
+/*
+ * After a crash, we no longer service the interrupts and a pending
+ * interrupt from previous kernel might still have ISR bit set.
+ *
+ * Most probably by now the CPU has serviced that pending interrupt and it
+ * might not have done the ack_APIC_irq() because it thought, interrupt
+ * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
+ * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
+ * a vector might get locked. It was noticed for timer irq (vector
+ * 0x31). Issue an extra EOI to clear ISR.
+ *
+ * If there are pending IRR bits they turn into ISR bits after a higher
+ * priority ISR bit has been acked.
+ */
+static void apic_pending_intr_clear(void)
+{
+       union apic_ir irr, isr;
+       unsigned int i;
+
+       /* 512 loops are way oversized and give the APIC a chance to obey. */
+       for (i = 0; i < 512; i++) {
+               if (!apic_check_and_ack(&irr, &isr))
+                       return;
+       }
+       /* Dump the IRR/ISR content if that failed */
+       pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
 }
 
 /**
@@ -1513,16 +1586,20 @@ static void setup_local_APIC(void)
 {
        int cpu = smp_processor_id();
        unsigned int value;
-#ifdef CONFIG_X86_32
-       int logical_apicid, ldr_apicid;
-#endif
-
 
        if (disable_apic) {
                disable_ioapic_support();
                return;
        }
 
+       /*
+        * If this comes from kexec/kcrash the APIC might be enabled in
+        * SPIV. Soft disable it before doing further initialization.
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_SPIV_APIC_ENABLED;
+       apic_write(APIC_SPIV, value);
+
 #ifdef CONFIG_X86_32
        /* Pound the ESR really hard over the head with a big hammer - mbligh */
        if (lapic_is_integrated() && apic->disable_esr) {
@@ -1532,8 +1609,6 @@ static void setup_local_APIC(void)
                apic_write(APIC_ESR, 0);
        }
 #endif
-       perf_events_lapic_init();
-
        /*
         * Double-check whether this APIC is really registered.
         * This is meaningless in clustered apic mode, so we skip it.
@@ -1548,26 +1623,35 @@ static void setup_local_APIC(void)
        apic->init_apic_ldr();
 
 #ifdef CONFIG_X86_32
-       /*
-        * APIC LDR is initialized.  If logical_apicid mapping was
-        * initialized during get_smp_config(), make sure it matches the
-        * actual value.
-        */
-       logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-       ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-       WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
-       /* always use the value from LDR */
-       early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+       if (apic->dest_logical) {
+               int logical_apicid, ldr_apicid;
+
+               /*
+                * APIC LDR is initialized.  If logical_apicid mapping was
+                * initialized during get_smp_config(), make sure it matches
+                * the actual value.
+                */
+               logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+               ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+               if (logical_apicid != BAD_APICID)
+                       WARN_ON(logical_apicid != ldr_apicid);
+               /* Always use the value from LDR. */
+               early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+       }
 #endif
 
        /*
-        * Set Task Priority to 'accept all'. We never change this
-        * later on.
+        * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
+        * vector in the 16-31 range could be delivered if TPR == 0, but we
+        * would think it's an exception and terrible things will happen.  We
+        * never change this later on.
         */
        value = apic_read(APIC_TASKPRI);
        value &= ~APIC_TPRI_MASK;
+       value |= 0x10;
        apic_write(APIC_TASKPRI, value);
 
+       /* Clear eventually stale ISR/IRR bits */
        apic_pending_intr_clear();
 
        /*
@@ -1614,6 +1698,8 @@ static void setup_local_APIC(void)
        value |= SPURIOUS_APIC_VECTOR;
        apic_write(APIC_SPIV, value);
 
+       perf_events_lapic_init();
+
        /*
         * Set up LVT0, LVT1:
         *