diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 386151b2c62fec7868b6d17a31435243c0003402..546d13e436aafae54764787736f9501a110a8d4b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
        EVENT_EXTRA_END
 };
 
+static struct event_constraint intel_icl_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
+       INTEL_UEVENT_CONSTRAINT(0x1c0, 0),      /* INST_RETIRED.PREC_DIST */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
+       FIXED_EVENT_CONSTRAINT(0x0400, 3),      /* SLOTS */
+       INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+       INTEL_EVENT_CONSTRAINT(0x32, 0xf),      /* SW_PREFETCH_ACCESS.* */
+       INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
+       INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
+       INTEL_EVENT_CONSTRAINT(0xa3, 0xf),      /* CYCLE_ACTIVITY.* */
+       INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
+       INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
+       EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+       INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+       EVENT_EXTRA_END
+};
+
 EVENT_ATTR_STR(mem-loads,      mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
 EVENT_ATTR_STR(mem-loads,      mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,     mem_st_snb,     "event=0xcd,umask=0x2");
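
The new Icelake constraint table mixes fixed-counter pins with the new range
entries. A fixed-counter constraint encodes "event c may only run on fixed
counter n" by setting bit 32+n of the counter index mask (fixed counters live
from INTEL_PMC_IDX_FIXED == 32 upward); abridged from
arch/x86/events/perf_event.h:

	#define FIXED_EVENT_CONSTRAINT(c, n)	\
		EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)

So FIXED_EVENT_CONSTRAINT(0x0400, 3) above pins the new SLOTS pseudo-event to
Icelake's fixed counter 3, i.e. idxmsk bit 35.
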
@@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs
        },
 };
 
+#define TNT_LOCAL_DRAM                 BIT_ULL(26)
+#define TNT_DEMAND_READ                        GLM_DEMAND_DATA_RD
+#define TNT_DEMAND_WRITE               GLM_DEMAND_RFO
+#define TNT_LLC_ACCESS                 GLM_ANY_RESPONSE
+#define TNT_SNP_ANY                    (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
+                                        SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
+#define TNT_LLC_MISS                   (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
+
+static __initconst const u64 tnt_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = TNT_DEMAND_READ|
+                                                 TNT_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = TNT_DEMAND_READ|
+                                                 TNT_LLC_MISS,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = TNT_DEMAND_WRITE|
+                                                 TNT_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = TNT_DEMAND_WRITE|
+                                                 TNT_LLC_MISS,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x0,
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+       },
+};
+
+static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
 #define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL       BIT_ULL(21)
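
Two details in the intel_tnt_extra_regs table above. First, the third argument
of INTEL_UEVENT_EXTRA_REG() is the mask of valid bits for that OFFCORE_RSP MSR;
the generic code rejects user-supplied config1 bits outside it, roughly as in
x86_pmu_extra_regs() in arch/x86/events/core.c (abridged):

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (config & er->config_mask))
			continue;
		/* reserved OFFCORE_RSP bits must not be set by the user */
		if (event->attr.config1 & ~er->valid_mask)
			return -EINVAL;
		/* ... record er->msr and config1 in event->hw.extra_reg */
	}

Second, the ordering comment: intel_fixup_er() indexes
x86_pmu.extra_regs[EXTRA_REG_RSP_0/1] directly when it migrates an event
between the two MSRs, so the RSP_0 and RSP_1 rows must be entries 0 and 1.
Note also that Tremont, like earlier Atoms, selects OFFCORE_RSP_1 with umask
0x02 on the same event 0xb7 (0x02b7), while the Icelake table earlier uses the
separate event code 0x01bb.
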
@@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int
        /*
         * We're going to use PMC3, make sure TFA is set before we touch it.
         */
-       if (cntr == 3 && !cpuc->is_fake)
+       if (cntr == 3)
                intel_set_tfa(cpuc, true);
 }
 
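Dropping !cpuc->is_fake is safe because the guard was dead code:
commit_scheduling callbacks are only invoked from x86_schedule_events() when
the caller passes an assignment array, and the event-validation paths, which
are what run on a fake cpuc, schedule with assign == NULL. Abridged from
arch/x86/events/core.c:

	if (!unsched && assign) {
		for (i = 0; i < n; i++) {
			if (x86_pmu.commit_scheduling)
				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
		}
	}
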
@@ -2091,15 +2159,19 @@ static void intel_pmu_disable_event(struct perf_event *event)
        cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
        cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-       if (unlikely(event->attr.precise_ip))
-               intel_pmu_pebs_disable(event);
-
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
                intel_pmu_disable_fixed(hwc);
                return;
        }
 
        x86_pmu_disable_event(event);
+
+       /*
+        * Needs to be called after x86_pmu_disable_event,
+        * so we don't trigger the event without PEBS bit set.
+        */
+       if (unlikely(event->attr.precise_ip))
+               intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_del_event(struct perf_event *event)
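
The reorder makes the disable path the mirror image of
intel_pmu_enable_event(), which configures PEBS before arming the counter;
disabling must therefore stop the counter before tearing PEBS down, so the
counter never runs without its PEBS configuration in place. For contrast, the
enable side (abridged, from earlier in this file):

	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
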
@@ -2145,6 +2217,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);
 
+       if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
+               bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+               mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+       }
+
        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
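
MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs a four-bit control nibble per fixed
counter (OS, USR, AnyThread, PMI) at idx*4, and Icelake's adaptive PEBS adds
one per-counter "adaptive record" bit starting at bit 32; that is why the same
idx*4 shift applies to ICL_FIXED_0_ADAPTIVE (BIT_ULL(32)). A worked example,
assuming fixed counter 1:

	u64 bits = 0xbULL << 4;			/* OS|USR|PMI nibble, bits 7:4 */
	u64 adap = ICL_FIXED_0_ADAPTIVE << 4;	/* bit 32 << 4 -> bit 36 */
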
@@ -2692,7 +2769,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 
        if (x86_pmu.event_constraints) {
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if ((event->hw.config & c->cmask) == c->code) {
+                       if (constraint_match(c, event->hw.config)) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
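
constraint_match() replaces the open-coded (config & cmask) == code test so
that one table row can cover a whole span of event codes, as the
INTEL_EVENT_CONSTRAINT_RANGE() entries in the Icelake table above do. The
helper, roughly as introduced with this series (arch/x86/events/perf_event.h):

	static inline bool constraint_match(struct event_constraint *c, u64 ecode)
	{
		ecode &= c->cmask;
		return ecode >= c->code && ecode <= c->code + c->size;
	}

Plain constraints carry size == 0, so their behaviour is unchanged.
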
@@ -2842,7 +2919,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
        struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
        struct intel_excl_states *xlo;
        int tid = cpuc->excl_thread_id;
-       int is_excl, i;
+       int is_excl, i, w;
 
        /*
         * validating a group does not require
@@ -2898,36 +2975,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
         * SHARED   : sibling counter measuring non-exclusive event
         * UNUSED   : sibling counter unused
         */
+       w = c->weight;
        for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
                /*
                 * exclusive event in sibling counter
                 * our corresponding counter cannot be used
                 * regardless of our event
                 */
-               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+               if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
                        __clear_bit(i, c->idxmsk);
+                       w--;
+                       continue;
+               }
                /*
                 * if measuring an exclusive event, sibling
                 * measuring non-exclusive, then counter cannot
                 * be used
                 */
-               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+               if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
                        __clear_bit(i, c->idxmsk);
+                       w--;
+                       continue;
+               }
        }
 
-       /*
-        * recompute actual bit weight for scheduling algorithm
-        */
-       c->weight = hweight64(c->idxmsk64);
-
        /*
         * if we return an empty mask, then switch
         * back to static empty constraint to avoid
         * the cost of freeing later on
         */
-       if (c->weight == 0)
+       if (!w)
                c = &emptyconstraint;
 
+       c->weight = w;
+
        return c;
 }
 
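Keeping the running weight w in step with the bit clearing avoids the second
pass over the mask that the deleted hweight64() recompute needed. A concrete
trace, assuming a four-counter constraint with the sibling holding PMC0
exclusive:

	/* entry: c->idxmsk64 = 0xf, w = 4 */
	__clear_bit(0, c->idxmsk);	/* idxmsk64 = 0xe */
	w--;				/* w = 3 == hweight64(0xe) */

c->weight is only written back at the end; when every counter is knocked out
the code returns the static emptyconstraint rather than a zero-weight dynamic
constraint that would have to be freed later.
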
@@ -2935,11 +3016,9 @@ static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                            struct perf_event *event)
 {
-       struct event_constraint *c1 = NULL;
-       struct event_constraint *c2;
+       struct event_constraint *c1, *c2;
 
-       if (idx >= 0) /* fake does < 0 */
-               c1 = cpuc->event_constraint[idx];
+       c1 = cpuc->event_constraint[idx];
 
        /*
         * first time only
@@ -2947,7 +3026,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
         * - dynamic constraint: handled by intel_get_excl_constraints()
         */
        c2 = __intel_get_event_constraints(cpuc, idx, event);
-       if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+       if (c1) {
+               WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
                bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
                c1->weight = c2->weight;
                c2 = c1;
@@ -3135,7 +3215,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
                flags &= ~PERF_SAMPLE_TIME;
        if (!event->attr.exclude_kernel)
                flags &= ~PERF_SAMPLE_REGS_USER;
-       if (event->attr.sample_regs_user & ~PEBS_REGS)
+       if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
                flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
        return flags;
 }
@@ -3189,7 +3269,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
                return ret;
 
        if (event->attr.precise_ip) {
-               if (!event->attr.freq) {
+               if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
                        event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
                        if (!(event->attr.sample_type &
                              ~intel_pmu_large_pebs_flags(event)))
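
The widened condition also keeps auto-reload (and with it large PEBS, via
intel_pmu_large_pebs_flags() above) away from events that request a wakeup
every N samples: with several PEBS records buffered per interrupt, the
"wakeup_events samples per wakeup" contract cannot be honoured. If
attr.watermark is set the same field means a byte watermark instead, so
auto-reload stays allowed. A hypothetical attribute that the new test
excludes (standard perf_event_attr fields):

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_INSTRUCTIONS,
		.precise_ip	= 2,		/* PEBS */
		.sample_period	= 100003,	/* fixed period, !freq */
		.wakeup_events	= 1,		/* wake up on every sample */
	};
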
@@ -3370,6 +3450,12 @@ static struct event_constraint counter0_constraint =
 static struct event_constraint counter2_constraint =
                        EVENT_CONSTRAINT(0, 0x4, 0);
 
+static struct event_constraint fixed0_constraint =
+                       FIXED_EVENT_CONSTRAINT(0x00c0, 0);
+
+static struct event_constraint fixed0_counter0_constraint =
+                       INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
+
 static struct event_constraint *
 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                          struct perf_event *event)
@@ -3388,6 +3474,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
        return c;
 }
 
+static struct event_constraint *
+icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       /*
+        * Fixed counter 0 has less skid.
+        * Force instruction:ppp in Fixed counter 0
+        */
+       if ((event->attr.precise_ip == 3) &&
+           constraint_match(&fixed0_constraint, event->hw.config))
+               return &fixed0_constraint;
+
+       return hsw_get_event_constraints(cpuc, idx, event);
+}
+
 static struct event_constraint *
 glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                          struct perf_event *event)
@@ -3403,6 +3504,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
        return c;
 }
 
+static struct event_constraint *
+tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       /*
+        * :ppp means to do reduced skid PEBS,
+        * which is available on PMC0 and fixed counter 0.
+        */
+       if (event->attr.precise_ip == 3) {
+               /* Force instruction:ppp on PMC0 and Fixed counter 0 */
+               if (constraint_match(&fixed0_constraint, event->hw.config))
+                       return &fixed0_counter0_constraint;
+
+               return &counter0_constraint;
+       }
+
+       c = intel_get_event_constraints(cpuc, idx, event);
+
+       return c;
+}
+
 static bool allow_tsx_force_abort = true;
 
 static struct event_constraint *
@@ -3414,7 +3538,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
        /*
         * Without TFA we must not use PMC3.
         */
-       if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) {
+       if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
                c = dyn_constraint(cpuc, c, idx);
                c->idxmsk64 &= ~(1ULL << 3);
                c->weight--;
@@ -3511,6 +3635,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
 
 int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 {
+       cpuc->pebs_record_size = x86_pmu.pebs_record_size;
+
        if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
                cpuc->shared_regs = allocate_shared_regs(cpu);
                if (!cpuc->shared_regs)
@@ -3579,6 +3705,12 @@ static void intel_pmu_cpu_starting(int cpu)
 
        cpuc->lbr_sel = NULL;
 
+       if (x86_pmu.flags & PMU_FL_TFA) {
+               WARN_ON_ONCE(cpuc->tfa_shadow);
+               cpuc->tfa_shadow = ~0ULL;
+               intel_set_tfa(cpuc, false);
+       }
+
        if (x86_pmu.version > 1)
                flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
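Priming tfa_shadow with ~0ULL guarantees the first intel_set_tfa(cpuc, false)
can never match the shadow and therefore writes MSR_TSX_FORCE_ABORT exactly
once, so the cached value and the hardware agree from CPU-online onwards,
including across hotplug. The shadowed writer this pairs with, abridged from
earlier in this file:

	static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
	{
		u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;

		if (cpuc->tfa_shadow != val) {	/* ~0ULL matches neither */
			cpuc->tfa_shadow = val;
			wrmsrl(MSR_TSX_FORCE_ABORT, val);
		}
	}
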
@@ -4112,6 +4244,42 @@ static struct attribute *hsw_tsx_events_attrs[] = {
        NULL
 };
 
+EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
+EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
+EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
+
+static struct attribute *icl_events_attrs[] = {
+       EVENT_PTR(mem_ld_hsw),
+       EVENT_PTR(mem_st_hsw),
+       NULL,
+};
+
+static struct attribute *icl_tsx_events_attrs[] = {
+       EVENT_PTR(tx_start),
+       EVENT_PTR(tx_abort),
+       EVENT_PTR(tx_commit),
+       EVENT_PTR(tx_capacity_read),
+       EVENT_PTR(tx_capacity_write),
+       EVENT_PTR(tx_conflict),
+       EVENT_PTR(el_start),
+       EVENT_PTR(el_abort),
+       EVENT_PTR(el_commit),
+       EVENT_PTR(el_capacity_read),
+       EVENT_PTR(el_capacity_write),
+       EVENT_PTR(el_conflict),
+       EVENT_PTR(cycles_t),
+       EVENT_PTR(cycles_ct),
+       NULL,
+};
+
+static __init struct attribute **get_icl_events_attrs(void)
+{
+       return boot_cpu_has(X86_FEATURE_RTM) ?
+               merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
+               icl_events_attrs;
+}
+
 static ssize_t freeze_on_smi_show(struct device *cdev,
                                  struct device_attribute *attr,
                                  char *buf)
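
merge_attr() concatenates two NULL-terminated attribute arrays into one
freshly allocated array, which is how the TSX event strings get appended only
when the CPU has RTM. Roughly (arch/x86/events/core.c, abridged):

	/* count both arrays, allocate, copy, NULL-terminate */
	for (j = 0; a[j]; j++)
		;
	for (i = 0; b[i]; i++)
		j++;
	new = kmalloc_array(j + 1, sizeof(*new), GFP_KERNEL);
	if (!new)
		return NULL;
	for (i = 0, j = 0; a[i]; i++)
		new[j++] = a[i];
	for (i = 0; b[i]; i++)
		new[j++] = b[i];
	new[j] = NULL;
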
@@ -4151,6 +4319,50 @@ done:
        return count;
 }
 
+static void update_tfa_sched(void *ignored)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+       /*
+        * check if PMC3 is used
+        * and if so force schedule out for all event types all contexts
+        */
+       if (test_bit(3, cpuc->active_mask))
+               perf_pmu_resched(x86_get_pmu());
+}
+
+static ssize_t show_sysctl_tfa(struct device *cdev,
+                             struct device_attribute *attr,
+                             char *buf)
+{
+       return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
+}
+
+static ssize_t set_sysctl_tfa(struct device *cdev,
+                             struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       bool val;
+       ssize_t ret;
+
+       ret = kstrtobool(buf, &val);
+       if (ret)
+               return ret;
+
+       /* no change */
+       if (val == allow_tsx_force_abort)
+               return count;
+
+       allow_tsx_force_abort = val;
+
+       get_online_cpus();
+       on_each_cpu(update_tfa_sched, NULL, 1);
+       put_online_cpus();
+
+       return count;
+}
+
+
 static DEVICE_ATTR_RW(freeze_on_smi);
 
 static ssize_t branches_show(struct device *cdev,
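
set_sysctl_tfa() has to do more than flip the bool: events already scheduled
on PMC3 must be moved off (or allowed back on) immediately. update_tfa_sched()
therefore runs on every CPU, under get_online_cpus() to fence CPU hotplug, and
forces a reschedule wherever PMC3 is live. perf_pmu_resched() is the helper
added alongside this; roughly (kernel/events/core.c, abridged):

	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
	struct perf_event_context *task_ctx = cpuctx->task_ctx;

	perf_ctx_lock(cpuctx, task_ctx);
	ctx_resched(cpuctx, task_ctx, EVENT_ALL|EVENT_CPU);
	perf_ctx_unlock(cpuctx, task_ctx);
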
@@ -4183,7 +4395,9 @@ static struct attribute *intel_pmu_caps_attrs[] = {
        NULL
 };
 
-static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+static DEVICE_ATTR(allow_tsx_force_abort, 0644,
+                  show_sysctl_tfa,
+                  set_sysctl_tfa);
 
 static struct attribute *intel_pmu_attrs[] = {
        &dev_attr_freeze_on_smi.attr,
@@ -4444,6 +4658,32 @@ __init int intel_pmu_init(void)
                name = "goldmont_plus";
                break;
 
+       case INTEL_FAM6_ATOM_TREMONT_X:
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+               hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.extra_regs = intel_tnt_extra_regs;
+               /*
+                * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+                * for precise cycles.
+                */
+               x86_pmu.pebs_aliases = NULL;
+               x86_pmu.pebs_prec_dist = true;
+               x86_pmu.lbr_pt_coexist = true;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.get_event_constraints = tnt_get_event_constraints;
+               extra_attr = slm_format_attr;
+               pr_cont("Tremont events, ");
+               name = "Tremont";
+               break;
+
        case INTEL_FAM6_WESTMERE:
        case INTEL_FAM6_WESTMERE_EP:
        case INTEL_FAM6_WESTMERE_EX:
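
Setting pebs_aliases = NULL in the Tremont block opts out of the cycles:pp
rewrite that the intel_pebs_aliases_*() helpers perform on big cores, where
the non-PEBS CPU_CLK_UNHALTED encoding is replaced with a PEBS-capable proxy
event; per the comment, precise cycles on Tremont should instead be counted as
CPU_CLK_UNHALTED.CORE_P. For reference, the alias big cores apply (abridged
from intel_pebs_aliases_snb() earlier in this file):

	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
		/* swap in a PEBS-capable stand-in for CPU_CLK_UNHALTED */
		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01,
					    .inv=1, .cmask=16);

		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
		event->hw.config = alt_config;
	}
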
@@ -4692,13 +4932,41 @@ __init int intel_pmu_init(void)
                        x86_pmu.get_event_constraints = tfa_get_event_constraints;
                        x86_pmu.enable_all = intel_tfa_pmu_enable_all;
                        x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
-                       intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+                       intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
                }
 
                pr_cont("Skylake events, ");
                name = "skylake";
                break;
 
+       case INTEL_FAM6_ICELAKE_MOBILE:
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+               hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_icl_event_constraints;
+               x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_icl_extra_regs;
+               x86_pmu.pebs_aliases = NULL;
+               x86_pmu.pebs_prec_dist = true;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = icl_get_event_constraints;
+               extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+                       hsw_format_attr : nhm_format_attr;
+               extra_attr = merge_attr(extra_attr, skl_format_attr);
+               x86_pmu.cpu_events = get_icl_events_attrs();
+               x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
+               x86_pmu.lbr_pt_coexist = true;
+               intel_pmu_pebs_data_source_skl(false);
+               pr_cont("Icelake events, ");
+               name = "icelake";
+               break;
+
        default:
                switch (x86_pmu.version) {
                case 1:
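
One detail worth noting in the Icelake block above: X86_CONFIG(.event=0xca,
.umask=0x02) builds the raw rtm_abort_event encoding through designated
initializers on a config-layout union, roughly (arch/x86/events/perf_event.h):

	#define X86_CONFIG(args...)	\
		((union x86_pmu_config){.bits = {args}}).value

which here evaluates to the config value 0x02ca.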