From 4456cb827f83718f19b8dc41ea40de1b8cf1dcd7 Mon Sep 17 00:00:00 2001 From: Vladimir Mezentsev Date: Thu, 15 Aug 2024 17:40:12 -0700 Subject: [PATCH] gprofng: add hardware counters for Appliedmicro processor gprofng/ChangeLog 2024-08-15 Vladimir Mezentsev . * common/hwc_cpus.h: New constant for Appliedmicro processor. * common/hwctable.c: Add the hwc table for Appliedmicro processor. * src/hwc_arm64_amcc.h: New file. * src/collctrl.cc (read_int): Use strtol instead of atoi. --- gprofng/common/hwc_cpus.h | 4 + gprofng/common/hwctable.c | 26 ++++- gprofng/src/collctrl.cc | 6 +- gprofng/src/hwc_arm64_amcc.h | 182 +++++++++++++++++++++++++++++++++++ 4 files changed, 212 insertions(+), 6 deletions(-) create mode 100644 gprofng/src/hwc_arm64_amcc.h diff --git a/gprofng/common/hwc_cpus.h b/gprofng/common/hwc_cpus.h index 88788f3b144..d2253e91e0f 100644 --- a/gprofng/common/hwc_cpus.h +++ b/gprofng/common/hwc_cpus.h @@ -128,6 +128,10 @@ extern cpu_info_t *read_cpuinfo(); #define CPC_SPARC64_X 4006 /* Athena */ #define CPC_SPARC64_XII 4010 /* Athena++ */ +// Arm +#define CPC_ARM_GENERIC 3500 +#define CPC_ARM64_AMCC 3501 /* Applied Micro Circuits Corporation (ARM) */ + #define AMD_FAM_19H_ZEN3_NAME "AMD Family 19h (Zen3)" #define AMD_FAM_19H_ZEN4_NAME "AMD Family 19h (Zen4)" diff --git a/gprofng/common/hwctable.c b/gprofng/common/hwctable.c index 0baf63b1805..9ee932e301e 100644 --- a/gprofng/common/hwctable.c +++ b/gprofng/common/hwctable.c @@ -243,6 +243,7 @@ static Hwcentry papi_generic_list[] = { {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; +#if defined(__i386__) || defined(__x86_64) /* Kernel profiling pseudo-chip, OBSOLETE (To support 12.3 and earlier, TBR) */ static Hwcentry kproflist[] = { {"kcycles", "kcycles", 0, STXT ("KCPU Cycles"), PRELOADS_5, 1, ABST_NONE}, @@ -1215,6 +1216,7 @@ static Hwcentry amd_15h[] = { {"insts1", "EX_retired_instr_w_excp_intr", 1, NULL, PRELOADS_8, 0, ABST_NONE}, {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; +#endif /* __i386__ or __x86_64 */ #define INIT_HWC(nm, mtr, cfg, ty) .name = (nm), .metric = (mtr), \ .config = (cfg), .type = ty, .use_perf_event_type = 1, \ @@ -1296,15 +1298,18 @@ static Hwcentry amd_15h[] = { { HWCE("iTLB-loads", STXT("The Instruction TLB Loads"),\ PERF_COUNT_HW_CACHE_ITLB,\ PERF_COUNT_HW_CACHE_OP_READ, PERF_COUNT_HW_CACHE_RESULT_ACCESS) }, - static Hwcentry generic_list[] = { HWC_GENERIC {NULL, NULL, 0, NULL, 0, 0, 0, 0, ABST_NONE} }; -#include "hwc_amd_zen3.h" -#include "hwc_amd_zen4.h" -#include "hwc_intel_icelake.h" +#if defined(__i386__) || defined(__x86_64) + #include "hwc_amd_zen3.h" + #include "hwc_amd_zen4.h" + #include "hwc_intel_icelake.h" +#elif defined(__aarch64__) + #include "hwc_arm64_amcc.h" +#endif /* structure defining the counters for a CPU type */ typedef struct @@ -1325,6 +1330,7 @@ typedef struct * If the string is not formatted that way, -h hi and -h lo will fail */ static cpu_list_t cputabs[] = { +#if defined(__i386__) || defined(__x86_64) {CPC_PENTIUM_PRO_MMX, pentiumIIlist, {"insts", 0}}, {CPC_PENTIUM_PRO, pentiumIIIlist, {"insts", 0}}, {CPC_PENTIUM_4, pentium4, {"insts", 0}}, @@ -1353,10 +1359,13 @@ static cpu_list_t cputabs[] = { {CPC_AMD_FAM_11H, amd_opteron_10h_11h, {"insts,,cycles,,l2dm,,l2dtlbm", 0}}, {CPC_AMD_FAM_15H, amd_15h, {"insts,,cycles", 0}}, {CPC_KPROF, kproflist, {NULL}}, // OBSOLETE (To support 12.3 and earlier, TBR) - {ARM_CPU_IMP_APM, generic_list, {"insts,,cycles", 0}}, {CPC_AMD_Authentic, generic_list, {"insts,,cycles", 0}}, {CPC_AMD_FAM_19H_ZEN3, amd_zen3_list, {"insts,,cycles", 0}},
{CPC_AMD_FAM_19H_ZEN4, amd_zen4_list, {"insts,,cycles", 0}}, +#elif defined(__aarch64__) + {CPC_ARM64_AMCC, arm64_amcc_list, {"insts,,cycles", 0}}, + {CPC_ARM_GENERIC, generic_list, {"insts,,cycles", 0}}, +#endif {0, generic_list, {"insts,,cycles", 0}}, }; @@ -1856,6 +1865,13 @@ setup_cpc_general (int skip_hwc_test) cpcx_cpuver = CPC_INTEL_ICELAKE; } } + else if (strcmp (cpu_p->cpu_vendorstr, AARCH64_VENDORSTR_ARM) == 0) + { + if (cpu_p->cpu_family == 0x50) + cpcx_cpuver = CPC_ARM64_AMCC; + else + cpcx_cpuver = CPC_ARM_GENERIC; + } #ifdef DISALLOW_PENTIUM_PRO_MMX_7007575 if (cpcx_cpuver == CPC_PENTIUM_PRO_MMX) diff --git a/gprofng/src/collctrl.cc b/gprofng/src/collctrl.cc index da7281eb4f3..95324fbd5a6 100644 --- a/gprofng/src/collctrl.cc +++ b/gprofng/src/collctrl.cc @@ -77,7 +77,7 @@ read_int (char *from) { char *val = strchr (from, ':'); if (val) - return atoi (val + 1); + return (int) strtol (val + 1, NULL, 0); return 0; } @@ -130,7 +130,11 @@ read_cpuinfo () fclose (procf); } if (cpu_info.cpu_vendorstr == NULL) +#if defined(__aarch64__) + cpu_info.cpu_vendorstr = strdup (AARCH64_VENDORSTR_ARM); +#else cpu_info.cpu_vendorstr = GTXT ("Unknown processor"); +#endif if (cpu_info.cpu_modelstr == NULL) cpu_info.cpu_modelstr = GTXT ("Unknown cpu model"); return &cpu_info; diff --git a/gprofng/src/hwc_arm64_amcc.h b/gprofng/src/hwc_arm64_amcc.h new file mode 100644 index 00000000000..5d86c6b1b9a --- /dev/null +++ b/gprofng/src/hwc_arm64_amcc.h @@ -0,0 +1,182 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + Contributed by Oracle. + + This file is part of GNU Binutils. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#ifndef _HWC_ARM64_AMCC_H +#define _HWC_ARM64_AMCC_H + +#define I(nm, event, mtr) INIT_HWC(nm, mtr, (event), PERF_TYPE_RAW) + +static Hwcentry arm64_amcc_list[] = { + HWC_GENERIC +/* branch: */ + { I("br_immed_spec", 0x78, + STXT("Branch speculatively executed, immediate branch")) }, + { I("br_indirect_spec", 0x7a, + STXT("Branch speculatively executed, indirect branch")) }, + { I("br_mis_pred", 0x10, STXT("Branch mispredicted")) }, + { I("br_pred", 0x12, STXT("Predictable branch")) }, + { I("br_return_spec", 0x79, + STXT("Branch speculatively executed, procedure return")) }, +/* bus: */ + { I("bus_access", 0x19, STXT("Attributable Bus access")) }, + { I("bus_access_normal", 0x64, STXT("Bus access, Normal")) }, + { I("bus_access_not_shared", 0x63, + STXT("Bus access, not Normal, Cacheable, Shareable")) }, + { I("bus_access_periph", 0x65, STXT("Bus access, peripheral")) }, + { I("bus_access_shared", 0x62, + STXT("Bus access, Normal, Cacheable, Shareable")) }, +/* cache: */ + { I("btb_mis_pred", 0x102, STXT("BTB misprediction")) }, + { I("dtb_miss", 0x104, STXT("DTB miss")) }, + { I("itb_miss", 0x103, STXT("ITB miss")) }, + { I("l1_stage2_tlb_refill", 0x111, STXT("L1 stage 2 TLB refill")) }, + { I("l1d_cache", 0x4, STXT("Level 1 data cache access")) }, + { I("l1d_cache_inval", 0x48, STXT("L1D cache invalidate")) }, + { I("l1d_cache_late_miss", 0x105, STXT("L1D cache late miss")) }, + { I("l1d_cache_prefetch", 0x106, STXT("L1D cache prefetch")) }, + { I("l1d_cache_refill", 0x3, STXT("Level 1 data cache refill")) }, + { I("l1d_tlb", 0x25, STXT("L1D TLB access")) }, + { I("l1d_tlb_refill", 0x5, STXT("Attributable Level 1 data TLB refill")) }, + { I("l1i_cache", 0x14, + STXT("Attributable Level 1 instruction cache access")) }, + { I("l1i_cache_refill", 0x1, STXT("Level 1 instruction cache refill")) }, + { I("l1i_tlb", 0x26, STXT("Attributable Level 1 instruction TLB access")) }, + { I("l1i_tlb_refill", 0x2, + STXT("Attributable Level 1 instruction TLB refill")) }, + { I("l2d_cache", 0x16, STXT("Level 2 data cache access")) }, + { I("l2d_cache_inval", 0x58, STXT("L2D cache invalidate")) }, + { I("l2d_cache_prefetch", 0x107, STXT("L2D cache prefetch")) }, + { I("l2d_cache_rd", 0x50, STXT("L2D cache access, read")) }, + { I("l2d_cache_refill", 0x17, STXT("Level 2 data refill")) }, + { I("l2d_cache_refill_rd", 0x52, STXT("L2D cache refill, read")) }, + { I("l2d_cache_refill_wr", 0x53, STXT("L2D cache refill, write")) }, + { I("l2d_cache_wb", 0x18, + STXT("Attributable Level 2 data cache write-back")) }, + { I("l2d_cache_wb_clean", 0x57, + STXT("L2D cache Write-Back, cleaning and coherency")) }, + { I("l2d_cache_wb_victim", 0x56, STXT("L2D cache Write-Back, victim")) }, + { I("l2d_cache_wr", 0x51, STXT("L2D cache access, write")) }, + { I("l2d_tlb_access", 0x34, STXT("L2D TLB access")) }, + { I("l2i_tlb_access", 0x35, STXT("L2I TLB access")) }, + { I("page_walk_l0_stage1_hit", 0x112, STXT("Page walk, L0 stage-1 hit")) }, + { I("page_walk_l1_stage1_hit", 0x113, STXT("Page walk, L1 stage-1 hit")) }, + { I("page_walk_l1_stage2_hit", 0x115, STXT("Page walk, L1 stage-2 hit")) }, + { I("page_walk_l2_stage1_hit", 0x114, STXT("Page walk, L2 stage-1 hit")) }, + { I("page_walk_l2_stage2_hit", 0x116, STXT("Page walk, L2 stage-2 hit")) }, +/* clock: */ + { I("cpu_cycles", 0x11, STXT("Cycle")) }, + { I("fsu_clock_off_cycles", 0x101, STXT("FSU clocking gated off cycle")) }, + { I("wait_cycles", 0x110, STXT("Wait state cycle")) }, +/* core imp def: */ + { I("bus_access_rd", 0x60, STXT("Bus access read")) }, + 
{ I("bus_access_wr", 0x61, STXT("Bus access write")) }, + { I("l1d_cache_rd", 0x40, STXT("L1D cache access, read")) }, + { I("l1d_cache_refill_rd", 0x42, STXT("L1D cache refill, read")) }, + { I("l1d_cache_refill_wr", 0x43, STXT("L1D cache refill, write")) }, + { I("l1d_cache_wr", 0x41, STXT("L1D cache access, write")) }, + { I("l1d_tlb_rd", 0x4e, STXT("L1D tlb access, read")) }, + { I("l1d_tlb_refill_rd", 0x4c, STXT("L1D tlb refill, read")) }, + { I("l1d_tlb_refill_wr", 0x4d, STXT("L1D tlb refill, write")) }, + { I("l1d_tlb_wr", 0x4f, STXT("L1D tlb access, write")) }, +/* exception: */ + { I("exc_dabort", 0x84, STXT("Exception taken, Data Abort and SError")) }, + { I("exc_fiq", 0x87, STXT("Exception taken, FIQ")) }, + { I("exc_hvc", 0x8a, STXT("Exception taken, Hypervisor Call")) }, + { I("exc_irq", 0x86, STXT("Exception taken, IRQ")) }, + { I("exc_pabort", 0x83, STXT("Exception taken, Instruction Abort")) }, + { I("exc_return", 0xa, + STXT("Instruction architecturally executed, condition check pass, exception" + " return")) }, + { I("exc_svc", 0x82, STXT("Exception taken, Supervisor Call")) }, + { I("exc_taken", 0x9, STXT("Exception taken")) }, + { I("exc_trap_dabort", 0x8c, + STXT("Exception taken, Data Abort or SError not taken locally")) }, + { I("exc_trap_fiq", 0x8f, STXT("Exception taken, FIQ not taken locally")) }, + { I("exc_trap_irq", 0x8e, STXT("Exception taken, IRQ not taken locally")) }, + { I("exc_trap_other", 0x8d, + STXT("Exception taken, Other traps not taken locally")) }, + { I("exc_trap_pabort", 0x8b, + STXT("Exception taken, Instruction Abort not taken locally")) }, + { I("exc_undef", 0x81, STXT("Exception taken, Other synchronous")) }, +/* instruction: */ + { I("ase_spec", 0x74, + STXT("Operation speculatively executed, Advanced SIMD instruction")) }, + { I("br_mis_pred_retired", 0x22, + STXT("Instruction architecturally executed, mispredicted branch")) }, + { I("br_retired", 0x21, + STXT("Instruction architecturally executed, branch")) }, + { I("cid_write_retired", 0xb, STXT("Write to CONTEXTIDR")) }, + { I("crypto_spec", 0x77, + STXT("Operation speculatively executed, Cryptographic instruction")) }, + { I("dmb_spec", 0x7e, STXT("Barrier speculatively executed, DMB")) }, + { I("dp_spec", 0x73, + STXT("Operation speculatively executed, integer data processing")) }, + { I("dsb_spec", 0x7d, STXT("Barrier speculatively executed, DSB")) }, + { I("inst_retired", 0x8, STXT("Instruction architecturally executed")) }, + { I("inst_spec", 0x1b, STXT("Operation speculatively executed")) }, + { I("isb_spec", 0x7c, STXT("Barrier speculatively executed, ISB")) }, + { I("ld_spec", 0x70, STXT("Operation speculatively executed, load")) }, + { I("ldst_spec", 0x72, + STXT("Operation speculatively executed, load or store")) }, + { I("nop_spec", 0x100, STXT("Speculatively executed, NOP")) }, + { I("pc_write_spec", 0x76, + STXT("Operation speculatively executed, software change of the PC")) }, + { I("rc_ld_spec", 0x90, + STXT("Release consistency operation speculatively executed, Load-Acquire")) }, + { I("rc_st_spec", 0x91, + STXT("Release consistency operation speculatively executed, Store-Release")) }, + { I("st_spec", 0x71, STXT("Operation speculatively executed, store")) }, + { I("sw_incr", 0, STXT("Software increment")) }, + { I("ttbr_write_retired", 0x1c, + STXT("Instruction architecturally executed, Condition code check pass, write" + " to TTBR")) }, + { I("vfp_spec", 0x75, + STXT("Operation speculatively executed, floating-point instruction")) }, +/* intrinsic: */ + { 
I("ldrex_spec", 0x6c, + STXT("Exclusive operation speculatively executed, LDREX or LDX")) }, + { I("strex_fail_spec", 0x6e, + STXT("Exclusive operation speculatively executed, STREX or STX fail")) }, + { I("strex_pass_spec", 0x6d, + STXT("Exclusive operation speculatively executed, STREX or STX pass")) }, + { I("strex_spec", 0x6f, + STXT("Exclusive operation speculatively executed, STREX or STX")) }, +/* memory: */ + { I("mem_access", 0x13, STXT("Data memory access")) }, + { I("mem_access_rd", 0x66, STXT("Data memory access, read")) }, + { I("mem_access_wr", 0x67, STXT("Data memory access, write")) }, + { I("memory_error", 0x1a, STXT("Local memory error")) }, + { I("unaligned_ld_spec", 0x68, STXT("Unaligned access, read")) }, + { I("unaligned_ldst_spec", 0x6a, STXT("Unaligned access")) }, + { I("unaligned_st_spec", 0x69, STXT("Unaligned access, write")) }, +/* pipeline: */ + { I("bx_stall", 0x10c, STXT("BX stalled")) }, + { I("decode_stall", 0x108, STXT("Decode starved")) }, + { I("dispatch_stall", 0x109, STXT("Dispatch stalled")) }, + { I("fx_stall", 0x10f, STXT("FX stalled")) }, + { I("ixa_stall", 0x10a, STXT("IXA stalled")) }, + { I("ixb_stall", 0x10b, STXT("IXB stalled")) }, + { I("lx_stall", 0x10d, STXT("LX stalled")) }, + { I("sx_stall", 0x10e, STXT("SX stalled")) }, + { NULL, NULL, 0, NULL } +}; + +#undef I +#endif -- 2.39.5