int page_count;
int mmap_size;
vector<uint8_t> event_wraparound_temp; // for events straddling ring buffer end
-
+
void decode_event(const perf_event_header* ehdr);
public:
// PerfReader(perf_event_attr* attr, int pid, PerfConsumer* consumer); // attach to process hierarchy; may modify *attr
// Construct a reader that relays decoded perf events to `consumer`.
// `pid` defaults to -1, which is the systemwide mode.
// NOTE(review): a positive pid presumably attaches to that process (cf. the -p PID option) -- confirm in the constructor body.
// The constructor may modify *attr.
PerfReader(perf_event_attr* attr, PerfConsumer* consumer, int pid=-1); // systemwide by default; may modify *attr
-
+
~PerfReader();
void process_some(); // run briefly, relay decoded perf_events to consumer
PerfConsumer() {}
virtual ~PerfConsumer() {}
virtual void process(const perf_event_header* sample) {}
-
+
virtual void process_comm(const perf_event_header* sample,
uint32_t pid, uint32_t tid, bool exec, const char* comm) {}
virtual void process_exit(const perf_event_header* sample,
class StatsPerfConsumer: public PerfConsumer
{
unordered_map<int,unsigned> event_type_counts;
-
+
public:
StatsPerfConsumer() {}
~StatsPerfConsumer(); // report to stdout
class UnwindStatsConsumer: public UnwindSampleConsumer
{
unordered_map<int,unsigned> event_unwind_counts;
- unordered_map<string,unsigned> event_buildid_hits;
-
+ unordered_map<string,unsigned> event_buildid_hits;
+
public:
UnwindStatsConsumer() {}
~UnwindStatsConsumer();
{ "verbose", 'v', NULL, 0, N_ ("Increase verbosity of logging messages."), 0 },
{ "gmon", 'g', NULL, 0, N_("Generate gmon.BUILDID.out files for each binary."), 0 },
{ "pid", 'p', "PID", 0, N_("Profile given PID, and its future children."), 0 },
-#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
+#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
{ "event", 'e', "EVENT", 0, N_("Sample given LIBPFM event specification."), 0 },
#define ARGP_KEY_EVENT_LIST 0x1000
// --event-list: long-only option (its key is not a printable character) that takes no argument,
// so its help text must not repeat the "--event EVENT" description.
{ "event-list", ARGP_KEY_EVENT_LIST, NULL, 0, N_("List available LIBPFM events."), 0 },
parse_opt (int key, char *arg, struct argp_state *state)
{
(void)state;
-
+
switch (key)
{
case ARGP_KEY_INIT:
pid = atoi(arg);
break;
-#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
+#ifdef HAVE_PERFMON_PFMLIB_PERF_EVENT_H
case 'e':
libpfm_event = arg;
break;
{
pfm_pmu_info_t pinfo;
pfm_event_info_t info;
-
+
pfm_err_t rc = pfm_initialize();
if (rc != PFM_SUCCESS)
{
}
exit(0);
#endif
-
+
default:
return ARGP_ERR_UNKNOWN;
}
int pipefd[2] = {-1, -1}; // for CMD child process post-fork sync
(void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
-
+
if (pid > 0 && remaining < argc) // got a pid AND a cmd? reject
{
cerr << "ERROR: Must not specify both -p PID and CMD" << endl;
exit(1);
}
-
+
bool systemwide = (pid == 0) || (remaining == argc);
(void) systemwide;
-
+
try
{
perf_event_attr attr;
memset(&attr, 0, sizeof(attr));
attr.size = sizeof(attr);
-
+
if (libpfm_event != "")
{
#if HAVE_PERFMON_PFMLIB_PERF_EVENT_H
if (verbose>1)
{
- auto oldf = clog.flags();
+ auto oldf = clog.flags();
clog << "perf_event_attr configuration" << hex << showbase
<< " type=" << attr.type
<< " config=" << attr.config
<< endl;
clog.setf(oldf);
}
-
+
if (remaining < argc) // got a CMD... suffix? ok start it
{
int rc = pipe (pipefd); // will use pipefd[] >= 0 as flag for synchronization just below
UnwindStatsConsumer *usc = nullptr;
PerfConsumerUnwinder *pcu = nullptr;
StatsPerfConsumer *spc = nullptr;
-
+
if (gmon)
{
usc = new UnwindStatsConsumer();
spc = new StatsPerfConsumer();
pr = new PerfReader(&attr, spc, pid);
}
-
+
signal(SIGINT, sigint_handler);
- signal(SIGTERM, sigint_handler);
+ signal(SIGTERM, sigint_handler);
if (pid > 0 && pipefd[0]>=0) // need to release child CMD process?
{
else clog << "systemwide";
clog << endl;
}
-
+
while (true) // main loop
{
if (interrupted) break;
delete usc;
delete pcu;
delete spc;
-
+
// reporting done in various destructors
}
catch (const exception& e)
{
- cerr << e.what() << endl;
+ cerr << e.what() << endl;
}
-
+
return 0;
}
this->event_wraparound_temp.resize(this->mmap_size); // NB: never resize this object again!
this->consumer = consumer;
this->enabled = false;
-
+
Ebl *default_ebl = ebl_openbackend_machine(EM_X86_64); /* TODO: Generalize to architectures beyond x86. */
this->sample_regs_user = ebl_perf_frame_regs_mask (default_ebl);
this->sample_regs_count = bitset<64>(this->sample_regs_user).count();
-
+
// Request the user-register set given by the mask from ebl_perf_frame_regs_mask().
attr->sample_regs_user = this->sample_regs_user;
// Size in bytes of the user-stack snapshot requested with each sample.
// NOTE(review): per perf_event_open(2), sample_regs_user / sample_stack_user only take effect
// when PERF_SAMPLE_REGS_USER / PERF_SAMPLE_STACK_USER are set in sample_type; they are not in
// the assignment visible below -- confirm they are added elsewhere.
attr->sample_stack_user = 8192; // enough?
attr->sample_type = (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME);
attr->disabled = 1; /* will get enabled soon */
attr->task = 1; // catch FORK/EXIT
attr->comm = 1; // request COMM records (process name changes)
- attr->comm_exec = 1; // catch EXEC
+ attr->comm_exec = 1; // catch EXEC
// attr->precise_ip = 2; // request 0 skid ... but that conflicts with PERF_COUNT_HW_BRANCH_INSTRUCTIONS:freq=4000
attr->build_id = 1; // request build ids in MMAP2 events
if (verbose>3)
{ // hexdump attr
- auto oldf = clog.flags();
+ auto oldf = clog.flags();
clog << "perf_event_attr hexdump:";
auto bytes = (unsigned char*) attr;
for (size_t x = 0; x<sizeof(*attr); x++)
cout << endl;
clog.setf(oldf);
}
-
+
// Iterate over all cpus, even if attaching to a single pid, because
// we set ->inherit=1. That requires possible concurrency, which is
// enabled by per-cpu ring buffers.
uint64_t starttime = millis_monotonic();
uint64_t endtime = starttime + 1000; // run at most one second
uint64_t ring_buffer_size = this->page_size * this->page_count; // just the ring buffer size
-
+
while (! interrupted)
{
uint64_t now = millis_monotonic();
if (this->pollfds[i].revents & POLLIN) // found an fd with fresh yummy events
{
perf_event_mmap_page *header = perf_headers[i];
- uint64_t data_head = ring_buffer_read_head(header);
- uint64_t data_tail = header->data_tail;
+ uint64_t data_head = ring_buffer_read_head(header);
+ uint64_t data_tail = header->data_tail;
uint8_t *base = ((uint8_t *) header) + this->page_size;
struct perf_event_header *ehdr;
size_t ehdr_size;
clog << "perf head=" << (void*) data_head
<< " tail=" << (void*) data_tail
<< " ehdr=" << (void*) ehdr
- << " size=" << setbase(10) << ehdr_size << setbase(16) << endl;
-
+ << " size=" << setbase(10) << ehdr_size << setbase(16) << endl;
+
if (((uint8_t *)ehdr) + ehdr_size > base + ring_buffer_size) // mmap region wraparound?
{
// need to copy it to a contiguous temporary
uint8_t *event_temp = this->event_wraparound_temp.data();
memcpy(event_temp, copy_start, len_first); // part at end of mmap'd region
memcpy(event_temp + len_first, base, len_secnd); // part at beginning of mmap'd region
- ehdr = (perf_event_header*) event_temp;
+ ehdr = (perf_event_header*) event_temp;
}
this->decode_event(ehdr);
uint32_t pid = *reinterpret_cast<const uint32_t*>(data); data += sizeof(uint32_t);
uint32_t tid = *reinterpret_cast<const uint32_t*>(data); data += sizeof(uint32_t);
const char* comm = reinterpret_cast<const char*>(data);
- consumer->process_comm(ehdr, pid, tid, (ehdr->misc & PERF_RECORD_MISC_COMM_EXEC), comm);
+ consumer->process_comm(ehdr, pid, tid, (ehdr->misc & PERF_RECORD_MISC_COMM_EXEC), comm);
break;
}
case PERF_RECORD_EXIT:
cout << "buildid / unwind-hit counts:" << endl;
for (const auto& kv : this->event_buildid_hits)
cout << "buildid " << kv.first << " count " << kv.second << endl;
-
}
void UnwindStatsConsumer::process(const UnwindSample* sample)