Initialize a raw ring-3-readable performance counter.
This allows a more flexible setup with a custom perf_event_attr. For simple uses, rdpmc_open should be used instead. Must be called for each thread using the counter. Must be closed with rdpmc_close.
int rdpmc_open_attr (struct perf_event_attr * attr, struct rdpmc_ctx * ctx, struct rdpmc_ctx * leader_ctx)
/*
 * Event description for L1 data-cache read misses.
 * Kernel-mode activity is excluded via exclude_kernel.
 */
struct perf_event_attr l1_read_attr = {
    .size           = PERF_ATTR_SIZE_VER0,
    .type           = PERF_TYPE_HW_CACHE,
    /* HW-cache config encoding: cache id | (operation << 8) | (result << 16) */
    .config         = PERF_COUNT_HW_CACHE_L1D
                    | (PERF_COUNT_HW_CACHE_OP_READ << 8)
                    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
    .sample_type    = PERF_SAMPLE_READ,
    .exclude_kernel = 1,
};
/*
 * Event description for L1 data-cache write misses.
 * Kernel-mode activity is excluded via exclude_kernel.
 */
struct perf_event_attr l1_write_attr = {
    .size           = PERF_ATTR_SIZE_VER0,
    .type           = PERF_TYPE_HW_CACHE,
    /* HW-cache config encoding: cache id | (operation << 8) | (result << 16) */
    .config         = PERF_COUNT_HW_CACHE_L1D
                    | (PERF_COUNT_HW_CACHE_OP_WRITE << 8)
                    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
    .sample_type    = PERF_SAMPLE_READ,
    .exclude_kernel = 1,
};
/*
 * Event description for L1 instruction-cache read misses.
 * Kernel-mode activity is excluded via exclude_kernel.
 */
struct perf_event_attr icache_attr = {
    .size           = PERF_ATTR_SIZE_VER0,
    .type           = PERF_TYPE_HW_CACHE,
    /* HW-cache config encoding: cache id | (operation << 8) | (result << 16) */
    .config         = PERF_COUNT_HW_CACHE_L1I
                    | (PERF_COUNT_HW_CACHE_OP_READ << 8)
                    | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
    .sample_type    = PERF_SAMPLE_READ,
    .exclude_kernel = 1,
};
/*
 * Probe whether the perf event described by `attr` can be opened as a
 * user-space readable counter on this machine.
 *
 * The event is opened with rdpmc_open_attr() (no group leader) and, if that
 * succeeds, released again immediately; only the outcome of the open is
 * reported.
 *
 * attr: event description to try.
 *
 * Returns 1 if rdpmc_open_attr() returned 0, and 0 otherwise.
 */
int is_event_supported(struct perf_event_attr *attr) {
    struct rdpmc_ctx ctx;

    if (rdpmc_open_attr(attr, &ctx, 0) != 0) {
        /*
         * The open failed, so `ctx` (an uninitialized stack object) was never
         * fully set up. Passing it to rdpmc_close() would operate on stale or
         * already-released handles, so skip the close on this path.
         */
        return 0;
    }

    rdpmc_close(&ctx);
    return 1;
}
// Find out which PMCs are supported: probe each cache-miss event once and
// store the result of is_event_supported() (non-zero when the event could be
// opened) through the corresponding output pointer.
*l1_read_supported = is_event_supported(&l1_read_attr);
*l1_write_supported = is_event_supported(&l1_write_attr);
*icache_supported = is_event_supported(&icache_attr);
cache와 관련된 PMC가 사용 가능한지 확인하고 있다.
(1) L1 data-cache read-miss counter support, (2) L1 data-cache write-miss counter support, (3) L1 instruction-cache read-miss counter support.
references: [1] http://halobates.de/jevents.html