Loading arch/arc/include/asm/perf_event.h +24 −29 Original line number Diff line number Diff line Loading @@ -57,26 +57,7 @@ struct arc_reg_cc_build { #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) /* * The "generalized" performance events seem to really be a copy * of the available events on x86 processors; the mapping to ARC * events is not always possible 1-to-1. Fortunately, there doesn't * seem to be an exact definition for these events, so we can cheat * a bit where necessary. * * In particular, the following PERF events may behave a bit differently * compared to other architectures: * * PERF_COUNT_HW_CPU_CYCLES * Cycles not in halted state * * PERF_COUNT_HW_REF_CPU_CYCLES * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) * * PERF_COUNT_HW_BUS_CYCLES * Unclear what this means, Intel uses 0x013c, which according to * their datasheet means "unhalted reference cycles". It sounds similar * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. * Some ARC pct quirks: * * PERF_COUNT_HW_STALLED_CYCLES_BACKEND * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND Loading @@ -91,21 +72,35 @@ struct arc_reg_cc_build { * Note that I$ cache misses aren't counted by either of the two! */ /* * ARC PCT has hardware conditions with fixed "names" but variable "indexes" * (based on a specific RTL build) * Below is the static map between perf generic/arc specific event_id and * h/w condition names. 
* At the time of probe, we loop thru each index and find its name to * complete the mapping of perf event_id to h/w index, as the latter is needed * to actually program the counter */ static const char * const arc_pmu_ev_hw_map[] = { /* count cycles */ [PERF_COUNT_HW_CPU_CYCLES] = "crun", [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", [PERF_COUNT_HW_BUS_CYCLES] = "crun", [PERF_COUNT_HW_INSTRUCTIONS] = "iall", [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", [PERF_COUNT_ARC_DCLM] = "dclm", [PERF_COUNT_ARC_DCSM] = "dcsm", [PERF_COUNT_ARC_ICM] = "icm", [PERF_COUNT_ARC_BPOK] = "bpok", [PERF_COUNT_ARC_EDTLB] = "edtlb", [PERF_COUNT_ARC_EITLB] = "eitlb", /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ }; #define C(_x) PERF_COUNT_HW_CACHE_##_x Loading arch/arc/kernel/perf_event.c +13 −5 Original line number Diff line number Diff line Loading @@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config) if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", cache_type, cache_op, cache_result, ret, arc_pmu_ev_hw_map[ret]); return ret; } Loading @@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event) if (arc_pmu->ev_hw_idx[event->attr.config] < 0) return -ENOENT; hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; pr_debug("initializing event %d with cfg %d\n", (int) event->attr.config, (int) hwc->config); 
pr_debug("init event %d with h/w %d \'%s\'\n", (int) event->attr.config, (int) hwc->config, arc_pmu_ev_hw_map[event->attr.config]); return 0; case PERF_TYPE_HW_CACHE: ret = arc_pmu_cache_event(event->attr.config); Loading Loading @@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev) arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); cc_name.str[8] = 0; for (i = 0; i < PERF_COUNT_HW_MAX; i++) for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) arc_pmu->ev_hw_idx[i] = -1; /* loop thru all available h/w condition indexes */ for (j = 0; j < cc_bcr.c; j++) { write_aux_reg(ARC_REG_CC_INDEX, j); cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); /* See if it has been mapped to a perf event_id */ for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { if (arc_pmu_ev_hw_map[i] && !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && strlen(arc_pmu_ev_hw_map[i])) { pr_debug("mapping %d to idx %d with name %s\n", i, j, cc_name.str); pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", i, cc_name.str, j); arc_pmu->ev_hw_idx[i] = j; } } Loading Loading
arch/arc/include/asm/perf_event.h +24 −29 Original line number Diff line number Diff line Loading @@ -57,26 +57,7 @@ struct arc_reg_cc_build { #define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 6) /* * The "generalized" performance events seem to really be a copy * of the available events on x86 processors; the mapping to ARC * events is not always possible 1-to-1. Fortunately, there doesn't * seem to be an exact definition for these events, so we can cheat * a bit where necessary. * * In particular, the following PERF events may behave a bit differently * compared to other architectures: * * PERF_COUNT_HW_CPU_CYCLES * Cycles not in halted state * * PERF_COUNT_HW_REF_CPU_CYCLES * Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES * for now as we don't do Dynamic Voltage/Frequency Scaling (yet) * * PERF_COUNT_HW_BUS_CYCLES * Unclear what this means, Intel uses 0x013c, which according to * their datasheet means "unhalted reference cycles". It sounds similar * to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it. * Some ARC pct quirks: * * PERF_COUNT_HW_STALLED_CYCLES_BACKEND * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND Loading @@ -91,21 +72,35 @@ struct arc_reg_cc_build { * Note that I$ cache misses aren't counted by either of the two! */ /* * ARC PCT has hardware conditions with fixed "names" but variable "indexes" * (based on a specific RTL build) * Below is the static map between perf generic/arc specific event_id and * h/w condition names. 
* At the time of probe, we loop thru each index and find its name to * complete the mapping of perf event_id to h/w index, as the latter is needed * to actually program the counter */ static const char * const arc_pmu_ev_hw_map[] = { /* count cycles */ [PERF_COUNT_HW_CPU_CYCLES] = "crun", [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun", [PERF_COUNT_HW_BUS_CYCLES] = "crun", [PERF_COUNT_HW_INSTRUCTIONS] = "iall", [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush", [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall", [PERF_COUNT_ARC_DCLM] = "dclm", [PERF_COUNT_ARC_DCSM] = "dcsm", [PERF_COUNT_ARC_ICM] = "icm", [PERF_COUNT_ARC_BPOK] = "bpok", [PERF_COUNT_ARC_EDTLB] = "edtlb", [PERF_COUNT_ARC_EITLB] = "eitlb", /* counts condition */ [PERF_COUNT_HW_INSTRUCTIONS] = "iall", [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ [PERF_COUNT_ARC_DCLM] = "dclm", /* D-cache Load Miss */ [PERF_COUNT_ARC_DCSM] = "dcsm", /* D-cache Store Miss */ [PERF_COUNT_ARC_ICM] = "icm", /* I-cache Miss */ [PERF_COUNT_ARC_EDTLB] = "edtlb", /* D-TLB Miss */ [PERF_COUNT_ARC_EITLB] = "eitlb", /* I-TLB Miss */ }; #define C(_x) PERF_COUNT_HW_CACHE_##_x Loading
arch/arc/kernel/perf_event.c +13 −5 Original line number Diff line number Diff line Loading @@ -90,6 +90,10 @@ static int arc_pmu_cache_event(u64 config) if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n", cache_type, cache_op, cache_result, ret, arc_pmu_ev_hw_map[ret]); return ret; } Loading @@ -106,8 +110,9 @@ static int arc_pmu_event_init(struct perf_event *event) if (arc_pmu->ev_hw_idx[event->attr.config] < 0) return -ENOENT; hwc->config = arc_pmu->ev_hw_idx[event->attr.config]; pr_debug("initializing event %d with cfg %d\n", (int) event->attr.config, (int) hwc->config); pr_debug("init event %d with h/w %d \'%s\'\n", (int) event->attr.config, (int) hwc->config, arc_pmu_ev_hw_map[event->attr.config]); return 0; case PERF_TYPE_HW_CACHE: ret = arc_pmu_cache_event(event->attr.config); Loading Loading @@ -260,19 +265,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev) arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c); cc_name.str[8] = 0; for (i = 0; i < PERF_COUNT_HW_MAX; i++) for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++) arc_pmu->ev_hw_idx[i] = -1; /* loop thru all available h/w condition indexes */ for (j = 0; j < cc_bcr.c; j++) { write_aux_reg(ARC_REG_CC_INDEX, j); cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); /* See if it has been mapped to a perf event_id */ for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { if (arc_pmu_ev_hw_map[i] && !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) && strlen(arc_pmu_ev_hw_map[i])) { pr_debug("mapping %d to idx %d with name %s\n", i, j, cc_name.str); pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n", i, cc_name.str, j); arc_pmu->ev_hw_idx[i] = j; } } Loading