Loading Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt +5 −3 Original line number Diff line number Diff line Loading @@ -5,8 +5,8 @@ can be used to measure the bandwidth of read/write traffic from the BIMC master ports. For example, the CPU subsystem sits on one BIMC master port. Required properties: - compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2" or "qcom,bimc-bwmon3" - compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2", "qcom,bimc-bwmon3" or "qcom,bimc-bwmon4" - reg: Pairs of physical base addresses and region sizes of memory mapped registers. - reg-names: Names of the bases for the above registers. Expected Loading @@ -14,7 +14,8 @@ Required properties: - interrupts: Lists the threshold IRQ. - qcom,mport: The hardware master port that this device can monitor - qcom,target-dev: The DT device that corresponds to this master port - qcom,hw-timer-hz: Hardware sampling rate in Hz. This field must be specified for "qcom,bimc-bwmon4" Example: qcom,cpu-bwmon { compatible = "qcom,bimc-bwmon"; Loading @@ -23,4 +24,5 @@ Example: interrupts = <0 183 1>; qcom,mport = <0>; qcom,target-dev = <&cpubw>; qcom,hw-timer-hz = <19200000>; }; drivers/devfreq/Kconfig +19 −0 Original line number Diff line number Diff line Loading @@ -82,6 +82,15 @@ config QCOM_BIMC_BWMON has the capability to raise an IRQ when the count exceeds a programmable limit. config ARM_MEMLAT_MON tristate "ARM CPU Memory Latency monitor hardware" depends on ARCH_QCOM help The PMU present on these ARM cores allows for the use of counters to monitor the memory latency characteristics of an ARM CPU workload. This driver uses these counters to implement the APIs needed by the mem_latency devfreq governor. config DEVFREQ_GOV_QCOM_BW_HWMON tristate "HW monitor based governor for device BW" depends on QCOM_BIMC_BWMON Loading @@ -101,6 +110,16 @@ config DEVFREQ_GOV_QCOM_CACHE_HWMON it can conflict with existing profiling tools. This governor is unlikely to be useful for other devices. 
config DEVFREQ_GOV_MEMLAT tristate "HW monitor based governor for device BW" depends on ARM_MEMLAT_MON help HW monitor based governor for device to DDR bandwidth voting. This governor sets the CPU BW vote based on stats obtained from the memlat monitor if it determines that a workload is memory latency bound. Since this uses target specific counters it can conflict with existing profiling tools. comment "DEVFREQ Drivers" config ARM_EXYNOS_BUS_DEVFREQ Loading drivers/devfreq/Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -7,8 +7,10 @@ obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o obj-$(CONFIG_QCOM_BIMC_BWMON) += bimc-bwmon.o obj-$(CONFIG_ARM_MEMLAT_MON) += arm-memlat-mon.o obj-$(CONFIG_DEVFREQ_GOV_QCOM_BW_HWMON) += governor_bw_hwmon.o obj-$(CONFIG_DEVFREQ_GOV_QCOM_CACHE_HWMON) += governor_cache_hwmon.o obj-$(CONFIG_DEVFREQ_GOV_MEMLAT) += governor_memlat.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o Loading drivers/devfreq/arm-memlat-mon.c 0 → 100644 +314 −0 Original line number Diff line number Diff line // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2017, 2019, The Linux Foundation. All rights reserved. 
*/ #define pr_fmt(fmt) "arm-memlat-mon: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/io.h> #include <linux/delay.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/slab.h> #include <linux/irq.h> #include <linux/cpu_pm.h> #include <linux/cpu.h> #include "governor.h" #include "governor_memlat.h" #include <linux/perf_event.h> enum ev_index { INST_IDX, CM_IDX, CYC_IDX, NUM_EVENTS }; #define INST_EV 0x08 #define L2DM_EV 0x17 #define CYC_EV 0x11 struct event_data { struct perf_event *pevent; unsigned long prev_count; }; struct cpu_pmu_stats { struct event_data events[NUM_EVENTS]; ktime_t prev_ts; }; struct cpu_grp_info { cpumask_t cpus; unsigned int event_ids[NUM_EVENTS]; struct cpu_pmu_stats *cpustats; struct memlat_hwmon hw; }; #define to_cpustats(cpu_grp, cpu) \ (&cpu_grp->cpustats[cpu - cpumask_first(&cpu_grp->cpus)]) #define to_devstats(cpu_grp, cpu) \ (&cpu_grp->hw.core_stats[cpu - cpumask_first(&cpu_grp->cpus)]) #define to_cpu_grp(hwmon) container_of(hwmon, struct cpu_grp_info, hw) static unsigned long compute_freq(struct cpu_pmu_stats *cpustats, unsigned long cyc_cnt) { ktime_t ts; unsigned int diff; unsigned long freq = 0; ts = ktime_get(); diff = ktime_to_us(ktime_sub(ts, cpustats->prev_ts)); if (!diff) diff = 1; cpustats->prev_ts = ts; freq = cyc_cnt; do_div(freq, diff); return freq; } #define MAX_COUNT_LIM 0xFFFFFFFFFFFFFFFF static inline unsigned long read_event(struct event_data *event) { unsigned long ev_count; u64 total, enabled, running; total = perf_event_read_value(event->pevent, &enabled, &running); ev_count = total - event->prev_count; event->prev_count = total; return ev_count; } static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp) { struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu); struct dev_stats *devstats = to_devstats(cpu_grp, cpu); 
unsigned long cyc_cnt; devstats->inst_count = read_event(&cpustats->events[INST_IDX]); devstats->mem_count = read_event(&cpustats->events[CM_IDX]); cyc_cnt = read_event(&cpustats->events[CYC_IDX]); devstats->freq = compute_freq(cpustats, cyc_cnt); } static unsigned long get_cnt(struct memlat_hwmon *hw) { int cpu; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); for_each_cpu(cpu, &cpu_grp->cpus) read_perf_counters(cpu, cpu_grp); return 0; } static void delete_events(struct cpu_pmu_stats *cpustats) { int i; for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) { cpustats->events[i].prev_count = 0; perf_event_release_kernel(cpustats->events[i].pevent); } } static void stop_hwmon(struct memlat_hwmon *hw) { int cpu; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); struct dev_stats *devstats; for_each_cpu(cpu, &cpu_grp->cpus) { delete_events(to_cpustats(cpu_grp, cpu)); /* Clear governor data */ devstats = to_devstats(cpu_grp, cpu); devstats->inst_count = 0; devstats->mem_count = 0; devstats->freq = 0; } } static struct perf_event_attr *alloc_attr(void) { struct perf_event_attr *attr; attr = kzalloc(sizeof(struct perf_event_attr), GFP_KERNEL); if (!attr) return attr; attr->type = PERF_TYPE_RAW; attr->size = sizeof(struct perf_event_attr); attr->pinned = 1; attr->exclude_idle = 1; return attr; } static int set_events(struct cpu_grp_info *cpu_grp, int cpu) { struct perf_event *pevent; struct perf_event_attr *attr; int err, i; struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu); /* Allocate an attribute for event initialization */ attr = alloc_attr(); if (!attr) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) { attr->config = cpu_grp->event_ids[i]; pevent = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL); if (IS_ERR(pevent)) goto err_out; cpustats->events[i].pevent = pevent; perf_event_enable(pevent); } kfree(attr); return 0; err_out: err = PTR_ERR(pevent); kfree(attr); return err; } static int start_hwmon(struct memlat_hwmon *hw) { int cpu, 
ret = 0; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); for_each_cpu(cpu, &cpu_grp->cpus) { ret = set_events(cpu_grp, cpu); if (ret) { pr_warn("Perf event init failed on CPU%d\n", cpu); break; } } return ret; } static int get_mask_from_dev_handle(struct platform_device *pdev, cpumask_t *mask) { struct device *dev = &pdev->dev; struct device_node *dev_phandle; struct device *cpu_dev; int cpu, i = 0; int ret = -ENOENT; dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++); while (dev_phandle) { for_each_possible_cpu(cpu) { cpu_dev = get_cpu_device(cpu); if (cpu_dev && cpu_dev->of_node == dev_phandle) { cpumask_set_cpu(cpu, mask); ret = 0; break; } } dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++); } return ret; } static int arm_memlat_mon_driver_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct memlat_hwmon *hw; struct cpu_grp_info *cpu_grp; int cpu, ret; u32 event_id; cpu_grp = devm_kzalloc(dev, sizeof(*cpu_grp), GFP_KERNEL); if (!cpu_grp) return -ENOMEM; hw = &cpu_grp->hw; hw->dev = dev; hw->of_node = of_parse_phandle(dev->of_node, "qcom,target-dev", 0); if (!hw->of_node) { dev_err(dev, "Couldn't find a target device\n"); return -ENODEV; } if (get_mask_from_dev_handle(pdev, &cpu_grp->cpus)) { dev_err(dev, "CPU list is empty\n"); return -ENODEV; } hw->num_cores = cpumask_weight(&cpu_grp->cpus); hw->core_stats = devm_kzalloc(dev, hw->num_cores * sizeof(*(hw->core_stats)), GFP_KERNEL); if (!hw->core_stats) return -ENOMEM; cpu_grp->cpustats = devm_kzalloc(dev, hw->num_cores * sizeof(*(cpu_grp->cpustats)), GFP_KERNEL); if (!cpu_grp->cpustats) return -ENOMEM; cpu_grp->event_ids[CYC_IDX] = CYC_EV; ret = of_property_read_u32(dev->of_node, "qcom,cachemiss-ev", &event_id); if (ret) { dev_dbg(dev, "Cache Miss event not specified. 
Using def:0x%x\n", L2DM_EV); event_id = L2DM_EV; } cpu_grp->event_ids[CM_IDX] = event_id; ret = of_property_read_u32(dev->of_node, "qcom,inst-ev", &event_id); if (ret) { dev_dbg(dev, "Inst event not specified. Using def:0x%x\n", INST_EV); event_id = INST_EV; } cpu_grp->event_ids[INST_IDX] = event_id; for_each_cpu(cpu, &cpu_grp->cpus) to_devstats(cpu_grp, cpu)->id = cpu; hw->start_hwmon = &start_hwmon; hw->stop_hwmon = &stop_hwmon; hw->get_cnt = &get_cnt; ret = register_memlat(dev, hw); if (ret) { pr_err("Mem Latency Gov registration failed\n"); return ret; } return 0; } static const struct of_device_id memlat_match_table[] = { { .compatible = "qcom,arm-memlat-mon" }, {} }; static struct platform_driver arm_memlat_mon_driver = { .probe = arm_memlat_mon_driver_probe, .driver = { .name = "arm-memlat-mon", .of_match_table = memlat_match_table, }, }; module_platform_driver(arm_memlat_mon_driver); drivers/devfreq/bimc-bwmon.c +244 −22 Original line number Diff line number Diff line // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2015, 2019, The Linux Foundation. All rights reserved. * Copyright (c) 2014-2016, 2019, The Linux Foundation. All rights reserved. 
*/ #define pr_fmt(fmt) "bimc-bwmon: " fmt Loading @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/delay.h> #include <linux/bitops.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> Loading @@ -32,10 +33,24 @@ #define MON_MASK(m) ((m)->base + 0x298) #define MON_MATCH(m) ((m)->base + 0x29C) #define MON2_EN(m) ((m)->base + 0x2A0) #define MON2_CLEAR(m) ((m)->base + 0x2A4) #define MON2_SW(m) ((m)->base + 0x2A8) #define MON2_THRES_HI(m) ((m)->base + 0x2AC) #define MON2_THRES_MED(m) ((m)->base + 0x2B0) #define MON2_THRES_LO(m) ((m)->base + 0x2B4) #define MON2_ZONE_ACTIONS(m) ((m)->base + 0x2B8) #define MON2_ZONE_CNT_THRES(m) ((m)->base + 0x2BC) #define MON2_BYTE_CNT(m) ((m)->base + 0x2D0) #define MON2_WIN_TIMER(m) ((m)->base + 0x2D4) #define MON2_ZONE_CNT(m) ((m)->base + 0x2D8) #define MON2_ZONE_MAX(m, zone) ((m)->base + 0x2E0 + 0x4 * zone) struct bwmon_spec { bool wrap_on_thres; bool overflow; bool throt_adj; bool hw_sampling; }; struct bwmon { Loading @@ -46,23 +61,36 @@ struct bwmon { const struct bwmon_spec *spec; struct device *dev; struct bw_hwmon hw; u32 hw_timer_hz; u32 throttle_adj; u32 sample_size_ms; u32 intr_status; }; #define to_bwmon(ptr) container_of(ptr, struct bwmon, hw) #define has_hw_sampling(m) (m->spec->hw_sampling) #define ENABLE_MASK BIT(0) #define THROTTLE_MASK 0x1F #define THROTTLE_SHIFT 16 #define INT_ENABLE_V1 0x1 #define INT_STATUS_MASK 0x03 #define INT_STATUS_MASK_HWS 0xF0 static DEFINE_SPINLOCK(glb_lock); static void mon_enable(struct bwmon *m) { if (has_hw_sampling(m)) writel_relaxed((ENABLE_MASK | m->throttle_adj), MON2_EN(m)); else writel_relaxed((ENABLE_MASK | m->throttle_adj), MON_EN(m)); } static void mon_disable(struct bwmon *m) { if (has_hw_sampling(m)) writel_relaxed(m->throttle_adj, MON2_EN(m)); else writel_relaxed(m->throttle_adj, MON_EN(m)); /* * mon_disable() and mon_irq_clear(), Loading @@ -72,17 +100,46 @@ static void mon_disable(struct bwmon *m) mb(); } static void 
mon_clear(struct bwmon *m) #define MON_CLEAR_BIT 0x1 #define MON_CLEAR_ALL_BIT 0x2 static void mon_clear(struct bwmon *m, bool clear_all) { writel_relaxed(0x1, MON_CLEAR(m)); if (!has_hw_sampling(m)) { writel_relaxed(MON_CLEAR_BIT, MON_CLEAR(m)); goto out; } if (clear_all) writel_relaxed(MON_CLEAR_ALL_BIT, MON2_CLEAR(m)); else writel_relaxed(MON_CLEAR_BIT, MON2_CLEAR(m)); /* * The counter clear and IRQ clear bits are not in the same 4KB * region. So, we need to make sure the counter clear is completed * before we try to clear the IRQ or do any other counter operations. */ out: mb(); } #define SAMPLE_WIN_LIM 0xFFFFF static void mon_set_hw_sampling_window(struct bwmon *m, unsigned int sample_ms) { u32 rate; if (unlikely(sample_ms != m->sample_size_ms)) { rate = mult_frac(sample_ms, m->hw_timer_hz, MSEC_PER_SEC); m->sample_size_ms = sample_ms; if (unlikely(rate > SAMPLE_WIN_LIM)) { rate = SAMPLE_WIN_LIM; pr_warn("Sample window %u larger than hw limit: %u\n", rate, SAMPLE_WIN_LIM); } writel_relaxed(rate, MON2_SW(m)); } } static void mon_irq_enable(struct bwmon *m) { u32 val; Loading @@ -91,11 +148,11 @@ static void mon_irq_enable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val |= 1 << m->mport; writel_relaxed(val, GLB_INT_EN(m)); spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); val |= 0x1; val |= has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); spin_unlock(&glb_lock); /* * make Sure irq enable complete for local and global * to avoid race with other monitor calls Loading @@ -111,11 +168,11 @@ static void mon_irq_disable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val &= ~(1 << m->mport); writel_relaxed(val, GLB_INT_EN(m)); spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); val &= ~0x1; val &= has_hw_sampling(m) ? 
~INT_STATUS_MASK_HWS : ~INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); spin_unlock(&glb_lock); /* * make Sure irq disable complete for local and global * to avoid race with other monitor calls Loading @@ -132,12 +189,18 @@ static unsigned int mon_irq_status(struct bwmon *m) dev_dbg(m->dev, "IRQ status p:%x, g:%x\n", mval, readl_relaxed(GLB_INT_STATUS(m))); mval &= has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_STATUS_MASK; return mval; } static void mon_irq_clear(struct bwmon *m) { writel_relaxed(0x3, MON_INT_CLR(m)); u32 intclr; intclr = has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_STATUS_MASK; writel_relaxed(intclr, MON_INT_CLR(m)); /* Ensure the monitor IRQ is clear before clearing GLB IRQ */ mb(); writel_relaxed(1 << m->mport, GLB_INT_CLR(m)); Loading Loading @@ -165,6 +228,90 @@ static u32 mon_get_throttle_adj(struct bw_hwmon *hw) return m->throttle_adj >> THROTTLE_SHIFT; } #define ZONE1_SHIFT 8 #define ZONE2_SHIFT 16 #define ZONE3_SHIFT 24 #define ZONE0_ACTION 0x01 /* Increment zone 0 count */ #define ZONE1_ACTION 0x09 /* Increment zone 1 & clear lower zones */ #define ZONE2_ACTION 0x25 /* Increment zone 2 & clear lower zones */ #define ZONE3_ACTION 0x95 /* Increment zone 3 & clear lower zones */ static u32 calc_zone_actions(void) { u32 zone_actions; zone_actions = ZONE0_ACTION; zone_actions |= ZONE1_ACTION << ZONE1_SHIFT; zone_actions |= ZONE2_ACTION << ZONE2_SHIFT; zone_actions |= ZONE3_ACTION << ZONE3_SHIFT; return zone_actions; } #define ZONE_CNT_LIM 0xFFU #define UP_CNT_1 1 static u32 calc_zone_counts(struct bw_hwmon *hw) { u32 zone_counts; zone_counts = ZONE_CNT_LIM; zone_counts |= min(hw->down_cnt, ZONE_CNT_LIM) << ZONE1_SHIFT; zone_counts |= ZONE_CNT_LIM << ZONE2_SHIFT; zone_counts |= UP_CNT_1 << ZONE3_SHIFT; return zone_counts; } static unsigned int mbps_to_mb(unsigned long mbps, unsigned int ms) { mbps *= ms; mbps = DIV_ROUND_UP(mbps, MSEC_PER_SEC); return mbps; } /* * Define the 4 zones using HI, MED & LO thresholds: * Zone 0: byte count < 
THRES_LO * Zone 1: THRES_LO < byte count < THRES_MED * Zone 2: THRES_MED < byte count < THRES_HI * Zone 3: byte count > THRES_HI */ #define THRES_LIM 0x7FFU static void set_zone_thres(struct bwmon *m, unsigned int sample_ms) { struct bw_hwmon *hw = &(m->hw); u32 hi, med, lo; hi = mbps_to_mb(hw->up_wake_mbps, sample_ms); med = mbps_to_mb(hw->down_wake_mbps, sample_ms); lo = 0; if (unlikely((hi > THRES_LIM) || (med > hi) || (lo > med))) { pr_warn("Zone thres larger than hw limit: hi:%u med:%u lo:%u\n", hi, med, lo); hi = min(hi, THRES_LIM); med = min(med, hi - 1); lo = min(lo, med-1); } writel_relaxed(hi, MON2_THRES_HI(m)); writel_relaxed(med, MON2_THRES_MED(m)); writel_relaxed(lo, MON2_THRES_LO(m)); dev_dbg(m->dev, "Thres: hi:%u med:%u lo:%u\n", hi, med, lo); } static void mon_set_zones(struct bwmon *m, unsigned int sample_ms) { struct bw_hwmon *hw = &(m->hw); u32 zone_cnt_thres = calc_zone_counts(hw); mon_set_hw_sampling_window(m, sample_ms); set_zone_thres(m, sample_ms); /* Set the zone count thresholds for interrupts */ writel_relaxed(zone_cnt_thres, MON2_ZONE_CNT_THRES(m)); dev_dbg(m->dev, "Zone Count Thres: %0x\n", zone_cnt_thres); } static void mon_set_limit(struct bwmon *m, u32 count) { writel_relaxed(count, MON_THRES(m)); Loading Loading @@ -197,6 +344,41 @@ static unsigned long mon_get_count(struct bwmon *m) return count; } static unsigned int get_zone(struct bwmon *m) { u32 zone_counts; u32 zone; zone = get_bitmask_order((m->intr_status & INT_STATUS_MASK_HWS) >> 4); if (zone) { zone--; } else { zone_counts = readl_relaxed(MON2_ZONE_CNT(m)); if (zone_counts) { zone = get_bitmask_order(zone_counts) - 1; zone /= 8; } } m->intr_status = 0; return zone; } static unsigned long mon_get_zone_stats(struct bwmon *m) { unsigned int zone; unsigned long count = 0; zone = get_zone(m); count = readl_relaxed(MON2_ZONE_MAX(m, zone)) + 1; count *= SZ_1M; dev_dbg(m->dev, "Zone%d Max byte count: %08lx\n", zone, count); return count; } /* ********** CPUBW specific code 
********** */ /* Returns MBps of read/writes for the sampling window. */ Loading @@ -216,8 +398,8 @@ static unsigned long get_bytes_and_clear(struct bw_hwmon *hw) unsigned long count; mon_disable(m); count = mon_get_count(m); mon_clear(m); count = has_hw_sampling(m) ? mon_get_zone_stats(m) : mon_get_count(m); mon_clear(m, false); mon_irq_clear(m); mon_enable(m); Loading @@ -232,7 +414,7 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) mon_disable(m); count = mon_get_count(m); mon_clear(m); mon_clear(m, false); mon_irq_clear(m); if (likely(!m->spec->wrap_on_thres)) Loading @@ -246,11 +428,26 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) return count; } static unsigned long set_hw_events(struct bw_hwmon *hw, unsigned int sample_ms) { struct bwmon *m = to_bwmon(hw); mon_disable(m); mon_clear(m, false); mon_irq_clear(m); mon_set_zones(m, sample_ms); mon_enable(m); return 0; } static irqreturn_t bwmon_intr_handler(int irq, void *dev) { struct bwmon *m = dev; if (!mon_irq_status(m)) m->intr_status = mon_irq_status(m); if (!m->intr_status) return IRQ_NONE; if (bw_hwmon_sample_end(&m->hw) > 0) Loading @@ -271,6 +468,7 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) { struct bwmon *m = to_bwmon(hw); u32 limit; u32 zone_actions = calc_zone_actions(); int ret; ret = request_threaded_irq(m->irq, bwmon_intr_handler, Loading @@ -285,10 +483,16 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) mon_disable(m); mon_clear(m, true); limit = mbps_to_bytes(mbps, hw->df->profile->polling_ms, 0); if (has_hw_sampling(m)) { mon_set_zones(m, hw->df->profile->polling_ms); /* Set the zone actions to increment appropriate counters */ writel_relaxed(zone_actions, MON2_ZONE_ACTIONS(m)); } else { mon_set_limit(m, limit); } mon_clear(m); mon_irq_clear(m); mon_irq_enable(m); mon_enable(m); Loading @@ -303,7 +507,7 @@ static void stop_bw_hwmon(struct bw_hwmon *hw) mon_irq_disable(m); free_irq(m->irq, 
m); mon_disable(m); mon_clear(m); mon_clear(m, true); mon_irq_clear(m); } Loading @@ -324,7 +528,7 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) struct bwmon *m = to_bwmon(hw); int ret; mon_clear(m); mon_clear(m, false); ret = request_threaded_irq(m->irq, bwmon_intr_handler, bwmon_intr_thread, IRQF_ONESHOT | IRQF_SHARED, Loading @@ -344,15 +548,21 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) /*************************************************************************/ static const struct bwmon_spec spec[] = { { .wrap_on_thres = true, .overflow = false, .throt_adj = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true}, { .wrap_on_thres = true, .overflow = false, .throt_adj = false, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = false, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true, .hw_sampling = true}, }; static const struct of_device_id bimc_bwmon_match_table[] = { { .compatible = "qcom,bimc-bwmon", .data = &spec[0] }, { .compatible = "qcom,bimc-bwmon2", .data = &spec[1] }, { .compatible = "qcom,bimc-bwmon3", .data = &spec[2] }, { .compatible = "qcom,bimc-bwmon4", .data = &spec[3] }, {} }; Loading Loading @@ -384,6 +594,16 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) } m->spec = id->data; if (has_hw_sampling(m)) { ret = of_property_read_u32(dev->of_node, "qcom,hw-timer-hz", &data); if (ret) { dev_err(dev, "HW sampling rate not specified!\n"); return ret; } m->hw_timer_hz = data; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base"); if (!res) { dev_err(dev, "base not found!\n"); Loading Loading @@ -421,6 +641,8 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) m->hw.resume_hwmon = &resume_bw_hwmon; m->hw.get_bytes_and_clear = &get_bytes_and_clear; m->hw.set_thres = 
&set_thres; if (has_hw_sampling(m)) m->hw.set_hw_events = &set_hw_events; if (m->spec->throt_adj) { m->hw.set_throttle_adj = &mon_set_throttle_adj; m->hw.get_throttle_adj = &mon_get_throttle_adj; Loading Loading
Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt +5 −3 Original line number Diff line number Diff line Loading @@ -5,8 +5,8 @@ can be used to measure the bandwidth of read/write traffic from the BIMC master ports. For example, the CPU subsystem sits on one BIMC master port. Required properties: - compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2" or "qcom,bimc-bwmon3" - compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2", "qcom,bimc-bwmon3" or "qcom,bimc-bwmon4" - reg: Pairs of physical base addresses and region sizes of memory mapped registers. - reg-names: Names of the bases for the above registers. Expected Loading @@ -14,7 +14,8 @@ Required properties: - interrupts: Lists the threshold IRQ. - qcom,mport: The hardware master port that this device can monitor - qcom,target-dev: The DT device that corresponds to this master port - qcom,hw-timer-hz: Hardware sampling rate in Hz. This field must be specified for "qcom,bimc-bwmon4" Example: qcom,cpu-bwmon { compatible = "qcom,bimc-bwmon"; Loading @@ -23,4 +24,5 @@ Example: interrupts = <0 183 1>; qcom,mport = <0>; qcom,target-dev = <&cpubw>; qcom,hw-timer-hz = <19200000>; };
drivers/devfreq/Kconfig +19 −0 Original line number Diff line number Diff line Loading @@ -82,6 +82,15 @@ config QCOM_BIMC_BWMON has the capability to raise an IRQ when the count exceeds a programmable limit. config ARM_MEMLAT_MON tristate "ARM CPU Memory Latency monitor hardware" depends on ARCH_QCOM help The PMU present on these ARM cores allows for the use of counters to monitor the memory latency characteristics of an ARM CPU workload. This driver uses these counters to implement the APIs needed by the mem_latency devfreq governor. config DEVFREQ_GOV_QCOM_BW_HWMON tristate "HW monitor based governor for device BW" depends on QCOM_BIMC_BWMON Loading @@ -101,6 +110,16 @@ config DEVFREQ_GOV_QCOM_CACHE_HWMON it can conflict with existing profiling tools. This governor is unlikely to be useful for other devices. config DEVFREQ_GOV_MEMLAT tristate "HW monitor based governor for device BW" depends on ARM_MEMLAT_MON help HW monitor based governor for device to DDR bandwidth voting. This governor sets the CPU BW vote based on stats obtained from the memlat monitor if it determines that a workload is memory latency bound. Since this uses target specific counters it can conflict with existing profiling tools. comment "DEVFREQ Drivers" config ARM_EXYNOS_BUS_DEVFREQ Loading
drivers/devfreq/Makefile +2 −0 Original line number Diff line number Diff line Loading @@ -7,8 +7,10 @@ obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o obj-$(CONFIG_QCOM_BIMC_BWMON) += bimc-bwmon.o obj-$(CONFIG_ARM_MEMLAT_MON) += arm-memlat-mon.o obj-$(CONFIG_DEVFREQ_GOV_QCOM_BW_HWMON) += governor_bw_hwmon.o obj-$(CONFIG_DEVFREQ_GOV_QCOM_CACHE_HWMON) += governor_cache_hwmon.o obj-$(CONFIG_DEVFREQ_GOV_MEMLAT) += governor_memlat.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o Loading
drivers/devfreq/arm-memlat-mon.c 0 → 100644 +314 −0 Original line number Diff line number Diff line // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2017, 2019, The Linux Foundation. All rights reserved. */ #define pr_fmt(fmt) "arm-memlat-mon: " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/io.h> #include <linux/delay.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/slab.h> #include <linux/irq.h> #include <linux/cpu_pm.h> #include <linux/cpu.h> #include "governor.h" #include "governor_memlat.h" #include <linux/perf_event.h> enum ev_index { INST_IDX, CM_IDX, CYC_IDX, NUM_EVENTS }; #define INST_EV 0x08 #define L2DM_EV 0x17 #define CYC_EV 0x11 struct event_data { struct perf_event *pevent; unsigned long prev_count; }; struct cpu_pmu_stats { struct event_data events[NUM_EVENTS]; ktime_t prev_ts; }; struct cpu_grp_info { cpumask_t cpus; unsigned int event_ids[NUM_EVENTS]; struct cpu_pmu_stats *cpustats; struct memlat_hwmon hw; }; #define to_cpustats(cpu_grp, cpu) \ (&cpu_grp->cpustats[cpu - cpumask_first(&cpu_grp->cpus)]) #define to_devstats(cpu_grp, cpu) \ (&cpu_grp->hw.core_stats[cpu - cpumask_first(&cpu_grp->cpus)]) #define to_cpu_grp(hwmon) container_of(hwmon, struct cpu_grp_info, hw) static unsigned long compute_freq(struct cpu_pmu_stats *cpustats, unsigned long cyc_cnt) { ktime_t ts; unsigned int diff; unsigned long freq = 0; ts = ktime_get(); diff = ktime_to_us(ktime_sub(ts, cpustats->prev_ts)); if (!diff) diff = 1; cpustats->prev_ts = ts; freq = cyc_cnt; do_div(freq, diff); return freq; } #define MAX_COUNT_LIM 0xFFFFFFFFFFFFFFFF static inline unsigned long read_event(struct event_data *event) { unsigned long ev_count; u64 total, enabled, running; total = perf_event_read_value(event->pevent, &enabled, &running); ev_count = total - event->prev_count; event->prev_count = 
total; return ev_count; } static void read_perf_counters(int cpu, struct cpu_grp_info *cpu_grp) { struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu); struct dev_stats *devstats = to_devstats(cpu_grp, cpu); unsigned long cyc_cnt; devstats->inst_count = read_event(&cpustats->events[INST_IDX]); devstats->mem_count = read_event(&cpustats->events[CM_IDX]); cyc_cnt = read_event(&cpustats->events[CYC_IDX]); devstats->freq = compute_freq(cpustats, cyc_cnt); } static unsigned long get_cnt(struct memlat_hwmon *hw) { int cpu; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); for_each_cpu(cpu, &cpu_grp->cpus) read_perf_counters(cpu, cpu_grp); return 0; } static void delete_events(struct cpu_pmu_stats *cpustats) { int i; for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) { cpustats->events[i].prev_count = 0; perf_event_release_kernel(cpustats->events[i].pevent); } } static void stop_hwmon(struct memlat_hwmon *hw) { int cpu; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); struct dev_stats *devstats; for_each_cpu(cpu, &cpu_grp->cpus) { delete_events(to_cpustats(cpu_grp, cpu)); /* Clear governor data */ devstats = to_devstats(cpu_grp, cpu); devstats->inst_count = 0; devstats->mem_count = 0; devstats->freq = 0; } } static struct perf_event_attr *alloc_attr(void) { struct perf_event_attr *attr; attr = kzalloc(sizeof(struct perf_event_attr), GFP_KERNEL); if (!attr) return attr; attr->type = PERF_TYPE_RAW; attr->size = sizeof(struct perf_event_attr); attr->pinned = 1; attr->exclude_idle = 1; return attr; } static int set_events(struct cpu_grp_info *cpu_grp, int cpu) { struct perf_event *pevent; struct perf_event_attr *attr; int err, i; struct cpu_pmu_stats *cpustats = to_cpustats(cpu_grp, cpu); /* Allocate an attribute for event initialization */ attr = alloc_attr(); if (!attr) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(cpustats->events); i++) { attr->config = cpu_grp->event_ids[i]; pevent = perf_event_create_kernel_counter(attr, cpu, NULL, NULL, NULL); if (IS_ERR(pevent)) goto 
err_out; cpustats->events[i].pevent = pevent; perf_event_enable(pevent); } kfree(attr); return 0; err_out: err = PTR_ERR(pevent); kfree(attr); return err; } static int start_hwmon(struct memlat_hwmon *hw) { int cpu, ret = 0; struct cpu_grp_info *cpu_grp = to_cpu_grp(hw); for_each_cpu(cpu, &cpu_grp->cpus) { ret = set_events(cpu_grp, cpu); if (ret) { pr_warn("Perf event init failed on CPU%d\n", cpu); break; } } return ret; } static int get_mask_from_dev_handle(struct platform_device *pdev, cpumask_t *mask) { struct device *dev = &pdev->dev; struct device_node *dev_phandle; struct device *cpu_dev; int cpu, i = 0; int ret = -ENOENT; dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++); while (dev_phandle) { for_each_possible_cpu(cpu) { cpu_dev = get_cpu_device(cpu); if (cpu_dev && cpu_dev->of_node == dev_phandle) { cpumask_set_cpu(cpu, mask); ret = 0; break; } } dev_phandle = of_parse_phandle(dev->of_node, "qcom,cpulist", i++); } return ret; } static int arm_memlat_mon_driver_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct memlat_hwmon *hw; struct cpu_grp_info *cpu_grp; int cpu, ret; u32 event_id; cpu_grp = devm_kzalloc(dev, sizeof(*cpu_grp), GFP_KERNEL); if (!cpu_grp) return -ENOMEM; hw = &cpu_grp->hw; hw->dev = dev; hw->of_node = of_parse_phandle(dev->of_node, "qcom,target-dev", 0); if (!hw->of_node) { dev_err(dev, "Couldn't find a target device\n"); return -ENODEV; } if (get_mask_from_dev_handle(pdev, &cpu_grp->cpus)) { dev_err(dev, "CPU list is empty\n"); return -ENODEV; } hw->num_cores = cpumask_weight(&cpu_grp->cpus); hw->core_stats = devm_kzalloc(dev, hw->num_cores * sizeof(*(hw->core_stats)), GFP_KERNEL); if (!hw->core_stats) return -ENOMEM; cpu_grp->cpustats = devm_kzalloc(dev, hw->num_cores * sizeof(*(cpu_grp->cpustats)), GFP_KERNEL); if (!cpu_grp->cpustats) return -ENOMEM; cpu_grp->event_ids[CYC_IDX] = CYC_EV; ret = of_property_read_u32(dev->of_node, "qcom,cachemiss-ev", &event_id); if (ret) { dev_dbg(dev, "Cache 
Miss event not specified. Using def:0x%x\n", L2DM_EV); event_id = L2DM_EV; } cpu_grp->event_ids[CM_IDX] = event_id; ret = of_property_read_u32(dev->of_node, "qcom,inst-ev", &event_id); if (ret) { dev_dbg(dev, "Inst event not specified. Using def:0x%x\n", INST_EV); event_id = INST_EV; } cpu_grp->event_ids[INST_IDX] = event_id; for_each_cpu(cpu, &cpu_grp->cpus) to_devstats(cpu_grp, cpu)->id = cpu; hw->start_hwmon = &start_hwmon; hw->stop_hwmon = &stop_hwmon; hw->get_cnt = &get_cnt; ret = register_memlat(dev, hw); if (ret) { pr_err("Mem Latency Gov registration failed\n"); return ret; } return 0; } static const struct of_device_id memlat_match_table[] = { { .compatible = "qcom,arm-memlat-mon" }, {} }; static struct platform_driver arm_memlat_mon_driver = { .probe = arm_memlat_mon_driver_probe, .driver = { .name = "arm-memlat-mon", .of_match_table = memlat_match_table, }, }; module_platform_driver(arm_memlat_mon_driver);
drivers/devfreq/bimc-bwmon.c +244 −22 Original line number Diff line number Diff line // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2014-2015, 2019, The Linux Foundation. All rights reserved. * Copyright (c) 2014-2016, 2019, The Linux Foundation. All rights reserved. */ #define pr_fmt(fmt) "bimc-bwmon: " fmt Loading @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/io.h> #include <linux/delay.h> #include <linux/bitops.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/interrupt.h> Loading @@ -32,10 +33,24 @@ #define MON_MASK(m) ((m)->base + 0x298) #define MON_MATCH(m) ((m)->base + 0x29C) #define MON2_EN(m) ((m)->base + 0x2A0) #define MON2_CLEAR(m) ((m)->base + 0x2A4) #define MON2_SW(m) ((m)->base + 0x2A8) #define MON2_THRES_HI(m) ((m)->base + 0x2AC) #define MON2_THRES_MED(m) ((m)->base + 0x2B0) #define MON2_THRES_LO(m) ((m)->base + 0x2B4) #define MON2_ZONE_ACTIONS(m) ((m)->base + 0x2B8) #define MON2_ZONE_CNT_THRES(m) ((m)->base + 0x2BC) #define MON2_BYTE_CNT(m) ((m)->base + 0x2D0) #define MON2_WIN_TIMER(m) ((m)->base + 0x2D4) #define MON2_ZONE_CNT(m) ((m)->base + 0x2D8) #define MON2_ZONE_MAX(m, zone) ((m)->base + 0x2E0 + 0x4 * zone) struct bwmon_spec { bool wrap_on_thres; bool overflow; bool throt_adj; bool hw_sampling; }; struct bwmon { Loading @@ -46,23 +61,36 @@ struct bwmon { const struct bwmon_spec *spec; struct device *dev; struct bw_hwmon hw; u32 hw_timer_hz; u32 throttle_adj; u32 sample_size_ms; u32 intr_status; }; #define to_bwmon(ptr) container_of(ptr, struct bwmon, hw) #define has_hw_sampling(m) (m->spec->hw_sampling) #define ENABLE_MASK BIT(0) #define THROTTLE_MASK 0x1F #define THROTTLE_SHIFT 16 #define INT_ENABLE_V1 0x1 #define INT_STATUS_MASK 0x03 #define INT_STATUS_MASK_HWS 0xF0 static DEFINE_SPINLOCK(glb_lock); static void mon_enable(struct bwmon *m) { if (has_hw_sampling(m)) writel_relaxed((ENABLE_MASK | m->throttle_adj), MON2_EN(m)); else writel_relaxed((ENABLE_MASK | m->throttle_adj), MON_EN(m)); } static 
void mon_disable(struct bwmon *m) { if (has_hw_sampling(m)) writel_relaxed(m->throttle_adj, MON2_EN(m)); else writel_relaxed(m->throttle_adj, MON_EN(m)); /* * mon_disable() and mon_irq_clear(), Loading @@ -72,17 +100,46 @@ static void mon_disable(struct bwmon *m) mb(); } static void mon_clear(struct bwmon *m) #define MON_CLEAR_BIT 0x1 #define MON_CLEAR_ALL_BIT 0x2 static void mon_clear(struct bwmon *m, bool clear_all) { writel_relaxed(0x1, MON_CLEAR(m)); if (!has_hw_sampling(m)) { writel_relaxed(MON_CLEAR_BIT, MON_CLEAR(m)); goto out; } if (clear_all) writel_relaxed(MON_CLEAR_ALL_BIT, MON2_CLEAR(m)); else writel_relaxed(MON_CLEAR_BIT, MON2_CLEAR(m)); /* * The counter clear and IRQ clear bits are not in the same 4KB * region. So, we need to make sure the counter clear is completed * before we try to clear the IRQ or do any other counter operations. */ out: mb(); }

/* Largest value this driver will program into MON2_SW, in hw timer ticks. */
#define SAMPLE_WIN_LIM 0xFFFFF

/*
 * Program the hardware sampling window (MON2_SW) for a sample period of
 * @sample_ms milliseconds, converted to ticks of the m->hw_timer_hz timer.
 *
 * The register is only rewritten when @sample_ms differs from the cached
 * m->sample_size_ms, which is updated to @sample_ms. A window that would
 * exceed SAMPLE_WIN_LIM ticks is clamped to SAMPLE_WIN_LIM and a warning
 * naming the requested period is logged.
 */
static void mon_set_hw_sampling_window(struct bwmon *m, unsigned int sample_ms)
{
	u64 scaled;
	u32 rate;

	if (likely(sample_ms == m->sample_size_ms))
		return;

	m->sample_size_ms = sample_ms;

	/*
	 * sample_ms * hw_timer_hz equals ticks * MSEC_PER_SEC. Range-check the
	 * 64-bit product: mult_frac() computes in the 32-bit type of sample_ms,
	 * so a large window would wrap there and slip past the limit check.
	 */
	scaled = (u64)sample_ms * m->hw_timer_hz;
	if (unlikely(scaled >= ((u64)SAMPLE_WIN_LIM + 1) * MSEC_PER_SEC)) {
		/* Log what was requested, not the already-clamped value. */
		pr_warn("Sample window %u ms larger than hw limit: %u ticks\n",
			sample_ms, SAMPLE_WIN_LIM);
		rate = SAMPLE_WIN_LIM;
	} else {
		/* The product is below 2^32 here, so mult_frac() cannot wrap. */
		rate = mult_frac(sample_ms, m->hw_timer_hz, MSEC_PER_SEC);
	}

	writel_relaxed(rate, MON2_SW(m));
}

static void mon_irq_enable(struct bwmon *m) { u32 val; Loading @@ -91,11 +148,11 @@ static void mon_irq_enable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val |= 1 << m->mport; writel_relaxed(val, GLB_INT_EN(m)); spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); val |= 0x1; val |= has_hw_sampling(m) ?
INT_STATUS_MASK_HWS : INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); spin_unlock(&glb_lock); /* * make Sure irq enable complete for local and global * to avoid race with other monitor calls Loading @@ -111,11 +168,11 @@ static void mon_irq_disable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val &= ~(1 << m->mport); writel_relaxed(val, GLB_INT_EN(m)); spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); val &= ~0x1; val &= has_hw_sampling(m) ? ~INT_STATUS_MASK_HWS : ~INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); spin_unlock(&glb_lock); /* * make Sure irq disable complete for local and global * to avoid race with other monitor calls Loading @@ -132,12 +189,18 @@ static unsigned int mon_irq_status(struct bwmon *m) dev_dbg(m->dev, "IRQ status p:%x, g:%x\n", mval, readl_relaxed(GLB_INT_STATUS(m))); mval &= has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_STATUS_MASK; return mval; } static void mon_irq_clear(struct bwmon *m) { writel_relaxed(0x3, MON_INT_CLR(m)); u32 intclr; intclr = has_hw_sampling(m) ? 
INT_STATUS_MASK_HWS : INT_STATUS_MASK; writel_relaxed(intclr, MON_INT_CLR(m)); /* Ensure the monitor IRQ is clear before clearing GLB IRQ */ mb(); writel_relaxed(1 << m->mport, GLB_INT_CLR(m)); Loading Loading @@ -165,6 +228,90 @@ static u32 mon_get_throttle_adj(struct bw_hwmon *hw) return m->throttle_adj >> THROTTLE_SHIFT; } #define ZONE1_SHIFT 8 #define ZONE2_SHIFT 16 #define ZONE3_SHIFT 24 #define ZONE0_ACTION 0x01 /* Increment zone 0 count */ #define ZONE1_ACTION 0x09 /* Increment zone 1 & clear lower zones */ #define ZONE2_ACTION 0x25 /* Increment zone 2 & clear lower zones */ #define ZONE3_ACTION 0x95 /* Increment zone 3 & clear lower zones */ static u32 calc_zone_actions(void) { u32 zone_actions; zone_actions = ZONE0_ACTION; zone_actions |= ZONE1_ACTION << ZONE1_SHIFT; zone_actions |= ZONE2_ACTION << ZONE2_SHIFT; zone_actions |= ZONE3_ACTION << ZONE3_SHIFT; return zone_actions; } #define ZONE_CNT_LIM 0xFFU #define UP_CNT_1 1 static u32 calc_zone_counts(struct bw_hwmon *hw) { u32 zone_counts; zone_counts = ZONE_CNT_LIM; zone_counts |= min(hw->down_cnt, ZONE_CNT_LIM) << ZONE1_SHIFT; zone_counts |= ZONE_CNT_LIM << ZONE2_SHIFT; zone_counts |= UP_CNT_1 << ZONE3_SHIFT; return zone_counts; } static unsigned int mbps_to_mb(unsigned long mbps, unsigned int ms) { mbps *= ms; mbps = DIV_ROUND_UP(mbps, MSEC_PER_SEC); return mbps; } /* * Define the 4 zones using HI, MED & LO thresholds: * Zone 0: byte count < THRES_LO * Zone 1: THRES_LO < byte count < THRES_MED * Zone 2: THRES_MED < byte count < THRES_HI * Zone 3: byte count > THRES_HI */ #define THRES_LIM 0x7FFU static void set_zone_thres(struct bwmon *m, unsigned int sample_ms) { struct bw_hwmon *hw = &(m->hw); u32 hi, med, lo; hi = mbps_to_mb(hw->up_wake_mbps, sample_ms); med = mbps_to_mb(hw->down_wake_mbps, sample_ms); lo = 0; if (unlikely((hi > THRES_LIM) || (med > hi) || (lo > med))) { pr_warn("Zone thres larger than hw limit: hi:%u med:%u lo:%u\n", hi, med, lo); hi = min(hi, THRES_LIM); med = min(med, hi - 1); lo 
= min(lo, med-1); } writel_relaxed(hi, MON2_THRES_HI(m)); writel_relaxed(med, MON2_THRES_MED(m)); writel_relaxed(lo, MON2_THRES_LO(m)); dev_dbg(m->dev, "Thres: hi:%u med:%u lo:%u\n", hi, med, lo); } static void mon_set_zones(struct bwmon *m, unsigned int sample_ms) { struct bw_hwmon *hw = &(m->hw); u32 zone_cnt_thres = calc_zone_counts(hw); mon_set_hw_sampling_window(m, sample_ms); set_zone_thres(m, sample_ms); /* Set the zone count thresholds for interrupts */ writel_relaxed(zone_cnt_thres, MON2_ZONE_CNT_THRES(m)); dev_dbg(m->dev, "Zone Count Thres: %0x\n", zone_cnt_thres); } static void mon_set_limit(struct bwmon *m, u32 count) { writel_relaxed(count, MON_THRES(m)); Loading Loading @@ -197,6 +344,41 @@ static unsigned long mon_get_count(struct bwmon *m) return count; } static unsigned int get_zone(struct bwmon *m) { u32 zone_counts; u32 zone; zone = get_bitmask_order((m->intr_status & INT_STATUS_MASK_HWS) >> 4); if (zone) { zone--; } else { zone_counts = readl_relaxed(MON2_ZONE_CNT(m)); if (zone_counts) { zone = get_bitmask_order(zone_counts) - 1; zone /= 8; } } m->intr_status = 0; return zone; } static unsigned long mon_get_zone_stats(struct bwmon *m) { unsigned int zone; unsigned long count = 0; zone = get_zone(m); count = readl_relaxed(MON2_ZONE_MAX(m, zone)) + 1; count *= SZ_1M; dev_dbg(m->dev, "Zone%d Max byte count: %08lx\n", zone, count); return count; } /* ********** CPUBW specific code ********** */ /* Returns MBps of read/writes for the sampling window. */ Loading @@ -216,8 +398,8 @@ static unsigned long get_bytes_and_clear(struct bw_hwmon *hw) unsigned long count; mon_disable(m); count = mon_get_count(m); mon_clear(m); count = has_hw_sampling(m) ? 
mon_get_zone_stats(m) : mon_get_count(m); mon_clear(m, false); mon_irq_clear(m); mon_enable(m); Loading @@ -232,7 +414,7 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) mon_disable(m); count = mon_get_count(m); mon_clear(m); mon_clear(m, false); mon_irq_clear(m); if (likely(!m->spec->wrap_on_thres)) Loading @@ -246,11 +428,26 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) return count; }

/*
 * Reprogram the zone configuration for a sample period of @sample_ms
 * milliseconds.
 *
 * The monitor is disabled, its counters are cleared (clear_all = false) and
 * the pending IRQ is cleared before mon_set_zones() rewrites the settings;
 * the monitor is then re-enabled. Always returns 0.
 */
static unsigned long set_hw_events(struct bw_hwmon *hw, unsigned int sample_ms)
{
	struct bwmon *m = to_bwmon(hw);

	/* Quiesce the monitor before touching its configuration. */
	mon_disable(m);
	mon_clear(m, false);
	mon_irq_clear(m);

	mon_set_zones(m, sample_ms);
	mon_enable(m);

	return 0;
}

static irqreturn_t bwmon_intr_handler(int irq, void *dev) { struct bwmon *m = dev; if (!mon_irq_status(m)) m->intr_status = mon_irq_status(m); if (!m->intr_status) return IRQ_NONE; if (bw_hwmon_sample_end(&m->hw) > 0) Loading @@ -271,6 +468,7 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) { struct bwmon *m = to_bwmon(hw); u32 limit; u32 zone_actions = calc_zone_actions(); int ret; ret = request_threaded_irq(m->irq, bwmon_intr_handler, Loading @@ -285,10 +483,16 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) mon_disable(m); mon_clear(m, true); limit = mbps_to_bytes(mbps, hw->df->profile->polling_ms, 0); if (has_hw_sampling(m)) { mon_set_zones(m, hw->df->profile->polling_ms); /* Set the zone actions to increment appropriate counters */ writel_relaxed(zone_actions, MON2_ZONE_ACTIONS(m)); } else { mon_set_limit(m, limit); } mon_clear(m); mon_irq_clear(m); mon_irq_enable(m); mon_enable(m); Loading @@ -303,7 +507,7 @@ static void stop_bw_hwmon(struct bw_hwmon *hw) mon_irq_disable(m); free_irq(m->irq, m); mon_disable(m); mon_clear(m); mon_clear(m, true); mon_irq_clear(m); } Loading @@ -324,7 +528,7 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) struct bwmon *m = to_bwmon(hw); int ret; mon_clear(m); mon_clear(m, false); ret = request_threaded_irq(m->irq,
bwmon_intr_handler, bwmon_intr_thread, IRQF_ONESHOT | IRQF_SHARED, Loading @@ -344,15 +548,21 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) /*************************************************************************/ static const struct bwmon_spec spec[] = { { .wrap_on_thres = true, .overflow = false, .throt_adj = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true}, { .wrap_on_thres = true, .overflow = false, .throt_adj = false, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = false, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true, .hw_sampling = false}, { .wrap_on_thres = false, .overflow = true, .throt_adj = true, .hw_sampling = true}, }; static const struct of_device_id bimc_bwmon_match_table[] = { { .compatible = "qcom,bimc-bwmon", .data = &spec[0] }, { .compatible = "qcom,bimc-bwmon2", .data = &spec[1] }, { .compatible = "qcom,bimc-bwmon3", .data = &spec[2] }, { .compatible = "qcom,bimc-bwmon4", .data = &spec[3] }, {} }; Loading Loading @@ -384,6 +594,16 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) } m->spec = id->data; if (has_hw_sampling(m)) { ret = of_property_read_u32(dev->of_node, "qcom,hw-timer-hz", &data); if (ret) { dev_err(dev, "HW sampling rate not specified!\n"); return ret; } m->hw_timer_hz = data; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base"); if (!res) { dev_err(dev, "base not found!\n"); Loading Loading @@ -421,6 +641,8 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) m->hw.resume_hwmon = &resume_bw_hwmon; m->hw.get_bytes_and_clear = &get_bytes_and_clear; m->hw.set_thres = &set_thres; if (has_hw_sampling(m)) m->hw.set_hw_events = &set_hw_events; if (m->spec->throt_adj) { m->hw.set_throttle_adj = &mon_set_throttle_adj; m->hw.get_throttle_adj = &mon_get_throttle_adj; Loading