From aa1a43262ad5df010768f69530fa179ff81651d3 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 8 Sep 2021 15:05:22 +0100 Subject: PM: EM: Fix inefficient states detection Currently, a debug message is printed if an inefficient state is detected in the Energy Model. Unfortunately, it won't detect if the first state is inefficient or if two successive states are. Fix this behavior. Fixes: 27871f7a8a34 (PM: Introduce an Energy Model management framework) Signed-off-by: Vincent Donnefort Reviewed-by: Quentin Perret Reviewed-by: Lukasz Luba Reviewed-by: Matthias Kaehlcke Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index a332ccd829e2..97e62469a6b3 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -107,8 +107,7 @@ static void em_debug_remove_pd(struct device *dev) {} static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, int nr_states, struct em_data_callback *cb) { - unsigned long opp_eff, prev_opp_eff = ULONG_MAX; - unsigned long power, freq, prev_freq = 0; + unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; struct em_perf_state *table; int i, ret; u64 fmax; @@ -153,27 +152,21 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].power = power; table[i].frequency = prev_freq = freq; - - /* - * The hertz/watts efficiency ratio should decrease as the - * frequency grows on sane platforms. But this isn't always - * true in practice so warn the user if a higher OPP is more - * power efficient than a lower one. - */ - opp_eff = freq / power; - if (opp_eff >= prev_opp_eff) - dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", - i, i - 1); - prev_opp_eff = opp_eff; } /* Compute the cost of each performance state. */ fmax = (u64) table[nr_states - 1].frequency; - for (i = 0; i < nr_states; i++) { + for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res = em_scale_power(table[i].power); table[i].cost = div64_u64(fmax * power_res, table[i].frequency); + if (table[i].cost >= prev_cost) { + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", + table[i].frequency); + } else { + prev_cost = table[i].cost; + } } pd->table = table; -- cgit v1.2.3 From c8ed99533dbc0fcc1142671ec80acb33045d2999 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 8 Sep 2021 15:05:23 +0100 Subject: PM: EM: Mark inefficient states Some SoCs, such as the sd855 have OPPs within the same performance domain, whose cost is higher than others with a higher frequency. Even though those OPPs are interesting from a cooling perspective, it makes no sense to use them when the device can run at full capacity. Those OPPs handicap the performance domain, when choosing the most energy-efficient CPU and are wasting energy. They are inefficient. Hence, add support for such OPPs to the Energy Model. The table can now be read skipping inefficient performance states (and by extension, inefficient OPPs). Signed-off-by: Vincent Donnefort Reviewed-by: Matthias Kaehlcke Reviewed-by: Lukasz Luba Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 97e62469a6b3..6d8438347535 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -2,7 +2,7 @@ /* * Energy Model of devices * - * Copyright (c) 2018-2020, Arm ltd. + * Copyright (c) 2018-2021, Arm ltd. * Written by: Quentin Perret, Arm ltd. * Improvements provided by: Lukasz Luba, Arm ltd. */ @@ -42,6 +42,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) debugfs_create_ulong("frequency", 0444, d, &ps->frequency); debugfs_create_ulong("power", 0444, d, &ps->power); debugfs_create_ulong("cost", 0444, d, &ps->cost); + debugfs_create_ulong("inefficient", 0444, d, &ps->flags); } static int em_debug_cpus_show(struct seq_file *s, void *unused) @@ -162,6 +163,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].cost = div64_u64(fmax * power_res, table[i].frequency); if (table[i].cost >= prev_cost) { + table[i].flags = EM_PERF_STATE_INEFFICIENT; dev_dbg(dev, "EM: OPP:%lu is inefficient\n", table[i].frequency); } else { -- cgit v1.2.3 From 88f7a89560f6d0fc7803a8933637488f14e0a098 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 8 Sep 2021 15:05:24 +0100 Subject: PM: EM: Extend em_perf_domain with a flag field Merge the current "milliwatts" option into a "flag" field. This intends to prepare the extension of this structure for inefficient states support in the Energy Model. Signed-off-by: Vincent Donnefort Reviewed-by: Lukasz Luba Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 6d8438347535..3a7d1573b214 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -56,7 +56,8 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); static int em_debug_units_show(struct seq_file *s, void *unused) { struct em_perf_domain *pd = s->private; - char *units = pd->milliwatts ? "milliWatts" : "bogoWatts"; + char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ? + "milliWatts" : "bogoWatts"; seq_printf(s, "%s\n", units); @@ -330,7 +331,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (ret) goto unlock; - dev->em_pd->milliwatts = milliwatts; + if (milliwatts) + dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS; em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); -- cgit v1.2.3 From 8354eb9eb3ddb4a8d0857648a470beffcc9d8639 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 8 Sep 2021 15:05:25 +0100 Subject: PM: EM: Allow skipping inefficient states The new performance domain flag EM_PERF_DOMAIN_SKIP_INEFFICIENCIES allows to not take into account inefficient states when estimating energy consumption. This intends to let the Energy Model know that CPUFreq itself will skip inefficiencies and such states don't need to be part of the estimation anymore. Signed-off-by: Vincent Donnefort Reviewed-by: Lukasz Luba Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 3a7d1573b214..d353ef29e37f 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -65,6 +65,17 @@ static int em_debug_units_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(em_debug_units); +static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused) +{ + struct em_perf_domain *pd = s->private; + int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0; + + seq_printf(s, "%d\n", enabled); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies); + static void em_debug_create_pd(struct device *dev) { struct dentry *d; @@ -78,6 +89,8 @@ static void em_debug_create_pd(struct device *dev) &em_debug_cpus_fops); debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); + debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd, + &em_debug_skip_inefficiencies_fops); /* Create a sub-directory for each performance state */ for (i = 0; i < dev->em_pd->nr_perf_states; i++) -- cgit v1.2.3 From e458716a92b57f854deb89bb40aa3554c2b6205e Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Wed, 8 Sep 2021 15:05:30 +0100 Subject: PM: EM: Mark inefficiencies in CPUFreq The Energy Model has a 1:1 mapping between OPPs and performance states (em_perf_state). If a CPUFreq driver registers an Energy Model, inefficiencies found by the latter can be applied to CPUFreq. Signed-off-by: Vincent Donnefort Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/power/energy_model.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'kernel/power') diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index d353ef29e37f..0153b0ca7b23 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) "energy_model: " fmt #include +#include #include #include #include @@ -231,6 +232,43 @@ static int em_create_pd(struct device *dev, int nr_states, return 0; } +static void em_cpufreq_update_efficiencies(struct device *dev) +{ + struct em_perf_domain *pd = dev->em_pd; + struct em_perf_state *table; + struct cpufreq_policy *policy; + int found = 0; + int i; + + if (!_is_cpu_device(dev) || !pd) + return; + + policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd))); + if (!policy) { + dev_warn(dev, "EM: Access to CPUFreq policy failed"); + return; + } + + table = pd->table; + + for (i = 0; i < pd->nr_perf_states; i++) { + if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT)) + continue; + + if (!cpufreq_table_set_inefficient(policy, table[i].frequency)) + found++; + } + + if (!found) + return; + + /* + * Efficiencies have been installed in CPUFreq, inefficient frequencies + * will be skipped. The EM can do the same. + */ + pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES; +} + /** * em_pd_get() - Return the performance domain for a device * @dev : Device to find the performance domain for @@ -347,6 +385,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, if (milliwatts) dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS; + em_cpufreq_update_efficiencies(dev); + em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); -- cgit v1.2.3