Discussion:
[PATCH v7 REPOST 9/9] arm64: add sysfs cpu_capacity attribute
(too old to reply)
Juri Lelli
2016-10-17 15:50:01 UTC
Permalink
Add a sysfs cpu_capacity attribute with which it is possible to read and
write (thus over-writing default values) CPUs capacity. This might be
useful in situations where values need changing after boot.

The new attribute shows up as:

/sys/devices/system/cpu/cpu*/cpu_capacity

Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Mark Brown <***@kernel.org>
Cc: Sudeep Holla <***@arm.com>
Signed-off-by: Juri Lelli <***@arm.com>
---

Changes from v5:
- add mutex to protect cpu_scale (as pointed out by Morten off-line)
---
arch/arm64/kernel/topology.c | 73 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b75b0ba2e113..cff34cc858b7 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -26,6 +26,7 @@
#include <asm/topology.h>

static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);

unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
@@ -37,6 +38,76 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}

+#ifdef CONFIG_PROC_SYSCTL
+#include <asm/cpu.h>
+#include <linux/string.h>
+static ssize_t show_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ ssize_t rc;
+ int cpunum = cpu->dev.id;
+ unsigned long capacity = arch_scale_cpu_capacity(NULL, cpunum);
+
+ rc = sprintf(buf, "%lu\n", capacity);
+
+ return rc;
+}
+
+static ssize_t store_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ char *p = (char *) buf;
+
+ ret = kstrtoul(p, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(cpu_capacity,
+ 0644,
+ show_cpu_capacity,
+ store_cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+late_initcall(register_cpu_capacity_sysctl);
+#endif
+
static u32 capacity_scale;
static u32 *raw_capacity;
static bool cap_parsing_failed;
@@ -87,6 +158,7 @@ static void normalize_cpu_capacity(void)
return;

pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
for_each_possible_cpu(cpu) {
pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
cpu, raw_capacity[cpu]);
@@ -96,6 +168,7 @@ static void normalize_cpu_capacity(void)
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
}
+ mutex_unlock(&cpu_scale_mutex);
}

#ifdef CONFIG_CPU_FREQ
--
2.10.0
Juri Lelli
2016-10-17 15:50:02 UTC
Permalink
ARM systems may be configured to have cpus with different power/performance
characteristics within the same chip. In this case, additional information
has to be made available to the kernel (the scheduler in particular) for it
to be aware of such differences and take decisions accordingly.

Therefore, this patch aims at standardizing cpu capacities device tree
bindings for ARM platforms. Bindings define cpu capacity-dmips-mhz
parameter, to allow operating systems to retrieve such information from
the device tree and initialize related kernel structures, paving the way
for common code in the kernel to deal with heterogeneity.

Cc: Rob Herring <robh+***@kernel.org>
Cc: Pawel Moll <***@arm.com>
Cc: Mark Rutland <***@arm.com>
Cc: Ian Campbell <ijc+***@hellion.org.uk>
Cc: Kumar Gala <***@codeaurora.org>
Cc: Maxime Ripard <***@free-electrons.com>
Cc: Olof Johansson <***@lixom.net>
Cc: Gregory CLEMENT <***@free-electrons.com>
Cc: Paul Walmsley <***@pwsan.com>
Cc: Linus Walleij <***@linaro.org>
Cc: Chen-Yu Tsai <***@csie.org>
Cc: Thomas Petazzoni <***@free-electrons.com>
Cc: ***@vger.kernel.org
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Rob Herring <***@kernel.org>
Acked-by: Vincent Guittot <***@linaro.org>
---

Changes from v1:
- removed section regarding capacity-scale
- added information regarding normalization

Changes from v4:
- binding changed to capacity-dmips-mhz
- sections and changelog updated accordingly

Changes from v5:
- addressed Mark and Vincent comments
---
.../devicetree/bindings/arm/cpu-capacity.txt | 236 +++++++++++++++++++++
Documentation/devicetree/bindings/arm/cpus.txt | 10 +
2 files changed, 246 insertions(+)
create mode 100644 Documentation/devicetree/bindings/arm/cpu-capacity.txt

diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
new file mode 100644
index 000000000000..7809fbe0cdb7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
@@ -0,0 +1,236 @@
+==========================================
+ARM CPUs capacity bindings
+==========================================
+
+==========================================
+1 - Introduction
+==========================================
+
+ARM systems may be configured to have cpus with different power/performance
+characteristics within the same chip. In this case, additional information has
+to be made available to the kernel for it to be aware of such differences and
+take decisions accordingly.
+
+==========================================
+2 - CPU capacity definition
+==========================================
+
+CPU capacity is a number that provides the scheduler information about CPUs
+heterogeneity. Such heterogeneity can come from micro-architectural differences
+(e.g., ARM big.LITTLE systems) or maximum frequency at which CPUs can run
+(e.g., SMP systems with multiple frequency domains). Heterogeneity in this
+context is about differing performance characteristics; this binding tries to
+capture a first-order approximation of the relative performance of CPUs.
+
+CPU capacities are obtained by running a suitable benchmark. This binding makes
+no guarantees on the validity or suitability of any particular benchmark, the
+final capacity should, however, be:
+
+* A "single-threaded" or CPU affine benchmark
+* Divided by the running frequency of the CPU executing the benchmark
+* Not subject to dynamic frequency scaling of the CPU
+
+For the time being we however advise usage of the Dhrystone benchmark. What
+above thus becomes:
+
+CPU capacities are obtained by running the Dhrystone benchmark on each CPU at
+max frequency (with caches enabled). The obtained DMIPS score is then divided
+by the frequency (in MHz) at which the benchmark has been run, so that
+DMIPS/MHz are obtained. Such values are then normalized w.r.t. the highest
+score obtained in the system.
+
+==========================================
+3 - capacity-dmips-mhz
+==========================================
+
+capacity-dmips-mhz is an optional cpu node [1] property: u32 value
+representing CPU capacity expressed in normalized DMIPS/MHz. At boot time, the
+maximum frequency available to the cpu is then used to calculate the capacity
+value internally used by the kernel.
+
+capacity-dmips-mhz property is all-or-nothing: if it is specified for a cpu
+node, it has to be specified for every other cpu node, or the system will
+fall back to the default capacity value for every CPU. If cpufreq is not
+available, final capacities are calculated by directly using capacity-dmips-
+mhz values (normalized w.r.t. the highest value found while parsing the DT).
+
+===========================================
+4 - Examples
+===========================================
+
+Example 1 (ARM 64-bit, 6-cpu system, two clusters):
+capacities-dmips-mhz are scaled w.r.t. 1024 (***@0 and ***@1)
+supposing ***@max-freq=1100 and ***@max-freq=850,
+final capacities are 1024 for cluster0 and 446 for cluster1
+
+cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&A57_0>;
+ };
+ core1 {
+ cpu = <&A57_1>;
+ };
+ };
+
+ cluster1 {
+ core0 {
+ cpu = <&A53_0>;
+ };
+ core1 {
+ cpu = <&A53_1>;
+ };
+ core2 {
+ cpu = <&A53_2>;
+ };
+ core3 {
+ cpu = <&A53_3>;
+ };
+ };
+ };
+
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_SLEEP_0: cpu-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ local-timer-stop;
+ entry-latency-us = <100>;
+ exit-latency-us = <250>;
+ min-residency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x1010000>;
+ local-timer-stop;
+ entry-latency-us = <800>;
+ exit-latency-us = <700>;
+ min-residency-us = <2500>;
+ };
+ };
+
+ A57_0: ***@0 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x0>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+ A57_1: ***@1 {
+ compatible = "arm,cortex-a57","arm,armv8";
+ reg = <0x0 0x1>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A57_L2>;
+ clocks = <&scpi_dvfs 0>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
+ };
+
+ A53_0: ***@100 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x100>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_1: ***@101 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x101>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_2: ***@102 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x102>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A53_3: ***@103 {
+ compatible = "arm,cortex-a53","arm,armv8";
+ reg = <0x0 0x103>;
+ device_type = "cpu";
+ enable-method = "psci";
+ next-level-cache = <&A53_L2>;
+ clocks = <&scpi_dvfs 1>;
+ cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
+ };
+
+ A57_L2: l2-cache0 {
+ compatible = "cache";
+ };
+
+ A53_L2: l2-cache1 {
+ compatible = "cache";
+ };
+};
+
+Example 2 (ARM 32-bit, 4-cpu system, two clusters,
+ cpus 0,***@1GHz, cpus 2,***@500MHz):
+capacities-dmips-mhz are scaled w.r.t. 2 (***@0 and ***@1), this means that first
+***@0 and ***@1 are twice as fast as ***@2 and ***@3 (at the same frequency)
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: ***@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ capacity-dmips-mhz = <2>;
+ };
+
+ cpu1: ***@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ capacity-dmips-mhz = <2>;
+ };
+
+ cpu2: ***@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x100>;
+ capacity-dmips-mhz = <1>;
+ };
+
+ cpu3: ***@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x101>;
+ capacity-dmips-mhz = <1>;
+ };
+};
+
+===========================================
+5 - References
+===========================================
+
+[1] ARM Linux Kernel documentation - CPUs bindings
+ Documentation/devicetree/bindings/arm/cpus.txt
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index e6782d50cbcd..c1dcf4cade2e 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -241,6 +241,14 @@ nodes to be present and contain the properties described below.
# List of phandles to idle state nodes supported
by this cpu [3].

+ - capacity-dmips-mhz
+ Usage: Optional
+ Value type: <u32>
+ Definition:
+ # u32 value representing CPU capacity [3] in
+ DMIPS/MHz, relative to highest capacity-dmips-mhz
+ in the system.
+
- rockchip,pmu
Usage: optional for systems that have an "enable-method"
property value of "rockchip,rk3066-smp"
@@ -464,3 +472,5 @@ cpus {
[2] arm/msm/qcom,kpss-acc.txt
[3] ARM Linux kernel documentation - idle states bindings
Documentation/devicetree/bindings/arm/idle-states.txt
+[3] ARM Linux kernel documentation - cpu capacity bindings
+ Documentation/devicetree/bindings/arm/cpu-capacity.txt
--
2.10.0
Juri Lelli
2016-10-17 15:50:02 UTC
Permalink
Add Juno r2 cpu capacity-dmips-mhz information.

Cc: Rob Herring <robh+***@kernel.org>
Cc: Pawel Moll <***@arm.com>
Cc: Mark Rutland <***@arm.com>
Cc: Ian Campbell <ijc+***@hellion.org.uk>
Cc: Kumar Gala <***@codeaurora.org>
Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Liviu Dudau <***@arm.com>
Cc: Sudeep Holla <***@arm.com>
Cc: Arnd Bergmann <***@arndb.de>
Cc: Jon Medhurst <***@linaro.org>
Cc: Olof Johansson <***@lixom.net>
Cc: Robin Murphy <***@arm.com>
Cc: ***@vger.kernel.org
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Sudeep Holla <***@arm.com>
---

Changes from v4:
- new patch since Juno r2 dt has been merged

Changes from v6:
- s/bindings// in changelog
---
arch/arm64/boot/dts/arm/juno-r2.dts | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/arm/juno-r2.dts b/arch/arm64/boot/dts/arm/juno-r2.dts
index 007be826efce..614fc9227943 100644
--- a/arch/arm64/boot/dts/arm/juno-r2.dts
+++ b/arch/arm64/boot/dts/arm/juno-r2.dts
@@ -90,6 +90,7 @@
next-level-cache = <&A72_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A72_1: ***@1 {
@@ -100,6 +101,7 @@
next-level-cache = <&A72_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A53_0: ***@100 {
@@ -110,6 +112,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <485>;
};

A53_1: ***@101 {
@@ -120,6 +123,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <485>;
};

A53_2: ***@102 {
@@ -130,6 +134,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <485>;
};

A53_3: ***@103 {
@@ -140,6 +145,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <485>;
};

A72_L2: l2-cache0 {
--
2.10.0
Juri Lelli
2016-10-17 15:50:02 UTC
Permalink
With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities
can now be calculated from values extracted from DT and information
coming from cpufreq. Add parsing of DT information at boot time, and
complement it with cpufreq information. Also, store such information
using per CPU variables, as we do for arm.

Caveat: the information provided by this patch will start to be used in
the future. We need to #define arch_scale_cpu_capacity to something
provided in arch, so that scheduler's default implementation (which gets
used if arch_scale_cpu_capacity is not defined) is overwritten.

Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Mark Brown <***@kernel.org>
Cc: Sudeep Holla <***@arm.com>
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Vincent Guittot <***@linaro.org>
---

Changes from v1:
- normalize w.r.t. highest capacity found in DT
- bailout conditions (all-or-nothing)

Changes from v4:
- parsing modified to reflect change in binding (capacity-dmips-mhz)

Changes from v5:
- allocate raw_capacity array with kcalloc()
- pr_err() only for partial capacity information

Changes from v6:
- use cpuinfo.max_freq instead of policy->max
- add delayed work to unregister cpufreq notifier
---
arch/arm64/kernel/topology.c | 159 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 158 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6deedbab..b75b0ba2e113 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,162 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>

#include <asm/cputype.h>
#include <asm/topology.h>

+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
+static u32 capacity_scale;
+static u32 *raw_capacity;
+static bool cap_parsing_failed;
+
+static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
+{
+ int ret;
+ u32 cpu_capacity;
+
+ if (cap_parsing_failed)
+ return;
+
+ ret = of_property_read_u32(cpu_node,
+ "capacity-dmips-mhz",
+ &cpu_capacity);
+ if (!ret) {
+ if (!raw_capacity) {
+ raw_capacity = kcalloc(num_possible_cpus(),
+ sizeof(*raw_capacity),
+ GFP_KERNEL);
+ if (!raw_capacity) {
+ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
+ cap_parsing_failed = true;
+ return;
+ }
+ }
+ capacity_scale = max(cpu_capacity, capacity_scale);
+ raw_capacity[cpu] = cpu_capacity;
+ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
+ cpu_node->full_name, raw_capacity[cpu]);
+ } else {
+ if (raw_capacity) {
+ pr_err("cpu_capacity: missing %s raw capacity\n",
+ cpu_node->full_name);
+ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+ }
+ cap_parsing_failed = true;
+ kfree(raw_capacity);
+ }
+}
+
+static void normalize_cpu_capacity(void)
+{
+ u64 capacity;
+ int cpu;
+
+ if (!raw_capacity || cap_parsing_failed)
+ return;
+
+ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ for_each_possible_cpu(cpu) {
+ pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
+ cpu, raw_capacity[cpu]);
+ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
+ / capacity_scale;
+ set_capacity_scale(cpu, capacity);
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+ }
+}
+
+#ifdef CONFIG_CPU_FREQ
+static cpumask_var_t cpus_to_visit;
+static bool cap_parsing_done;
+static void parsing_done_workfn(struct work_struct *work);
+static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
+
+static int
+init_cpu_capacity_callback(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (cap_parsing_failed || cap_parsing_done)
+ return 0;
+
+ switch (val) {
+ case CPUFREQ_NOTIFY:
+ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
+ cpumask_pr_args(policy->related_cpus),
+ cpumask_pr_args(cpus_to_visit));
+ cpumask_andnot(cpus_to_visit,
+ cpus_to_visit,
+ policy->related_cpus);
+ for_each_cpu(cpu, policy->related_cpus) {
+ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
+ policy->cpuinfo.max_freq / 1000UL;
+ capacity_scale = max(raw_capacity[cpu], capacity_scale);
+ }
+ if (cpumask_empty(cpus_to_visit)) {
+ normalize_cpu_capacity();
+ kfree(raw_capacity);
+ pr_debug("cpu_capacity: parsing done\n");
+ cap_parsing_done = true;
+ schedule_work(&parsing_done_work);
+ }
+ }
+ return 0;
+}
+
+static struct notifier_block init_cpu_capacity_notifier = {
+ .notifier_call = init_cpu_capacity_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ if (cap_parsing_failed)
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
+ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
+ return -ENOMEM;
+ }
+ cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
+ return cpufreq_register_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+static void parsing_done_workfn(struct work_struct *work)
+{
+ cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+
+#else
+static int __init free_raw_capacity(void)
+{
+ kfree(raw_capacity);
+
+ return 0;
+}
+core_initcall(free_raw_capacity);
+#endif
+
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -34,6 +186,7 @@ static int __init get_cpu_for_node(struct device_node *node)

for_each_possible_cpu(cpu) {
if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ parse_cpu_capacity(cpu_node, cpu);
of_node_put(cpu_node);
return cpu;
}
@@ -178,13 +331,17 @@ static int __init parse_dt_topology(void)
* cluster with restricted subnodes.
*/
map = of_get_child_by_name(cn, "cpu-map");
- if (!map)
+ if (!map) {
+ cap_parsing_failed = true;
goto out;
+ }

ret = parse_cluster(map, 0);
if (ret != 0)
goto out_map;

+ normalize_cpu_capacity();
+
/*
* Check that all cores are in the topology; the SMP code will
* only mark cores described in the DT as possible.
--
2.10.0
Juri Lelli
2016-10-17 15:50:02 UTC
Permalink
Add a sysfs cpu_capacity attribute with which it is possible to read and
write (thus over-writing default values) CPUs capacity. This might be
useful in situations where values need changing after boot.

The new attribute shows up as:

/sys/devices/system/cpu/cpu*/cpu_capacity

Cc: Russell King <***@arm.linux.org.uk>
Signed-off-by: Juri Lelli <***@arm.com>
---

Changes from v5:
- add mutex to protect cpu_scale (as pointed out by Morten off-line)
---
arch/arm/kernel/topology.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 73 insertions(+)

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 18bb0474f7ec..46167bffbcf1 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -42,6 +42,7 @@
* updated during this sequence.
*/
static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);

unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
@@ -53,6 +54,76 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}

+#ifdef CONFIG_PROC_SYSCTL
+#include <asm/cpu.h>
+#include <linux/string.h>
+static ssize_t show_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ ssize_t rc;
+ int cpunum = cpu->dev.id;
+ unsigned long capacity = arch_scale_cpu_capacity(NULL, cpunum);
+
+ rc = sprintf(buf, "%lu\n", capacity);
+
+ return rc;
+}
+
+static ssize_t store_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ char *p = (char *) buf;
+
+ ret = kstrtoul(p, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(cpu_capacity,
+ 0644,
+ show_cpu_capacity,
+ store_cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+late_initcall(register_cpu_capacity_sysctl);
+#endif
+
#ifdef CONFIG_OF
struct cpu_efficiency {
const char *compatible;
@@ -132,6 +203,7 @@ static void normalize_cpu_capacity(void)
return;

pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
for_each_possible_cpu(cpu) {
capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
/ capacity_scale;
@@ -139,6 +211,7 @@ static void normalize_cpu_capacity(void)
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
}
+ mutex_unlock(&cpu_scale_mutex);
}

#ifdef CONFIG_CPU_FREQ
--
2.10.0
Russell King - ARM Linux
2016-10-30 20:50:02 UTC
Permalink
This post might be inappropriate. Click to display it.
Juri Lelli
2016-11-03 05:40:01 UTC
Permalink
Hi,

apologies for the delay in replying, but I'm attending Linux Plumbers
this week.
Post by Russell King - ARM Linux
Post by Juri Lelli
+#ifdef CONFIG_PROC_SYSCTL
+#include <asm/cpu.h>
+#include <linux/string.h>
Include files at the top of the file please. No need to ifdef them.
They're sorted alphabetically, so new additions should be alphabetical.
(That's a general rule - if something is already alphabetical, do not
make it non-alphabetical.)
Post by Juri Lelli
+static ssize_t show_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ ssize_t rc;
+ int cpunum = cpu->dev.id;
+ unsigned long capacity = arch_scale_cpu_capacity(NULL, cpunum);
+
+ rc = sprintf(buf, "%lu\n", capacity);
+
+ return rc;
Way too many lines for such a simple function. This can be simplified
struct cpu *cpu = container_of(dev, struct cpu, dev);
return sprintf(buf, "%lu\n", arch_scale_cpu_capacity(NULL, cpu->dev.id);
If you don't like the last line ending on column 79, then feel free to
break it across two lines after the format string.
Post by Juri Lelli
+}
+
+static ssize_t store_cpu_capacity(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ char *p = (char *) buf;
+
+ ret = kstrtoul(p, 0, &new_capacity);
Unnecessary cast - kstrtoul takes a const char pointer, and in any case
it's really bad form to cast away the "const-ness" of any pointer. So,
if (count) {
ret = kstrtoul(buf, 0, &new_capacity);
should work just fine.
Post by Juri Lelli
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(cpu_capacity,
+ 0644,
+ show_cpu_capacity,
+ store_cpu_capacity);
There's a move to use the named DEVICE_ATTR_RW() for this kind of thing,
it'll want the functions named xxx_show() and xxx_store(). I see
there's some recent patches to do this conversion across the kernel, so
this should probably be done before submission.
Post by Juri Lelli
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+late_initcall(register_cpu_capacity_sysctl);
Hmm, this is really weird. topology_init() in arch/arm/kernel/setup.c
is where these devices get created, and they're created at
subsys_initcall() time. By that point, the list of possible CPUs has
to be static, it's not going to change. I don't see why this has to be
done at late_initcall() - and since topology.c will be linked after
setup.c, I don't see why it shouldn't be at subsys_initcall() level to
follow on after topology_init().
I should have addressed your comments with the updated version below. If
it looks good to you I'll supersede the old version with this new one.

Best,

- Juri

--->8---
From 14c0f21d403ad47843896eecc042334d4e0ed8dd Mon Sep 17 00:00:00 2001
From: Juri Lelli <***@arm.com>
Date: Thu, 15 Oct 2015 13:53:37 +0100
Subject: [PATCH v7.1 8/9] arm: add sysfs cpu_capacity attribute

Add a sysfs cpu_capacity attribute with which it is possible to read and
write (thus over-writing default values) CPUs capacity. This might be
useful in situations where values need changing after boot.

The new attribute shows up as:

/sys/devices/system/cpu/cpu*/cpu_capacity

Cc: Russell King <***@arm.linux.org.uk>
Signed-off-by: Juri Lelli <***@arm.com>
---

Changes from v5:
- add mutex to protect cpu_scale (as pointed out by Morten off-line)

Changes from v7:
- include files moved at top of file
- show_cpu_capacity simplified to less lines of code
- unnecessary cast removed in store_cpu_capacity
- use DEVICE_ATTR_RW() instead of DEVICE_ATTR()
- use subsys_initcall instead of late_initcall
---
arch/arm/kernel/topology.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index aa63287d9a10..ebf47d91b804 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -22,7 +22,9 @@
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/string.h>

+#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

@@ -42,6 +44,7 @@
* updated during this sequence.
*/
static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);

unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
@@ -53,6 +56,65 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}

+#ifdef CONFIG_PROC_SYSCTL
+static ssize_t cpu_capacity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%lu\n",
+ arch_scale_cpu_capacity(NULL, cpu->dev.id));
+}
+
+static ssize_t cpu_capacity_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ ret = kstrtoul(buf, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
+#endif
+
#ifdef CONFIG_OF
struct cpu_efficiency {
const char *compatible;
@@ -132,6 +194,7 @@ static void normalize_cpu_capacity(void)
return;

pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
for_each_possible_cpu(cpu) {
capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
/ capacity_scale;
@@ -139,6 +202,7 @@ static void normalize_cpu_capacity(void)
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
}
+ mutex_unlock(&cpu_scale_mutex);
}

#ifdef CONFIG_CPU_FREQ
--
2.10.0
Juri Lelli
2016-11-18 08:30:02 UTC
Permalink
Hi Russell,
Post by Juri Lelli
Hi,
apologies for the delay in replying, but I'm attending Linux Plumbers
this week.
[...]
Post by Juri Lelli
I should have addressed your comments with the updated version below. If
it looks good to you I'll supersede the old version with this new one.
The two updated patches are still listed as incoming in your system.
Do you think we will be able to queue them for 4.10? IMHO, it would be
good to have all pieces in together at once (Catalin and Sudeep already
queued their respective bits).

Thanks a lot.

Best,

- Juri

Juri Lelli
2016-10-17 15:50:02 UTC
Permalink
With the introduction of cpu capacity-dmips-mhz bindings, CPU capacities
can now be calculated from values extracted from DT and information
coming from cpufreq. Add parsing of DT information at boot time, and
complement it with cpufreq information. We keep code that can produce the
same information, based on different DT properties and hard-coded
values, as fall-back for backward compatibility.

Caveat: the information provided by this patch will start to be used in
the future. We need to #define arch_scale_cpu_capacity to something
provided in arch, so that scheduler's default implementation (which gets
used if arch_scale_cpu_capacity is not defined) is overwritten.

Cc: Russell King <***@arm.linux.org.uk>
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Vincent Guittot <***@linaro.org>
---

Changes from v1:
- normalize w.r.t. highest capacity found in DT
- bailout conditions (all-or-nothing)

Changes from v4:
- parsing modified to reflect change in binding (capacity-dmips-mhz)

Changes from v5:
- allocate raw_capacity array with kcalloc()
- pr_err() only for partial capacity information

Changes from v6:
- use cpuinfo.max_freq instead of policy->max
- add delayed work to unregister cpufreq notifier
---
arch/arm/kernel/topology.c | 156 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 155 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index ec279d161b32..18bb0474f7ec 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -21,6 +21,7 @@
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/cpufreq.h>

#include <asm/cputype.h>
#include <asm/topology.h>
@@ -78,6 +79,144 @@ static unsigned long *__cpu_capacity;
#define cpu_capacity(cpu) __cpu_capacity[cpu]

static unsigned long middle_capacity = 1;
+static bool cap_from_dt = true;
+static u32 *raw_capacity;
+static bool cap_parsing_failed;
+static u32 capacity_scale;
+
+static int __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
+{
+ int ret = 1;
+ u32 cpu_capacity;
+
+ if (cap_parsing_failed)
+ return !ret;
+
+ ret = of_property_read_u32(cpu_node,
+ "capacity-dmips-mhz",
+ &cpu_capacity);
+ if (!ret) {
+ if (!raw_capacity) {
+ raw_capacity = kcalloc(num_possible_cpus(),
+ sizeof(*raw_capacity),
+ GFP_KERNEL);
+ if (!raw_capacity) {
+ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
+ cap_parsing_failed = true;
+ return !ret;
+ }
+ }
+ capacity_scale = max(cpu_capacity, capacity_scale);
+ raw_capacity[cpu] = cpu_capacity;
+ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
+ cpu_node->full_name, raw_capacity[cpu]);
+ } else {
+ if (raw_capacity) {
+ pr_err("cpu_capacity: missing %s raw capacity\n",
+ cpu_node->full_name);
+ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+ }
+ cap_parsing_failed = true;
+ kfree(raw_capacity);
+ }
+
+ return !ret;
+}
+
+static void normalize_cpu_capacity(void)
+{
+ u64 capacity;
+ int cpu;
+
+ if (!raw_capacity || cap_parsing_failed)
+ return;
+
+ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ for_each_possible_cpu(cpu) {
+ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
+ / capacity_scale;
+ set_capacity_scale(cpu, capacity);
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+ }
+}
+
+#ifdef CONFIG_CPU_FREQ
+static cpumask_var_t cpus_to_visit;
+static bool cap_parsing_done;
+static void parsing_done_workfn(struct work_struct *work);
+static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
+
+static int
+init_cpu_capacity_callback(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (cap_parsing_failed || cap_parsing_done)
+ return 0;
+
+ switch (val) {
+ case CPUFREQ_NOTIFY:
+ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
+ cpumask_pr_args(policy->related_cpus),
+ cpumask_pr_args(cpus_to_visit));
+ cpumask_andnot(cpus_to_visit,
+ cpus_to_visit,
+ policy->related_cpus);
+ for_each_cpu(cpu, policy->related_cpus) {
+ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
+ policy->cpuinfo.max_freq / 1000UL;
+ capacity_scale = max(raw_capacity[cpu], capacity_scale);
+ }
+ if (cpumask_empty(cpus_to_visit)) {
+ normalize_cpu_capacity();
+ kfree(raw_capacity);
+ pr_debug("cpu_capacity: parsing done\n");
+ cap_parsing_done = true;
+ schedule_work(&parsing_done_work);
+ }
+ }
+ return 0;
+}
+
+static struct notifier_block init_cpu_capacity_notifier = {
+ .notifier_call = init_cpu_capacity_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ if (cap_parsing_failed)
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
+ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
+ return -ENOMEM;
+ }
+ cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
+ return cpufreq_register_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+static void parsing_done_workfn(struct work_struct *work)
+{
+ cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+
+#else
+static int __init free_raw_capacity(void)
+{
+ kfree(raw_capacity);
+
+ return 0;
+}
+core_initcall(free_raw_capacity);
+#endif

/*
* Iterate all CPUs' descriptor in DT and compute the efficiency
@@ -99,6 +238,12 @@ static void __init parse_dt_topology(void)
__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
GFP_NOWAIT);

+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return;
+ }
+
for_each_possible_cpu(cpu) {
const u32 *rate;
int len;
@@ -110,6 +255,13 @@ static void __init parse_dt_topology(void)
continue;
}

+ if (parse_cpu_capacity(cn, cpu)) {
+ of_node_put(cn);
+ continue;
+ }
+
+ cap_from_dt = false;
+
for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
if (of_device_is_compatible(cn, cpu_eff->compatible))
break;
@@ -151,6 +303,8 @@ static void __init parse_dt_topology(void)
middle_capacity = ((max_capacity / 3)
>> (SCHED_CAPACITY_SHIFT-1)) + 1;
+ if (cap_from_dt && !cap_parsing_failed)
+ normalize_cpu_capacity();
}

/*
@@ -160,7 +314,7 @@ static void __init parse_dt_topology(void)
*/
static void update_cpu_capacity(unsigned int cpu)
{
- if (!cpu_capacity(cpu))
+ if (!cpu_capacity(cpu) || cap_from_dt)
return;

set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
--
2.10.0
Juri Lelli
2016-10-17 15:50:03 UTC
Permalink
Add Juno r1 cpu capacity-dmips-mhz information.

Cc: Rob Herring <robh+***@kernel.org>
Cc: Pawel Moll <***@arm.com>
Cc: Mark Rutland <***@arm.com>
Cc: Ian Campbell <ijc+***@hellion.org.uk>
Cc: Kumar Gala <***@codeaurora.org>
Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Liviu Dudau <***@arm.com>
Cc: Sudeep Holla <***@arm.com>
Cc: Arnd Bergmann <***@arndb.de>
Cc: Jon Medhurst <***@linaro.org>
Cc: Olof Johansson <***@lixom.net>
Cc: Robin Murphy <***@arm.com>
Cc: ***@vger.kernel.org
Signed-off-by: Juri Lelli <***@arm.com>
---

Changes from v6:
- new patch as per off-line discussion with Sudeep
---
arch/arm64/boot/dts/arm/juno-r1.dts | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts
index 123a58b29cbd..3be8a3ef671c 100644
--- a/arch/arm64/boot/dts/arm/juno-r1.dts
+++ b/arch/arm64/boot/dts/arm/juno-r1.dts
@@ -90,6 +90,7 @@
next-level-cache = <&A57_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A57_1: cpu@1 {
@@ -100,6 +101,7 @@
next-level-cache = <&A57_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A53_0: cpu@100 {
@@ -110,6 +112,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_1: cpu@101 {
@@ -120,6 +123,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_2: cpu@102 {
@@ -130,6 +134,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_3: cpu@103 {
@@ -140,6 +145,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A57_L2: l2-cache0 {
--
2.10.0
Juri Lelli
2016-10-17 16:00:01 UTC
Permalink
Add TC2 cpu capacity information.

Cc: Liviu Dudau <***@arm.com>
Cc: Sudeep Holla <***@arm.com>
Cc: Lorenzo Pieralisi <***@arm.com>
Cc: Rob Herring <robh+***@kernel.org>
Cc: Pawel Moll <***@arm.com>
Cc: Mark Rutland <***@arm.com>
Cc: Ian Campbell <ijc+***@hellion.org.uk>
Cc: Kumar Gala <***@codeaurora.org>
Cc: Russell King <***@arm.linux.org.uk>
Cc: ***@vger.kernel.org
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Sudeep Holla <***@arm.com>
---

Changes from v1:
- capacity-scale removed

Changes from v4:
- binding changed to capacity-dmips-mhz

Changes from v6:
- s/binding// in changelog
---
arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 0205c97efdef..45d08cc37b01 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -39,6 +39,7 @@
reg = <0>;
cci-control-port = <&cci_control1>;
cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
+ capacity-dmips-mhz = <1024>;
};

cpu1: cpu@1 {
@@ -47,6 +48,7 @@
reg = <1>;
cci-control-port = <&cci_control1>;
cpu-idle-states = <&CLUSTER_SLEEP_BIG>;
+ capacity-dmips-mhz = <1024>;
};

cpu2: cpu@2 {
@@ -55,6 +57,7 @@
reg = <0x100>;
cci-control-port = <&cci_control2>;
cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
+ capacity-dmips-mhz = <516>;
};

cpu3: cpu@3 {
@@ -63,6 +66,7 @@
reg = <0x101>;
cci-control-port = <&cci_control2>;
cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
+ capacity-dmips-mhz = <516>;
};

cpu4: cpu@4 {
@@ -71,6 +75,7 @@
reg = <0x102>;
cci-control-port = <&cci_control2>;
cpu-idle-states = <&CLUSTER_SLEEP_LITTLE>;
+ capacity-dmips-mhz = <516>;
};

idle-states {
--
2.10.0
Juri Lelli
2016-10-17 16:00:02 UTC
Permalink
Add Juno cpu capacity-dmips-mhz information.

Cc: Rob Herring <robh+***@kernel.org>
Cc: Pawel Moll <***@arm.com>
Cc: Mark Rutland <***@arm.com>
Cc: Ian Campbell <ijc+***@hellion.org.uk>
Cc: Kumar Gala <***@codeaurora.org>
Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Liviu Dudau <***@arm.com>
Cc: Sudeep Holla <***@arm.com>
Cc: Arnd Bergmann <***@arndb.de>
Cc: Jon Medhurst <***@linaro.org>
Cc: Olof Johansson <***@lixom.net>
Cc: Robin Murphy <***@arm.com>
Cc: ***@vger.kernel.org
Signed-off-by: Juri Lelli <***@arm.com>
Acked-by: Sudeep Holla <***@arm.com>
---

Changes from v1:
- capacity-scale removed

Changes from v4:
- binding changed to capacity-dmips-mhz

Changes from v6:
- s/bindings// in changelog
---
arch/arm64/boot/dts/arm/juno.dts | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index a7270eff6939..6b4135e9cfe5 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -90,6 +90,7 @@
next-level-cache = <&A57_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A57_1: cpu@1 {
@@ -100,6 +101,7 @@
next-level-cache = <&A57_L2>;
clocks = <&scpi_dvfs 0>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <1024>;
};

A53_0: cpu@100 {
@@ -110,6 +112,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_1: cpu@101 {
@@ -120,6 +123,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_2: cpu@102 {
@@ -130,6 +134,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A53_3: cpu@103 {
@@ -140,6 +145,7 @@
next-level-cache = <&A53_L2>;
clocks = <&scpi_dvfs 1>;
cpu-idle-states = <&CPU_SLEEP_0 &CLUSTER_SLEEP_0>;
+ capacity-dmips-mhz = <578>;
};

A57_L2: l2-cache0 {
--
2.10.0
Sudeep Holla
2016-10-17 16:40:02 UTC
Permalink
Hi all,
this is a repost of version 7 of "CPUs capacity information for heterogeneous
systems" patchset [1] (please refer to previous postings to get some context).
I only added Juno r1 dts, as discussed off-line with Sudeep meanwhile (no code
changes at all, so that's why I'm saying this is a repost).
I'm reposting as I didn't receive any comment (despite pinging people) on the
original v7 posting (apart from Vincent acking patches 2 and 4, thanks!). I
then waited until merge window for 4.9 was closed.
I'm thus now assuming that everybody is OK with the patches and that they can
be queued for 4.10 (we certainly need this plumbing at this point). Please
speak if my assumption is wrong (and provide feedback! :).
- use Russell's patching system for patches 2 and 8
- ask Sudeep to pull patches 3,5,6 and 7
I have applied 3 to [1] and 5,6,7 to [2]. Let me know if things change
and you want me to drop them with some cosmetic subject change and
updated changelog.
--
Regards,
Sudeep

[1] git.kernel.org/sudeep.holla/linux/h/vexpress-dt/for-next
[2] git.kernel.org/sudeep.holla/linux/h/juno-dt/for-next
Juri Lelli
2016-10-17 16:50:01 UTC
Permalink
Hi Sudeep,
Post by Sudeep Holla
Hi all,
this is a repost of version 7 of "CPUs capacity information for heterogeneous
systems" patchset [1] (please refer to previous postings to get some context).
I only added Juno r1 dts, as discussed off-line with Sudeep meanwhile (no code
changes at all, so that's why I'm saying this is a repost).
I'm reposting as I didn't receive any comment (despite pinging people) on the
original v7 posting (apart from Vincent acking patches 2 and 4, thanks!). I
then waited until merge window for 4.9 was closed.
I'm thus now assuming that everybody is OK with the patches and that they can
be queued for 4.10 (we certainly need this plumbing at this point). Please
speak if my assumption is wrong (and provide feedback! :).
- use Russell's patching system for patches 2 and 8
- ask Sudeep to pull patches 3,5,6 and 7
I have applied 3 to [1] and 5,6,7 to [2]. Let me know if things change
and you want me to drop them with some cosmetic subject change and
updated changelog.
Thanks!

Best,

- Juri
Post by Sudeep Holla
--
Regards,
Sudeep
[1] git.kernel.org/sudeep.holla/linux/h/vexpress-dt/for-next
[2] git.kernel.org/sudeep.holla/linux/h/juno-dt/for-next
Catalin Marinas
2016-10-30 21:30:02 UTC
Permalink
I'm thus now assuming that everybody is OK with the patches and that they can
be queued for 4.10 (we certainly need this plumbing at this point). Please
speak if my assumption is wrong (and provide feedback! :).
- use Russell's patching system for patches 2 and 8
- ask Sudeep to pull patches 3,5,6 and 7
- ask Catalin/Will to pull patches 1,4 and 9
I'm happy to queue patches 1, 4 and 9 for 4.10 (though it might have
been easier for the whole series to go through arm-soc).
Do you think we might get into trouble splitting the merge process this way?
Probably not. The only minor downside is that I have to grab a new DT
for Juno from Sudeep to test the patches. Not an issue, though.
--
Catalin
Juri Lelli
2016-11-03 05:40:01 UTC
Permalink
Hi Catalin,
Post by Catalin Marinas
I'm thus now assuming that everybody is OK with the patches and that they can
be queued for 4.10 (we certainly need this plumbing at this point). Please
speak if my assumption is wrong (and provide feedback! :).
- use Russell's patching system for patches 2 and 8
- ask Sudeep to pull patches 3,5,6 and 7
- ask Catalin/Will to pull patches 1,4 and 9
I'm happy to queue patches 1, 4 and 9 for 4.10 (though it might have
been easier for the whole series to go through arm-soc).
Do you think we might get into trouble splitting the merge process this way?
Probably not. The only minor downside is that I have to grab a new DT
for Juno from Sudeep to test the patches. Not an issue, though.
Thanks and apologies if merging through different trees generates some
confusion.

I updated arm patches to address Russell's comments. I did the same for
arm64. I'll reply with the updated version, so you can see if it looks
good to you as well. In case it is OK, I already updated the for-arm64
branch with the new version:

git://linux-arm.org/linux-jl.git upstream/default_caps_for-arm64

Best,

- Juri
Juri Lelli
2016-11-03 05:50:01 UTC
Permalink
Hi,

small update to be in sync with Russell's comments on the corresponding arm
patch.
Post by Juri Lelli
Add a sysfs cpu_capacity attribute with which it is possible to read and
write (thus over-writing default values) CPUs capacity. This might be
useful in situations where values need changing after boot.
/sys/devices/system/cpu/cpu*/cpu_capacity
---
- add mutex to protect cpu_scale (as pointed out by Morten off-line)
--->8---
From 17684f3db6d74342da424997badcb3714a1a9e63 Mon Sep 17 00:00:00 2001
From: Juri Lelli <***@arm.com>
Date: Wed, 14 Oct 2015 12:02:05 +0100
Subject: [PATCH v7.1 9/9] arm64: add sysfs cpu_capacity attribute

Add a sysfs cpu_capacity attribute with which it is possible to read and
write (thus over-writing default values) CPUs capacity. This might be
useful in situations where values need changing after boot.

The new attribute shows up as:

/sys/devices/system/cpu/cpu*/cpu_capacity

Cc: Catalin Marinas <***@arm.com>
Cc: Will Deacon <***@arm.com>
Cc: Mark Brown <***@kernel.org>
Cc: Sudeep Holla <***@arm.com>
Signed-off-by: Juri Lelli <***@arm.com>
---

Changes from v5:
- add mutex to protect cpu_scale (as pointed out by Morten off-line)

Changes from v7:
- include files moved at top of file
- show_cpu_capacity simplified to fewer lines of code
- unnecessary cast removed in store_cpu_capacity
- use DEVICE_ATTR_RW() instead of DEVICE_ATTR()
- use subsys_initcall instead of late_initcall
---
arch/arm64/kernel/topology.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b75b0ba2e113..23e9e13bd2aa 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -20,12 +20,15 @@
#include <linux/of.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/string.h>
#include <linux/cpufreq.h>

+#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>

static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);

unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
{
@@ -37,6 +40,65 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
per_cpu(cpu_scale, cpu) = capacity;
}

+#ifdef CONFIG_PROC_SYSCTL
+static ssize_t cpu_capacity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%lu\n",
+ arch_scale_cpu_capacity(NULL, cpu->dev.id));
+}
+
+static ssize_t cpu_capacity_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ ret = kstrtoul(buf, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
+#endif
+
static u32 capacity_scale;
static u32 *raw_capacity;
static bool cap_parsing_failed;
@@ -87,6 +149,7 @@ static void normalize_cpu_capacity(void)
return;

pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
for_each_possible_cpu(cpu) {
pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
cpu, raw_capacity[cpu]);
@@ -96,6 +159,7 @@ static void normalize_cpu_capacity(void)
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
cpu, arch_scale_cpu_capacity(NULL, cpu));
}
+ mutex_unlock(&cpu_scale_mutex);
}

#ifdef CONFIG_CPU_FREQ
--
2.10.0
Loading...