From d8c216cfa57e8a579f41729cbb88c97835d9ac8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 8 Jan 2011 00:29:20 +0100 Subject: cpuidle: Make cpuidle_enable_device() call poll_idle_init() The following scenario is possible with the current cpuidle code and the ACPI cpuidle driver: (1) acpi_processor_cst_has_changed() is called, (2) cpuidle_disable_device() is called, (3) cpuidle_remove_state_sysfs() is called to remove the (presumably outdated) states info from sysfs, (3) acpi_processor_get_power_info() is called, the first entry in the pr->power.states[] table is filled with zeros, (4) acpi_processor_setup_cpuidle() is called and it doesn't fill the first entry in pr->power.states[], (5) cpuidle_enable_device() is called, (6) __cpuidle_register_device() is _not_ called, since the device has already been registered, (7) Consequently, poll_idle_init() is _not_ called either, (8) cpuidle_add_state_sysfs() is called to create the sysfs attributes for the new states and it uses the bogus first table entry from acpi_processor_get_power_info() for creating state0. This problem is avoided if cpuidle_enable_device() unconditionally calls poll_idle_init(). Reported-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown cc: stable@kernel.org --- drivers/cpuidle/cpuidle.c | 82 +++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 41 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a507108433785..97df791c74cbd 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -154,6 +154,45 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); +#ifdef CONFIG_ARCH_HAS_CPU_RELAX +static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) +{ + ktime_t t1, t2; + s64 diff; + int ret; + + t1 = ktime_get(); + local_irq_enable(); + while (!need_resched()) + cpu_relax(); + + t2 = ktime_get(); + diff = ktime_to_us(ktime_sub(t2, t1)); + if (diff > INT_MAX) + diff = INT_MAX; + + ret = (int) diff; + return ret; +} + +static void poll_idle_init(struct cpuidle_device *dev) +{ + struct cpuidle_state *state = &dev->states[0]; + + cpuidle_set_statedata(state, NULL); + + snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); + state->exit_latency = 0; + state->target_residency = 0; + state->power_usage = -1; + state->flags = CPUIDLE_FLAG_POLL; + state->enter = poll_idle; +} +#else +static void poll_idle_init(struct cpuidle_device *dev) {} +#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ + /** * cpuidle_enable_device - enables idle PM for a CPU * @dev: the CPU @@ -178,6 +217,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } + poll_idle_init(dev); + if ((ret = cpuidle_add_state_sysfs(dev))) return ret; @@ -232,45 +273,6 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); -#ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) -{ - ktime_t t1, t2; - s64 diff; - int ret; - - t1 = ktime_get(); - local_irq_enable(); - while (!need_resched()) - cpu_relax(); - - t2 = ktime_get(); - diff = ktime_to_us(ktime_sub(t2, t1)); - if (diff > INT_MAX) - diff = INT_MAX; - - ret = (int) diff; - return ret; -} - -static void poll_idle_init(struct cpuidle_device *dev) -{ - struct cpuidle_state *state = &dev->states[0]; - - cpuidle_set_statedata(state, NULL); - - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); - snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); - state->exit_latency = 0; - state->target_residency = 0; - state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; - state->enter = poll_idle; -} -#else -static void poll_idle_init(struct cpuidle_device *dev) {} -#endif /* CONFIG_ARCH_HAS_CPU_RELAX */ - /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -291,8 +293,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) init_completion(&dev->kobj_unregister); - poll_idle_init(dev); - /* * cpuidle driver should set the dev->power_specified bit * before registering the device if the driver provides -- cgit v1.2.3 From 720f1c3010db6a411358b962a2007969117840bc Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:43 +0100 Subject: cpuidle: Rename X86 specific idle poll state[0] from C0 to POLL C0 means and is well know as "not idle". All documentation out there uses this term as "running"/"not idle" state. Also Linux userspace tools (e.g. cpufreq-aperf and turbostat) show C0 residency which there is correct, but means something totally else than cpuidle "POLL" state. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 97df791c74cbd..37e4460413065 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -181,7 +181,7 @@ static void poll_idle_init(struct cpuidle_device *dev) cpuidle_set_statedata(state, NULL); - snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); state->exit_latency = 0; state->target_residency = 0; -- cgit v1.2.3 From d247632c08c674864d438733280422ddb7130ff8 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 12 Jan 2011 02:34:59 -0500 Subject: cpuidle: delete NOP CPUIDLE_FLAG_POLL it serves no purpose Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 2 +- include/linux/cpuidle.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 37e4460413065..dd5f1eafd6f27 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -186,7 +186,7 @@ static void poll_idle_init(struct cpuidle_device *dev) state->exit_latency = 0; state->target_residency = 0; state->power_usage = -1; - state->flags = CPUIDLE_FLAG_POLL; + state->flags = 0; state->enter = poll_idle; } #else diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1be416bbbb825..dee32853b0daf 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -48,7 +48,6 @@ struct cpuidle_state { /* Idle State Flags */ #define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ #define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ -#define CPUIDLE_FLAG_POLL (0x10) /* no latency, no savings */ #define CPUIDLE_FLAG_SHALLOW (0x20) /* low latency, minimal savings */ #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ -- cgit v1.2.3 From f77cfe4ea21760268c0277fa3e4b02dfd2a2c2f4 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 7 Jan 2011 11:29:44 +0100 Subject: cpuidle/x86/perf: fix power:cpu_idle double end events and throw cpu_idle events from the cpuidle layer Currently intel_idle and acpi_idle driver show double cpu_idle "exit idle" events -> this patch fixes it and makes cpu_idle events throwing less complex. It also introduces cpu_idle events for all architectures which use the cpuidle subsystem, namely: - arch/arm/mach-at91/cpuidle.c - arch/arm/mach-davinci/cpuidle.c - arch/arm/mach-kirkwood/cpuidle.c - arch/arm/mach-omap2/cpuidle34xx.c - arch/drivers/acpi/processor_idle.c (for all cases, not only mwait) - arch/x86/kernel/process.c (did throw events before, but was a mess) - drivers/idle/intel_idle.c (did throw events before) Convention should be: Fire cpu_idle events inside the current pm_idle function (not somewhere down the the callee tree) to keep things easy. Current possible pm_idle functions in X86: c1e_idle, poll_idle, cpuidle_idle_call, mwait_idle, default_idle -> this is really easy is now. This affects userspace: The type field of the cpu_idle power event can now direclty get mapped to: /sys/devices/system/cpu/cpuX/cpuidle/stateX/{name,desc,usage,time,...} instead of throwing very CPU/mwait specific values. This change is not visible for the intel_idle driver. For the acpi_idle driver it should only be visible if the vendor misses out C-states in his BIOS. Another (perf timechart) patch reads out cpuidle info of cpu_idle events from: /sys/.../cpuidle/stateX/*, then the cpuidle events are mapped to the correct C-/cpuidle state again, even if e.g. vendors miss out C-states in their BIOS and for example only export C1 and C3. -> everything is fine. Signed-off-by: Thomas Renninger CC: Robert Schoene CC: Jean Pihet CC: Arjan van de Ven CC: Ingo Molnar CC: Frederic Weisbecker CC: linux-pm@lists.linux-foundation.org CC: linux-acpi@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-perf-users@vger.kernel.org CC: linux-omap@vger.kernel.org Signed-off-by: Len Brown --- arch/x86/kernel/process.c | 6 ++++-- arch/x86/kernel/process_32.c | 4 ---- arch/x86/kernel/process_64.c | 6 ------ drivers/cpuidle/cpuidle.c | 10 ++++++++-- drivers/idle/intel_idle.c | 2 -- 5 files changed, 12 insertions(+), 16 deletions(-) (limited to 'drivers/cpuidle') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 09c08a1c706f0..67e96e67157b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -386,6 +386,8 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else { local_irq_enable(); /* loop is done by the caller */ @@ -443,8 +445,6 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); - trace_cpu_idle((ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -471,6 +471,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4b9befa0e347f..8d128783af473 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,8 +57,6 @@ #include #include -#include - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); /* @@ -113,8 +111,6 @@ void cpu_idle(void) stop_critical_timings(); pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4c818a7383968..bd387e8f73b47 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -51,8 +51,6 @@ #include #include -#include - asmlinkage extern void ret_from_fork(void); DEFINE_PER_CPU(unsigned long, old_rsp); @@ -141,10 +139,6 @@ void cpu_idle(void) pm_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, - smp_processor_id()); - /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. */ diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 386888f10df02..e4855c33f8974 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -96,7 +96,15 @@ static void cpuidle_idle_call(void) /* enter the state and update stats */ dev->last_state = target_state; + + trace_power_start(POWER_CSTATE, next_state, dev->cpu); + trace_cpu_idle(next_state, dev->cpu); + dev->last_residency = target_state->enter(dev, target_state); + + trace_power_end(dev->cpu); + trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); + if (dev->last_state) target_state = dev->last_state; @@ -106,8 +114,6 @@ static void cpuidle_idle_call(void) /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev); - trace_power_end(smp_processor_id()); - trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } /** diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 56ac09d6c9308..60fa6ecdb41f4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -220,8 +220,6 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) kt_before = ktime_get_real(); stop_critical_timings(); - trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu); - trace_cpu_idle((eax >> 4) + 1, cpu); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); -- cgit v1.2.3