From 34ed62461ae4970695974afb9a60ac3df0086830 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Jan 2013 13:37:42 -0800 Subject: rcu: Remove restrictions on no-CBs CPUs Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore at least one no-CBs CPU must remain online at any given time. These restrictions are problematic in some situations, such as cases where all CPUs must run a real-time workload that needs to be insulated from OS jitter and latencies due to RCU callback invocation. This commit therefore provides no-CBs CPUs a (very crude and energy-inefficient) way to start and to wait for grace periods independently of the normal RCU callback mechanisms. This approach allows any or all of the CPUs to be designated as no-CBs CPUs, and allows any proper subset of the CPUs (whether no-CBs CPUs or not) to be offlined. This commit also provides a fix for a locking bug spotted by Xie ChanglongX . Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- init/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 22616cd434bc..c8bd349eb638 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -655,7 +655,7 @@ config RCU_BOOST_DELAY Accept the default if unsure. config RCU_NOCB_CPU - bool "Offload RCU callback processing from boot-selected CPUs" + bool "Offload RCU callback processing from boot-selected CPUs (EXPERIMENTAL" depends on TREE_RCU || TREE_PREEMPT_RCU default n help @@ -673,7 +673,7 @@ config RCU_NOCB_CPU callback, and (2) affinity or cgroups can be used to force the kthreads to run on whatever set of CPUs is desired. - Say Y here if you want reduced OS jitter on selected CPUs. + Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. 
endmenu # "RCU Subsystem" -- cgit v1.2.3 From 3d374d09f16f64ab4d71704cbe621514d36cd0b1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 6 Mar 2013 12:35:31 -0800 Subject: final removal of CONFIG_EXPERIMENTAL Remove "config EXPERIMENTAL" itself, now that every "depends on" it has been removed from the tree. Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index 22616cd434bc..5341d7232c3a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -28,10 +28,6 @@ config BUILDTIME_EXTABLE_SORT menu "General setup" -config EXPERIMENTAL - bool - default y - config BROKEN bool -- cgit v1.2.3 From 911af505ef407c2511106c224dd640f882f0f590 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Feb 2013 10:23:27 -0800 Subject: rcu: Provide compile-time control for no-CBs CPUs Currently, the only way to specify no-CBs CPUs is via the rcu_nocbs kernel command-line parameter. This is inconvenient in some cases, particularly for randconfig testing, so this commit adds a new set of kernel configuration parameters. CONFIG_RCU_NOCB_CPU_NONE (the default) retains the old behavior, CONFIG_RCU_NOCB_CPU_ZERO offloads callback processing from CPU 0 (along with any other CPUs specified by the rcu_nocbs boot-time parameter), and CONFIG_RCU_NOCB_CPU_ALL offloads callback processing from all CPUs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- init/Kconfig | 39 +++++++++++++++++++++++++++++++++++++++ kernel/rcutree_plugin.h | 14 ++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'init') diff --git a/init/Kconfig b/init/Kconfig index c8bd349eb638..307499704580 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -676,6 +676,45 @@ config RCU_NOCB_CPU Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. 
+choice + prompt "Build-forced no-CBs CPUs" + default RCU_NOCB_CPU_NONE + help + This option allows no-CBs CPUs to be specified at build time. + Additional no-CBs CPUs may be specified by the rcu_nocbs= + boot parameter. + +config RCU_NOCB_CPU_NONE + bool "No build_forced no-CBs CPUs" + depends on RCU_NOCB_CPU + help + This option does not force any of the CPUs to be no-CBs CPUs. + Only CPUs designated by the rcu_nocbs= boot parameter will be + no-CBs CPUs. + +config RCU_NOCB_CPU_ZERO + bool "CPU 0 is a build_forced no-CBs CPU" + depends on RCU_NOCB_CPU + help + This option forces CPU 0 to be a no-CBs CPU. Additional CPUs + may be designated as no-CBs CPUs using the rcu_nocbs= boot + parameter will be no-CBs CPUs. + + Select this if CPU 0 needs to be a no-CBs CPU for real-time + or energy-efficiency reasons. + +config RCU_NOCB_CPU_ALL + bool "All CPUs are build_forced no-CBs CPUs" + depends on RCU_NOCB_CPU + help + This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs= + boot parameter will be ignored. + + Select this if all CPUs need to be no-CBs CPUs for real-time + or energy-efficiency reasons. 
+ +endchoice + endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 44f958a88b21..3e33aefce0ea 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -85,6 +85,20 @@ static void __init rcu_bootup_announce_oddness(void) if (nr_cpu_ids != NR_CPUS) printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_NOCB_CPU +#ifndef CONFIG_RCU_NOCB_CPU_NONE + if (!have_rcu_nocb_mask) { + alloc_bootmem_cpumask_var(&rcu_nocb_mask); + have_rcu_nocb_mask = true; + } +#ifdef CONFIG_RCU_NOCB_CPU_ZERO + pr_info("\tExperimental no-CBs CPU 0\n"); + cpumask_set_cpu(0, rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ +#ifdef CONFIG_RCU_NOCB_CPU_ALL + pr_info("\tExperimental no-CBs for all CPUs\n"); + cpumask_setall(rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf); -- cgit v1.2.3 From a488985851cf2facd2227bd982cc2c251df56268 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Dec 2012 08:16:28 -0800 Subject: rcu: Distinguish "rcuo" kthreads by RCU flavor Currently, the per-no-CBs-CPU kthreads are named "rcuo" followed by the CPU number, for example, "rcuo0" for CPU 0. This is problematic given that there are either two or three RCU flavors, each of which gets a per-CPU kthread with exactly the same name. This commit therefore introduces a one-letter abbreviation for each RCU flavor, namely 'b' for RCU-bh, 'p' for RCU-preempt, and 's' for RCU-sched. This abbreviation is used to distinguish the "rcuo" kthreads, for example, for CPU 0 we would have "rcuob/0", "rcuop/0", and "rcuos/0". Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney Tested-by: Dietmar Eggemann --- Documentation/kernel-parameters.txt | 7 +++++-- init/Kconfig | 13 +++++++------ kernel/rcutree.c | 7 ++++--- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 5 +++-- 5 files changed, 20 insertions(+), 13 deletions(-) (limited to 'init') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4609e81dbc37..a17ba16c8fc8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2461,9 +2461,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. In kernels built with CONFIG_RCU_NOCB_CPU=y, set the specified list of CPUs to be no-callback CPUs. Invocation of these CPUs' RCU callbacks will - be offloaded to "rcuoN" kthreads created for - that purpose. This reduces OS jitter on the + be offloaded to "rcuox/N" kthreads created for + that purpose, where "x" is "b" for RCU-bh, "p" + for RCU-preempt, and "s" for RCU-sched, and "N" + is the CPU number. This reduces OS jitter on the offloaded CPUs, which can be useful for HPC and + real-time workloads. It can also improve energy efficiency for asymmetric multiprocessors. diff --git a/init/Kconfig b/init/Kconfig index 307499704580..717584064a7e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -666,12 +666,13 @@ config RCU_NOCB_CPU This option offloads callback invocation from the set of CPUs specified at boot time by the rcu_nocbs parameter. - For each such CPU, a kthread ("rcuoN") will be created to - invoke callbacks, where the "N" is the CPU being offloaded. - Nothing prevents this kthread from running on the specified - CPUs, but (1) the kthreads may be preempted between each - callback, and (2) affinity or cgroups can be used to force - the kthreads to run on whatever set of CPUs is desired. 
+ For each such CPU, a kthread ("rcuox/N") will be created to + invoke callbacks, where the "N" is the CPU being offloaded, + and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and + "s" for RCU-sched. Nothing prevents this kthread from running + on the specified CPUs, but (1) the kthreads may be preempted + between each callback, and (2) affinity or cgroups can be used + to force the kthreads to run on whatever set of CPUs is desired. Say Y here if you want to help to debug reduced OS jitter. Say N here if you are unsure. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 433f426c848f..074cb2d974bf 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -64,7 +64,7 @@ static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; -#define RCU_STATE_INITIALIZER(sname, cr) { \ +#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \ .level = { &sname##_state.node[0] }, \ .call = cr, \ .fqs_state = RCU_GP_IDLE, \ @@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ .name = #sname, \ + .abbr = sabbr, \ } struct rcu_state rcu_sched_state = - RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched); + RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index e51373c0b748..b6c2335efbdf 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -443,6 +443,7 @@ struct rcu_state { unsigned long gp_max; /* Maximum GP duration in */ /* jiffies. */ char *name; /* Name of structure. */ + char abbr; /* Abbreviated name. 
*/ struct list_head flavors; /* List of RCU flavors. */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e32236e83dda..c0164441ab92 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -111,7 +111,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = - RCU_STATE_INITIALIZER(rcu_preempt, call_rcu); + RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; @@ -2517,7 +2517,8 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) return; for_each_cpu(cpu, rcu_nocb_mask) { rdp = per_cpu_ptr(rsp->rda, cpu); - t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu); + t = kthread_run(rcu_nocb_kthread, rdp, + "rcuo%c/%d", rsp->abbr, cpu); BUG_ON(IS_ERR(t)); ACCESS_ONCE(rdp->nocb_kthread) = t; } -- cgit v1.2.3 From c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 28 Dec 2012 11:30:36 -0800 Subject: rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- Documentation/kernel-parameters.txt | 28 ++- include/linux/rcupdate.h | 1 + init/Kconfig | 17 +- kernel/rcutree.c | 28 +-- kernel/rcutree.h | 12 +- kernel/rcutree_plugin.h | 374 ++++++++++-------------------------- kernel/rcutree_trace.c | 2 - 7 files changed, 149 insertions(+), 313 deletions(-) (limited to 'init') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a17ba16c8fc8..22303b2e74bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_first_fqs= [KNL,BOOT] + Set delay from grace-period initialization to + first attempt to force quiescent states. + Units are jiffies, minimum value is zero, + and maximum value is HZ. + + rcutree.jiffies_till_next_fqs= [KNL,BOOT] + Set delay between subsequent attempts to force + quiescent states. Units are jiffies, minimum + value is one, and maximum value is HZ. + rcutree.qhimark= [KNL,BOOT] Set threshold of queued RCU callbacks over which batch limiting is disabled. @@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] Set timeout for RCU CPU stall warning messages. - rcutree.jiffies_till_first_fqs= [KNL,BOOT] - Set delay from grace-period initialization to - first attempt to force quiescent states. - Units are jiffies, minimum value is zero, - and maximum value is HZ. + rcutree.rcu_idle_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + RCU callbacks (RCU_FAST_NO_HZ=y). - rcutree.jiffies_till_next_fqs= [KNL,BOOT] - Set delay between subsequent attempts to force - quiescent states. Units are jiffies, minimum - value is one, and maximum value is HZ. 
+ rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] + Set wakeup interval for idle CPUs that have + only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). + Lazy RCU callbacks are those which RCU can + prove do nothing more than free memory. rcutorture.fqs_duration= [KNL,BOOT] Set duration of force_quiescent_state bursts. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#define ulong2long(a) (*(long *)(&(a))) /* Exported common interfaces */ diff --git a/init/Kconfig b/init/Kconfig index 717584064a7e..a3a2304fa6d2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ depends on NO_HZ && SMP default n help - This option causes RCU to attempt to accelerate grace periods in - order to allow CPUs to enter dynticks-idle state more quickly. - On the other hand, this option increases the overhead of the - dynticks-idle checking, thus degrading scheduling latency. - - Say Y if energy efficiency is critically important, and you don't - care about real-time response. + This option permits CPUs to enter dynticks-idle state even if + they have RCU callbacks queued, and prevents RCU from waking + these CPUs up more than roughly once every four jiffies (by + default, you can adjust this using the rcutree.rcu_idle_gp_delay + parameter), thus improving energy efficiency. On the other + hand, this option increases the duration of RCU grace periods, + for example, slowing down synchronize_rcu(). + + Say Y if energy efficiency is critically important, and you + don't care about increased grace-period durations. Say N if you are unsure. 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2015bce749f9..7b1d7769872a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu) } /* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. + * Return true if the specified CPU has any callback. If all_lazy is + * non-NULL, store an indication of whether all callbacks are lazy. + * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int rcu_cpu_has_callbacks(int cpu) +static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) { + bool al = true; + bool hc = false; + struct rcu_data *rdp; struct rcu_state *rsp; - /* RCU callbacks either ready or pending? */ - for_each_rcu_flavor(rsp) - if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) - return 1; - return 0; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (rdp->qlen != rdp->qlen_lazy) + al = false; + if (rdp->nxtlist) + hc = true; + } + if (all_lazy) + *all_lazy = al; + return hc; } /* @@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); - rcu_prepare_for_idle_init(cpu); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ /* Add CPU to rcu_node bitmasks. */ @@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, */ for_each_rcu_flavor(rsp) rcu_cleanup_dying_cpu(rsp); - rcu_cleanup_after_idle(cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b6c2335efbdf..96a27f922e92 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -88,18 +88,13 @@ struct rcu_dynticks { int dynticks_nmi_nesting; /* Track NMI nesting level. */ atomic_t dynticks; /* Even value for idle, else odd. 
*/ #ifdef CONFIG_RCU_FAST_NO_HZ - int dyntick_drain; /* Prepare-for-idle state variable. */ - unsigned long dyntick_holdoff; - /* No retries for the jiffy of failure. */ - struct timer_list idle_gp_timer; - /* Wake up CPU sleeping with callbacks. */ - unsigned long idle_gp_timer_expires; - /* When to wake up CPU (for repost). */ - bool idle_first_pass; /* First pass of attempt to go idle? */ + bool all_lazy; /* Are all CPU's CBs lazy? */ unsigned long nonlazy_posted; /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + unsigned long last_accelerate; + /* Last jiffy CBs were accelerated. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; @@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ static void __cpuinit rcu_prepare_kthreads(int cpu); -static void rcu_prepare_for_idle_init(int cpu); static void rcu_cleanup_after_idle(int cpu); static void rcu_prepare_for_idle(int cpu); static void rcu_idle_count_callbacks_posted(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu); -} - -/* - * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ + return rcu_cpu_has_callbacks(cpu, NULL); } /* @@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) * * The following three proprocessor symbols control this state machine: * - * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt - * to satisfy RCU. 
Beyond this point, it is better to incur a periodic - * scheduling-clock interrupt than to loop through the state machine - * at full power. - * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are - * optional if RCU does not need anything immediately from this - * CPU, even if this CPU still has RCU callbacks queued. The first - * times through the state machine are mandatory: we need to give - * the state machine a chance to communicate a quiescent state - * to the RCU core. * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted * to sleep in dyntick-idle mode with RCU callbacks pending. This * is sized to be roughly one RCU grace period. Those energy-efficiency @@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) * adjustment, they can be converted into kernel config parameters, though * making the state machine smarter might be a better option. */ -#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ -#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -static int rcu_idle_flushes = RCU_IDLE_FLUSHES; -module_param(rcu_idle_flushes, int, 0644); -static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; -module_param(rcu_idle_opt_flushes, int, 0644); static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; module_param(rcu_idle_gp_delay, int, 0644); static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; @@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Does the specified flavor of RCU have non-lazy callbacks pending on - * the specified CPU? Both RCU flavor and CPU are specified by the - * rcu_data structure. - */ -static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) -{ - return rdp->qlen != rdp->qlen_lazy; -} - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/* - * Are there non-lazy RCU-preempt callbacks? 
(There cannot be if there - * is no RCU-preempt in the kernel.) + * Try to advance callbacks for all flavors of RCU on the current CPU. + * Afterwards, if there are any callbacks ready for immediate invocation, + * return true. */ -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +static bool rcu_try_advance_all_cbs(void) { - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - return __rcu_cpu_has_nonlazy_callbacks(rdp); -} - -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + bool cbs_ready = false; + struct rcu_data *rdp; + struct rcu_node *rnp; + struct rcu_state *rsp; -static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) -{ - return 0; -} + for_each_rcu_flavor(rsp) { + rdp = this_cpu_ptr(rsp->rda); + rnp = rdp->mynode; -#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ + /* + * Don't bother checking unless a grace period has + * completed since we last checked and there are + * callbacks not yet ready to invoke. + */ + if (rdp->completed != rnp->completed && + rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) + rcu_process_gp_end(rsp, rdp); -/* - * Does any flavor of RCU have non-lazy callbacks on the specified CPU? - */ -static bool rcu_cpu_has_nonlazy_callbacks(int cpu) -{ - return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || - __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || - rcu_preempt_cpu_has_nonlazy_callbacks(cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + cbs_ready = true; + } + return cbs_ready; } /* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! 
+ * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready + * to invoke. If the CPU has callbacks, try to advance them. Tell the + * caller to set the timeout based on whether or not there are non-lazy + * callbacks. * - * The delta_jiffies argument is used to store the time when RCU is - * going to need the CPU again if it still has callbacks. The reason - * for this is that rcu_prepare_for_idle() might need to post a timer, - * but if so, it will do so after tick_nohz_stop_sched_tick() has set - * the wakeup time for this CPU. This means that RCU's timer can be - * delayed until the wakeup time, which defeats the purpose of posting - * a timer. + * The caller must have disabled interrupts. */ -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; + /* Snapshot to detect later posting of non-lazy callback. */ + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; + /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - *delta_jiffies = ULONG_MAX; + if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + *dj = ULONG_MAX; return 0; } - if (rdtp->dyntick_holdoff == jiffies) { - /* RCU recently tried and failed, so don't try again. */ - *delta_jiffies = 1; + + /* Attempt to advance callbacks. */ + if (rcu_try_advance_all_cbs()) { + /* Some ready to invoke, so initiate later invocation. */ + invoke_rcu_core(); return 1; } - /* Set up for the possibility that RCU will post a timer. */ - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, - rcu_idle_gp_delay) - jiffies; + rdtp->last_accelerate = jiffies; + + /* Request timer delay depending on laziness, and round. 
*/ + if (rdtp->all_lazy) { + *dj = round_up(rcu_idle_gp_delay + jiffies, + rcu_idle_gp_delay) - jiffies; } else { - *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; - *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; + *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } return 0; } /* - * Handler for smp_call_function_single(). The only point of this - * handler is to wake the CPU up, so the handler does only tracing. - */ -void rcu_idle_demigrate(void *unused) -{ - trace_rcu_prep_idle("Demigrate"); -} - -/* - * Timer handler used to force CPU to start pushing its remaining RCU - * callbacks in the case where it entered dyntick-idle mode with callbacks - * pending. The hander doesn't really need to do anything because the - * real work is done upon re-entry to idle, or by the next scheduling-clock - * interrupt should idle not be re-entered. - * - * One special case: the timer gets migrated without awakening the CPU - * on which the timer was scheduled on. In this case, we must wake up - * that CPU. We do so with smp_call_function_single(). - */ -static void rcu_idle_gp_timer_func(unsigned long cpu_in) -{ - int cpu = (int)cpu_in; - - trace_rcu_prep_idle("Timer"); - if (cpu != smp_processor_id()) - smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); - else - WARN_ON_ONCE(1); /* Getting here can hang the system... */ -} - -/* - * Initialize the timer used to pull CPUs out of dyntick-idle mode. - */ -static void rcu_prepare_for_idle_init(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - rdtp->dyntick_holdoff = jiffies - 1; - setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); - rdtp->idle_gp_timer_expires = jiffies - 1; - rdtp->idle_first_pass = 1; -} - -/* - * Clean up for exit from idle. Because we are exiting from idle, there - * is no longer any point to ->idle_gp_timer, so cancel it. This will - * do nothing if this timer is not active, so just cancel it unconditionally. 
- */ -static void rcu_cleanup_after_idle(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - del_timer(&rdtp->idle_gp_timer); - trace_rcu_prep_idle("Cleanup after idle"); - rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); -} - -/* - * Check to see if any RCU-related work can be done by the current CPU, - * and if so, schedule a softirq to get it done. This function is part - * of the RCU implementation; it is -not- an exported member of the RCU API. - * - * The idea is for the current CPU to clear out all work required by the - * RCU core for the current grace period, so that this CPU can be permitted - * to enter dyntick-idle mode. In some cases, it will need to be awakened - * at the end of the grace period by whatever CPU ends the grace period. - * This allows CPUs to go dyntick-idle more quickly, and to reduce the - * number of wakeups by a modest integer factor. - * - * Because it is not legal to invoke rcu_process_callbacks() with irqs - * disabled, we do one pass of force_quiescent_state(), then do a - * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked - * later. The ->dyntick_drain field controls the sequencing. + * Prepare a CPU for idle from an RCU perspective. The first major task + * is to sense whether nohz mode has been enabled or disabled via sysfs. + * The second major task is to check to see if a non-lazy callback has + * arrived at a CPU that previously had only lazy callbacks. The third + * major task is to accelerate (that is, assign grace-period numbers to) + * any recently arrived callbacks. * * The caller must have disabled interrupts. */ static void rcu_prepare_for_idle(int cpu) { - struct timer_list *tp; + struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_node *rnp; + struct rcu_state *rsp; int tne; /* Handle nohz enablement switches conservatively. 
*/ tne = ACCESS_ONCE(tick_nohz_enabled); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu)) + if (rcu_cpu_has_callbacks(cpu, NULL)) invoke_rcu_core(); /* force nohz to see update. */ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) if (!tne) return; - /* Adaptive-tick mode, where usermode execution is idle to RCU. */ - if (!is_idle_task(current)) { - rdtp->dyntick_holdoff = jiffies - 1; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("User dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else if (rcu_cpu_has_callbacks(cpu)) { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("User dyntick with lazy callbacks"); - } else { - return; - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); + /* If this is a no-CBs CPU, no callbacks, just return. */ + if (is_nocb_cpu(cpu)) return; - } /* - * If this is an idle re-entry, for example, due to use of - * RCU_NONIDLE() or the new idle-loop tracing API within the idle - * loop, then don't take any state-machine actions, unless the - * momentary exit from idle queued additional non-lazy callbacks. - * Instead, repost the ->idle_gp_timer if this CPU has callbacks - * pending. + * If a non-lazy callback arrived at a CPU having only lazy + * callbacks, invoke RCU core for the side-effect of recalculating + * idle duration on re-entry to idle. 
*/ - if (!rdtp->idle_first_pass && - (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { - if (rcu_cpu_has_callbacks(cpu)) { - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - } + if (rdtp->all_lazy && + rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + invoke_rcu_core(); return; } - rdtp->idle_first_pass = 0; - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; /* - * If there are no callbacks on this CPU, enter dyntick-idle mode. - * Also reset state to avoid prejudicing later attempts. + * If we have not yet accelerated this jiffy, accelerate all + * callbacks on this CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) { - rdtp->dyntick_holdoff = jiffies - 1; - rdtp->dyntick_drain = 0; - trace_rcu_prep_idle("No callbacks"); + if (rdtp->last_accelerate == jiffies) return; + rdtp->last_accelerate = jiffies; + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (!*rdp->nxttail[RCU_DONE_TAIL]) + continue; + rnp = rdp->mynode; + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } +} - /* - * If in holdoff mode, just return. We will presumably have - * refrained from disabling the scheduling-clock tick. - */ - if (rdtp->dyntick_holdoff == jiffies) { - trace_rcu_prep_idle("In holdoff"); - return; - } +/* + * Clean up for exit from idle. Attempt to advance callbacks based on + * any grace periods that elapsed while the CPU was idle, and if any + * callbacks are now ready to invoke, initiate invocation. + */ +static void rcu_cleanup_after_idle(int cpu) +{ + struct rcu_data *rdp; + struct rcu_state *rsp; - /* Check and update the ->dyntick_drain sequencing. */ - if (rdtp->dyntick_drain <= 0) { - /* First time through, initialize the counter. 
*/ - rdtp->dyntick_drain = rcu_idle_flushes; - } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && - !rcu_pending(cpu) && - !local_softirq_pending()) { - /* Can we go dyntick-idle despite still having callbacks? */ - rdtp->dyntick_drain = 0; - rdtp->dyntick_holdoff = jiffies; - if (rcu_cpu_has_nonlazy_callbacks(cpu)) { - trace_rcu_prep_idle("Dyntick with callbacks"); - rdtp->idle_gp_timer_expires = - round_up(jiffies + rcu_idle_gp_delay, - rcu_idle_gp_delay); - } else { - rdtp->idle_gp_timer_expires = - round_jiffies(jiffies + rcu_idle_lazy_gp_delay); - trace_rcu_prep_idle("Dyntick with lazy callbacks"); - } - tp = &rdtp->idle_gp_timer; - mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); - rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; - return; /* Nothing more to do immediately. */ - } else if (--(rdtp->dyntick_drain) <= 0) { - /* We have hit the limit, so time to give up. */ - rdtp->dyntick_holdoff = jiffies; - trace_rcu_prep_idle("Begin holdoff"); - invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ + if (is_nocb_cpu(cpu)) return; - } - - /* - * Do one step of pushing the remaining RCU callbacks through - * the RCU core state machine. - */ -#ifdef CONFIG_TREE_PREEMPT_RCU - if (per_cpu(rcu_preempt_data, cpu).nxtlist) { - rcu_preempt_qs(cpu); - force_quiescent_state(&rcu_preempt_state); - } -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state); - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state); - } - - /* - * If RCU callbacks are still pending, RCU still needs this CPU. - * So try forcing the callbacks through the grace period. 
- */ - if (rcu_cpu_has_callbacks(cpu)) { - trace_rcu_prep_idle("More callbacks"); - invoke_rcu_core(); - } else { - trace_rcu_prep_idle("Callbacks drained"); + rcu_try_advance_all_cbs(); + for_each_rcu_flavor(rsp) { + rdp = per_cpu_ptr(rsp->rda, cpu); + if (cpu_has_callbacks_ready_to_invoke(rdp)) + invoke_rcu_core(); } } @@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - struct timer_list *tltp = &rdtp->idle_gp_timer; - char c; + unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; - c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; - if (timer_pending(tltp)) - sprintf(cp, "drain=%d %c timer=%lu", - rdtp->dyntick_drain, c, tltp->expires - jiffies); - else - sprintf(cp, "drain=%d %c timer not pending", - rdtp->dyntick_drain, c); + sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + rdtp->last_accelerate & 0xffff, jiffies & 0xffff, + ulong2long(nlpd), + rdtp->all_lazy ? 'L' : '.', + rdtp->tick_nohz_enabled_snap ? '.' : 'D'); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0d095dcaa670..49099e81c87b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -46,8 +46,6 @@ #define RCU_TREE_NONCORE #include "rcutree.h" -#define ulong2long(a) (*(long *)(&(a))) - static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) { -- cgit v1.2.3 From a1a04ec3c7c27a682473fd9beb2c996316a64649 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 21 Mar 2013 22:49:34 +0100 Subject: idle: Provide a generic entry point for the idle code For now this calls cpu_idle(), but in the long run we want to move the cpu bringup code to the core and therefore we add a state argument. Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Rusty Russell Cc: Paul McKenney Cc: Peter Zijlstra Reviewed-by: Cc: Srivatsa S.
Bhat Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130321215233.583190032@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 8 ++++++++ init/main.c | 2 +- kernel/Makefile | 1 + kernel/cpu/Makefile | 1 + kernel/cpu/idle.c | 10 ++++++++++ 5 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 kernel/cpu/Makefile create mode 100644 kernel/cpu/idle.c (limited to 'init') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ce7a074f2519..7419e30c55fb 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -212,4 +212,12 @@ static inline int disable_nonboot_cpus(void) { return 0; } static inline void enable_nonboot_cpus(void) {} #endif /* !CONFIG_PM_SLEEP_SMP */ +enum cpuhp_state { + CPUHP_OFFLINE, + CPUHP_ONLINE, +}; + +void cpu_startup_entry(enum cpuhp_state state); +void cpu_idle(void); + #endif /* _LINUX_CPU_H_ */ diff --git a/init/main.c b/init/main.c index 63534a141b4e..adb179d3e0f8 100644 --- a/init/main.c +++ b/init/main.c @@ -384,7 +384,7 @@ static noinline void __init_refok rest_init(void) init_idle_bootup_task(current); schedule_preempt_disabled(); /* Call into cpu_idle with preempt disabled */ - cpu_idle(); + cpu_startup_entry(CPUHP_ONLINE); } /* Check for early params. 
*/ diff --git a/kernel/Makefile b/kernel/Makefile index bbde5f1a4486..d1574d47cf27 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -24,6 +24,7 @@ endif obj-y += sched/ obj-y += power/ +obj-y += cpu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o diff --git a/kernel/cpu/Makefile b/kernel/cpu/Makefile new file mode 100644 index 000000000000..59ab052ef7a0 --- /dev/null +++ b/kernel/cpu/Makefile @@ -0,0 +1 @@ +obj-y = idle.o diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c new file mode 100644 index 000000000000..1908f00e0e98 --- /dev/null +++ b/kernel/cpu/idle.c @@ -0,0 +1,10 @@ +/* + * Generic entry point for the idle threads + */ +#include +#include + +void cpu_startup_entry(enum cpuhp_state state) +{ + cpu_idle(); +} -- cgit v1.2.3 From f91eb62f71b31e69e405663ff8d047bc3b9f7525 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 29 Apr 2013 16:18:18 -0700 Subject: init: scream bloody murder if interrupts are enabled too early As I was testing a lot of my code recently, and having several "successes", I accidentally noticed in the dmesg this little line: start_kernel(): bug: interrupts were enabled *very* early, fixing it Sure enough, one of my patches two commits ago enabled interrupts early. The sad part here is that I never noticed it, and I ran several tests with ktest too, and ktest did not notice this line. What ktest looks for (and so do many other automated testing scripts) is a back trace produced by a WARN_ON() or BUG(). As a back trace was never produced, my buggy patch could have slipped into linux-next, or even worse, mainline. Adding a WARN(!irqs_disabled()) makes this bug a little more obvious: PID hash table entries: 4096 (order: 3, 32768 bytes) __ex_table already sorted, skipping sort Checking aperture... No AGP bridge found Calgary: detecting Calgary via BIOS EBDA area Calgary: Unable to locate Rio Grande table in EBDA - bailing!
Memory: 2003252k/2054848k available (4857k kernel code, 460k absent, 51136k reserved, 6210k data, 1096k init) ------------[ cut here ]------------ WARNING: at /home/rostedt/work/git/linux-trace.git/init/main.c:543 start_kernel+0x21e/0x415() Hardware name: To Be Filled By O.E.M. Interrupts were enabled *very* early, fixing it Modules linked in: Pid: 0, comm: swapper/0 Not tainted 3.8.0-test+ #286 Call Trace: warn_slowpath_common+0x83/0x9b warn_slowpath_fmt+0x46/0x48 start_kernel+0x21e/0x415 x86_64_start_reservations+0x10e/0x112 x86_64_start_kernel+0x102/0x111 ---[ end trace 007d8b0491b4f5d8 ]--- Preemptible hierarchical RCU implementation. RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=4. NR_IRQS:4352 nr_irqs:712 16 Console: colour VGA+ 80x25 console [ttyS0] enabled, bootconsole disabled Do you see it? The original version of this patch just slapped a WARN_ON() in there and kept the printk(). Ard van Breemen suggested using the WARN() interface, which makes the code a bit cleaner. Also, while examining other warnings in init/main.c, I found two other locations that deserve a bloody murder scream if their conditions are hit, and updated them accordingly. Signed-off-by: Steven Rostedt Cc: Ard van Breemen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 63534a141b4e..26cd398acf2a 100644 --- a/init/main.c +++ b/init/main.c @@ -539,11 +539,8 @@ asmlinkage void __init start_kernel(void) * fragile until we cpu_idle() for the first time. 
*/ preempt_disable(); - if (!irqs_disabled()) { - printk(KERN_WARNING "start_kernel(): bug: interrupts were " - "enabled *very* early, fixing it\n"); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); - } idr_init_cache(); perf_event_init(); rcu_init(); @@ -558,9 +555,7 @@ asmlinkage void __init start_kernel(void) time_init(); profile_init(); call_function_init(); - if (!irqs_disabled()) - printk(KERN_CRIT "start_kernel(): bug: interrupts were " - "enabled early\n"); + WARN(!irqs_disabled(), "Interrupts were enabled early\n"); early_boot_irqs_disabled = false; local_irq_enable(); @@ -702,9 +697,7 @@ int __init_or_module do_one_initcall(initcall_t fn) strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf)); local_irq_enable(); } - if (msgbuf[0]) { - printk("initcall %pF returned with %s\n", fn, msgbuf); - } + WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf); return ret; } -- cgit v1.2.3 From c2409b004ac4757ac5121851f8a58e0bcbcf7a3c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 29 Apr 2013 16:18:19 -0700 Subject: init: raise log level If the kernel was booted with the "quiet" boot option we have currently no chance to see why an initrd fails. Change KERN_WARNING to KERN_ERR to see what is going on. Signed-off-by: Richard Weinberger Cc: "H. 
Peter Anvin" Cc: Rusty Russell Cc: Jim Cromie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index 26cd398acf2a..e1c87e1d6359 100644 --- a/init/main.c +++ b/init/main.c @@ -825,7 +825,7 @@ static int __ref kernel_init(void *unused) if (ramdisk_execute_command) { if (!run_init_process(ramdisk_execute_command)) return 0; - printk(KERN_WARNING "Failed to execute %s\n", + printk(KERN_ERR "Failed to execute %s\n", ramdisk_execute_command); } @@ -838,7 +838,7 @@ static int __ref kernel_init(void *unused) if (execute_command) { if (!run_init_process(execute_command)) return 0; - printk(KERN_WARNING "Failed to execute %s. Attempting " + printk(KERN_ERR "Failed to execute %s. Attempting " "defaults...\n", execute_command); } if (!run_init_process("/sbin/init") || @@ -884,7 +884,7 @@ static noinline void __init kernel_init_freeable(void) /* Open the /dev/console on the rootfs, this should never fail */ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk(KERN_WARNING "Warning: unable to open an initial console.\n"); + printk(KERN_ERR "Warning: unable to open an initial console.\n"); (void) sys_dup(0); (void) sys_dup(0); -- cgit v1.2.3 From ea676e846a8171b8e215627259f485a4e70328cb Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 29 Apr 2013 16:18:20 -0700 Subject: init/main.c: convert to pr_foo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also enables cleanup of some 80-col trickery. 
Cc: Richard Weinberger Cc: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'init') diff --git a/init/main.c b/init/main.c index e1c87e1d6359..ea73e1eb361c 100644 --- a/init/main.c +++ b/init/main.c @@ -9,6 +9,8 @@ * Simplified starting of init: Michael A. Griffith */ +#define DEBUG /* Enable initcall_debug */ + #include #include #include @@ -174,8 +176,8 @@ static int __init obsolete_checksetup(char *line) if (line[n] == '\0' || line[n] == '=') had_early_param = 1; } else if (!p->setup_func) { - printk(KERN_WARNING "Parameter %s is obsolete," - " ignored\n", p->str); + pr_warn("Parameter %s is obsolete, ignored\n", + p->str); return 1; } else if (p->setup_func(line + n)) return 1; @@ -398,8 +400,7 @@ static int __init do_early_param(char *param, char *val, const char *unused) strcmp(p->str, "earlycon") == 0) ) { if (p->setup_func(val) != 0) - printk(KERN_WARNING - "Malformed early option '%s'\n", param); + pr_warn("Malformed early option '%s'\n", param); } } /* We accept everything at this stage. 
*/ @@ -497,7 +498,7 @@ asmlinkage void __init start_kernel(void) tick_init(); boot_cpu_init(); page_address_init(); - printk(KERN_NOTICE "%s", linux_banner); + pr_notice("%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); mm_init_cpumask(&init_mm); @@ -509,7 +510,7 @@ asmlinkage void __init start_kernel(void) build_all_zonelists(NULL, NULL); page_alloc_init(); - printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, @@ -582,8 +583,7 @@ asmlinkage void __init start_kernel(void) #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { - printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " - "disabling it.\n", + pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n", page_to_pfn(virt_to_page((void *)initrd_start)), min_low_pfn); initrd_start = 0; @@ -662,14 +662,14 @@ static int __init_or_module do_one_initcall_debug(initcall_t fn) unsigned long long duration; int ret; - printk(KERN_DEBUG "calling %pF @ %i\n", fn, task_pid_nr(current)); + pr_debug("calling %pF @ %i\n", fn, task_pid_nr(current)); calltime = ktime_get(); ret = fn(); rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n", fn, - ret, duration); + pr_debug("initcall %pF returned %d after %lld usecs\n", + fn, ret, duration); return ret; } @@ -825,8 +825,7 @@ static int __ref kernel_init(void *unused) if (ramdisk_execute_command) { if (!run_init_process(ramdisk_execute_command)) return 0; - printk(KERN_ERR "Failed to execute %s\n", - ramdisk_execute_command); + pr_err("Failed to execute %s\n", ramdisk_execute_command); } /* @@ -838,8 +837,8 @@ static int 
__ref kernel_init(void *unused) if (execute_command) { if (!run_init_process(execute_command)) return 0; - printk(KERN_ERR "Failed to execute %s. Attempting " - "defaults...\n", execute_command); + pr_err("Failed to execute %s. Attempting defaults...\n", + execute_command); } if (!run_init_process("/sbin/init") || !run_init_process("/etc/init") || @@ -884,7 +883,7 @@ static noinline void __init kernel_init_freeable(void) /* Open the /dev/console on the rootfs, this should never fail */ if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) - printk(KERN_ERR "Warning: unable to open an initial console.\n"); + pr_err("Warning: unable to open an initial console.\n"); (void) sys_dup(0); (void) sys_dup(0); -- cgit v1.2.3