From fa07fb6a4ecad41d373eaf1574b9d54b4f0c1e75 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 5 Sep 2016 08:54:06 -0400
Subject: workqueue: dump workqueue state on sanity check failures in
 destroy_workqueue()

destroy_workqueue() performs a number of sanity checks to ensure that
the workqueue is empty before proceeding with destruction.  However,
it's not always easy to tell what's going on just from the warning
message.  Let's dump workqueue state after sanity check failures to
help debugging.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/CACT4Y+Zs6vkjHo9qHb4TrEiz3S4+quvvVQ9VWvj2Mx6pETGb9Q@mail.gmail.com
Cc: Dmitry Vyukov <dvyukov@google.com>
---
 kernel/workqueue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ef071ca73fc3..4eaec8b86d65 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4021,6 +4021,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		for (i = 0; i < WORK_NR_COLORS; i++) {
 			if (WARN_ON(pwq->nr_in_flight[i])) {
 				mutex_unlock(&wq->mutex);
+				show_workqueue_state();
 				return;
 			}
 		}
@@ -4029,6 +4030,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		    WARN_ON(pwq->nr_active) ||
 		    WARN_ON(!list_empty(&pwq->delayed_works))) {
 			mutex_unlock(&wq->mutex);
+			show_workqueue_state();
 			return;
 		}
 	}
-- 
cgit v1.2.3


From 3347fa0928210d96aaa2bd6cd5a8391d5e630873 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:32 -0400
Subject: workqueue: make workqueue available early during boot

Workqueue is currently initialized in an early init call; however,
there are cases where early boot code has to be split and reordered to
come after workqueue initialization or the same code path which makes
use of workqueues is used both before workqueue initailization and
after.  The latter cases have to gate workqueue usages with
keventd_up() tests, which is nasty and easy to get wrong.

Workqueue usages have become widespread and it'd be a lot more
convenient if it can be used very early from boot.  This patch splits
workqueue initialization into two steps.  workqueue_init_early() which
sets up the basic data structures so that workqueues can be created
and work items queued, and workqueue_init() which actually brings up
workqueues online and starts executing queued work items.  The former
step can be done very early during boot once memory allocation,
cpumasks and idr are initialized.  The latter right after kthreads
become available.

This allows work item queueing and canceling from very early boot
which is what most of these use cases want.

* As systemd_wq being initialized doesn't indicate that workqueue is
  fully online anymore, update keventd_up() to test wq_online instead.
  The follow-up patches will get rid of all its usages and the
  function itself.

* Flushing doesn't make sense before workqueue is fully initialized.
  The flush functions trigger WARN and return immediately before fully
  online.

* Work items are never in-flight before fully online.  Canceling can
  always succeed by skipping the flush step.

* Some code paths can no longer assume to be called with irq enabled
  as irq is disabled during early boot.  Use irqsave/restore
  operations instead.

v2: Watchdog init, which requires timer to be running, moved from
    workqueue_init_early() to workqueue_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/CA+55aFx0vPuMuxn00rBSM192n-Du5uxy+4AvKa0SBSOVJeuCGg@mail.gmail.com
---
 include/linux/workqueue.h |  7 ++++-
 init/main.c               | 10 +++++++
 kernel/workqueue.c        | 76 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 76 insertions(+), 17 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 26cc1df280d6..91d416f9c0a7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -358,6 +358,8 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
+extern bool wq_online;
+
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
@@ -594,7 +596,7 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork,
  */
 static inline bool keventd_up(void)
 {
-	return system_wq != NULL;
+	return wq_online;
 }
 
 #ifndef CONFIG_SMP
@@ -631,4 +633,7 @@ int workqueue_online_cpu(unsigned int cpu);
 int workqueue_offline_cpu(unsigned int cpu);
 #endif
 
+int __init workqueue_init_early(void);
+int __init workqueue_init(void);
+
 #endif
diff --git a/init/main.c b/init/main.c
index a8a58e2794a5..5c4fd68a8671 100644
--- a/init/main.c
+++ b/init/main.c
@@ -551,6 +551,14 @@ asmlinkage __visible void __init start_kernel(void)
 		 "Interrupts were enabled *very* early, fixing it\n"))
 		local_irq_disable();
 	idr_init_cache();
+
+	/*
+	 * Allow workqueue creation and work item queueing/cancelling
+	 * early.  Work item execution depends on kthreads and starts after
+	 * workqueue_init().
+	 */
+	workqueue_init_early();
+
 	rcu_init();
 
 	/* trace_printk() and trace points may be used after this */
@@ -1005,6 +1013,8 @@ static noinline void __init kernel_init_freeable(void)
 
 	smp_prepare_cpus(setup_max_cpus);
 
+	workqueue_init();
+
 	do_pre_smp_initcalls();
 	lockup_detector_init();
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4eaec8b86d65..15d0811c9e91 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,6 +290,8 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
+bool wq_online;				/* can kworkers be created yet? */
+
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -2583,6 +2585,9 @@ void flush_workqueue(struct workqueue_struct *wq)
 	};
 	int next_color;
 
+	if (WARN_ON(!wq_online))
+		return;
+
 	lock_map_acquire(&wq->lockdep_map);
 	lock_map_release(&wq->lockdep_map);
 
@@ -2843,6 +2848,9 @@ bool flush_work(struct work_struct *work)
 {
 	struct wq_barrier barr;
 
+	if (WARN_ON(!wq_online))
+		return false;
+
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
@@ -2913,7 +2921,13 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 	mark_work_canceling(work);
 	local_irq_restore(flags);
 
-	flush_work(work);
+	/*
+	 * This allows canceling during early boot.  We know that @work
+	 * isn't executing.
+	 */
+	if (wq_online)
+		flush_work(work);
+
 	clear_work_data(work);
 
 	/*
@@ -3352,7 +3366,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 		goto fail;
 
 	/* create and start the initial worker */
-	if (!create_worker(pool))
+	if (wq_online && !create_worker(pool))
 		goto fail;
 
 	/* install */
@@ -3417,6 +3431,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 {
 	struct workqueue_struct *wq = pwq->wq;
 	bool freezable = wq->flags & WQ_FREEZABLE;
+	unsigned long flags;
 
 	/* for @wq->saved_max_active */
 	lockdep_assert_held(&wq->mutex);
@@ -3425,7 +3440,8 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 	if (!freezable && pwq->max_active == wq->saved_max_active)
 		return;
 
-	spin_lock_irq(&pwq->pool->lock);
+	/* this function can be called during early boot w/ irq disabled */
+	spin_lock_irqsave(&pwq->pool->lock, flags);
 
 	/*
 	 * During [un]freezing, the caller is responsible for ensuring that
@@ -3448,7 +3464,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 		pwq->max_active = 0;
 	}
 
-	spin_unlock_irq(&pwq->pool->lock);
+	spin_unlock_irqrestore(&pwq->pool->lock, flags);
 }
 
 /* initialize newly alloced @pwq which is associated with @wq and @pool */
@@ -5457,7 +5473,17 @@ static void __init wq_numa_init(void)
 	wq_numa_enabled = true;
 }
 
-static int __init init_workqueues(void)
+/**
+ * workqueue_init_early - early init for workqueue subsystem
+ *
+ * This is the first half of two-staged workqueue subsystem initialization
+ * and invoked as soon as the bare basics - memory allocation, cpumasks and
+ * idr are up.  It sets up all the data structures and system workqueues
+ * and allows early boot code to create workqueues and queue/cancel work
+ * items.  Actual work item execution starts only after kthreads can be
+ * created and scheduled right before early initcalls.
+ */
+int __init workqueue_init_early(void)
 {
 	int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
 	int i, cpu;
@@ -5490,16 +5516,6 @@ static int __init init_workqueues(void)
 		}
 	}
 
-	/* create the initial worker */
-	for_each_online_cpu(cpu) {
-		struct worker_pool *pool;
-
-		for_each_cpu_worker_pool(pool, cpu) {
-			pool->flags &= ~POOL_DISASSOCIATED;
-			BUG_ON(!create_worker(pool));
-		}
-	}
-
 	/* create default unbound and ordered wq attrs */
 	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
 		struct workqueue_attrs *attrs;
@@ -5536,8 +5552,36 @@ static int __init init_workqueues(void)
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq);
 
+	return 0;
+}
+
+/**
+ * workqueue_init - bring workqueue subsystem fully online
+ *
+ * This is the latter half of two-staged workqueue subsystem initialization
+ * and invoked as soon as kthreads can be created and scheduled.
+ * Workqueues have been created and work items queued on them, but there
+ * are no kworkers executing the work items yet.  Populate the worker pools
+ * with the initial workers and enable future kworker creations.
+ */
+int __init workqueue_init(void)
+{
+	struct worker_pool *pool;
+	int cpu, bkt;
+
+	/* create the initial workers */
+	for_each_online_cpu(cpu) {
+		for_each_cpu_worker_pool(pool, cpu) {
+			pool->flags &= ~POOL_DISASSOCIATED;
+			BUG_ON(!create_worker(pool));
+		}
+	}
+
+	hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
+		BUG_ON(!create_worker(pool));
+
+	wq_online = true;
 	wq_watchdog_init();
 
 	return 0;
 }
-early_initcall(init_workqueues);
-- 
cgit v1.2.3


From a2c2727d207dbd596b79fceb0953ba84b24abb4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:32 -0400
Subject: mce, workqueue: remove keventd_up() usage

Now that workqueue can handle work item queueing from very early
during boot, there is no need to gate schedule_work() with
keventd_up().  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: linux-edac@vger.kernel.org
---
 arch/x86/kernel/cpu/mcheck/mce.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec849468..675b7d8cde0a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -491,7 +491,7 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-	if (!mce_gen_pool_empty() && keventd_up())
+	if (!mce_gen_pool_empty())
 		schedule_work(&mce_work);
 }
 
-- 
cgit v1.2.3


From 1bb0802460da42cd552e03060a65e3f303905088 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:33 -0400
Subject: tty, workqueue: remove keventd_up() usage

Now that workqueue can handle work item queueing from very early
during boot, there is no need to delay schedule_work() while
!keventd_up().  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@linux.intel.com>
---
 drivers/tty/vt/vt.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 2705ca960e92..f76ad4c1fc4c 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -3924,10 +3924,6 @@ void unblank_screen(void)
  */
 static void blank_screen_t(unsigned long dummy)
 {
-	if (unlikely(!keventd_up())) {
-		mod_timer(&console_timer, jiffies + (blankinterval * HZ));
-		return;
-	}
 	blank_timer_expired = 1;
 	schedule_work(&console_work);
 }
-- 
cgit v1.2.3


From a81f80f3eb759de72d74d18e478873fc0575abc5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:33 -0400
Subject: power, workqueue: remove keventd_up() usage

Now that workqueue can handle work item queueing/cancelling from very
early during boot, there is no need to gate cancel_delayed_work_sync()
while !keventd_up().  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Qiao Zhou <qiaozhou@asrmicro.com>
---
 kernel/power/qos.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 168ff442ebde..97b0df71303e 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -482,16 +482,7 @@ void pm_qos_update_request(struct pm_qos_request *req,
 		return;
 	}
 
-	/*
-	 * This function may be called very early during boot, for example,
-	 * from of_clk_init(), where irq needs to stay disabled.
-	 * cancel_delayed_work_sync() assumes that irq is enabled on
-	 * invocation and re-enables it on return.  Avoid calling it until
-	 * workqueue is initialized.
-	 */
-	if (keventd_up())
-		cancel_delayed_work_sync(&req->work);
-
+	cancel_delayed_work_sync(&req->work);
 	__pm_qos_update_request(req, new_value);
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_request);
-- 
cgit v1.2.3


From eac0337af12b6a55f08c69429400d6530d602dff Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:34 -0400
Subject: slab, workqueue: remove keventd_up() usage

Now that workqueue can handle work item queueing from very early
during boot, there is no need to gate schedule_delayed_work_on() while
!keventd_up().  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org
---
 mm/slab.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index b67271024135..dc69b6b625b1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -550,12 +550,7 @@ static void start_cpu_timer(int cpu)
 {
 	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
 
-	/*
-	 * When this gets called from do_initcalls via cpucache_init(),
-	 * init_workqueues() has already run, so keventd will be setup
-	 * at that time.
-	 */
-	if (keventd_up() && reap_work->work.func == NULL) {
+	if (reap_work->work.func == NULL) {
 		init_reap_node(cpu);
 		INIT_DEFERRABLE_WORK(reap_work, cache_reap);
 		schedule_delayed_work_on(cpu, reap_work,
-- 
cgit v1.2.3


From 7092dff2af0bd734b91e963d693b9123de7e4414 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:34 -0400
Subject: debugobj, workqueue: remove keventd_up() usage

Now that workqueue can handle work item queueing from very early
during boot, there is no need to gate schedule_work() while
!keventd_up().  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 lib/debugobjects.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a8e12601eb37..8458ec9d3d9f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -199,7 +199,7 @@ static void free_object(struct debug_obj *obj)
 	 * initialized:
 	 */
 	if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
-		sched = keventd_up();
+		sched = 1;
 	hlist_add_head(&obj->node, &obj_pool);
 	obj_pool_free++;
 	obj_pool_used--;
-- 
cgit v1.2.3


From 863b710b664bdcb90c0c682ee24adb368f497a5b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 Sep 2016 15:49:34 -0400
Subject: workqueue: remove keventd_up()

keventd_up() no longer has in-kernel users.  Remove it and make
wq_online static.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 10 ----------
 kernel/workqueue.c        |  2 +-
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 91d416f9c0a7..56417133c672 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -358,8 +358,6 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-extern bool wq_online;
-
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
@@ -591,14 +589,6 @@ static inline bool schedule_delayed_work(struct delayed_work *dwork,
 	return queue_delayed_work(system_wq, dwork, delay);
 }
 
-/**
- * keventd_up - is workqueue initialized yet?
- */
-static inline bool keventd_up(void)
-{
-	return wq_online;
-}
-
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 15d0811c9e91..ad0cd439223b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,7 +290,7 @@ module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
 module_param_named(power_efficient, wq_power_efficient, bool, 0444);
 
-bool wq_online;				/* can kworkers be created yet? */
+static bool wq_online;			/* can kworkers be created yet? */
 
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
-- 
cgit v1.2.3


From 2186d9f940b6a04f263a3bacd48f2a7ba96df4cf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 19 Oct 2016 12:01:27 -0400
Subject: workqueue: move wq_numa_init() to workqueue_init()

While splitting up workqueue initialization into two parts,
ac8f73400782 ("workqueue: make workqueue available early during boot")
put wq_numa_init() into workqueue_init_early().  Unfortunately, on
some archs including power and arm64, cpu to node mapping isn't yet
established by the time the early init is called leading to incorrect
NUMA initialization and subsequently the following oops due to zero
cpumask on node-specific unbound pools.

  Unable to handle kernel paging request for data at address 0x00000038
  Faulting instruction address: 0xc0000000000fc0cc
  Oops: Kernel access of bad area, sig: 11 [#1]
  SMP NR_CPUS=2048 NUMA PowerNV
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.8.0-compiler_gcc-6.2.0-next-20161005 #94
  task: c0000007f5400000 task.stack: c000001ffc084000
  NIP: c0000000000fc0cc LR: c0000000000ed928 CTR: c0000000000fbfd0
  REGS: c000001ffc087780 TRAP: 0300   Not tainted  (4.8.0-compiler_gcc-6.2.0-next-20161005)
  MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 48000424  XER: 00000000
  CFAR: c0000000000089dc DAR: 0000000000000038 DSISR: 40000000 SOFTE: 0
  GPR00: c0000000000ed928 c000001ffc087a00 c000000000e63200 c000000010d6d600
  GPR04: c0000007f5409200 0000000000000021 000000000748e08c 000000000000001f
  GPR08: 0000000000000000 0000000000000021 000000000748f1f8 0000000000000000
  GPR12: 0000000028000422 c00000000fb80000 c00000000000e0c8 0000000000000000
  GPR16: 0000000000000000 0000000000000000 0000000000000021 0000000000000001
  GPR20: ffffffffafb50401 0000000000000000 c000000010d6d600 000000000000ba7e
  GPR24: 000000000000ba7e c000000000d8bc58 afb504000afb5041 0000000000000001
  GPR28: 0000000000000000 0000000000000004 c0000007f5409280 0000000000000000
  NIP [c0000000000fc0cc] enqueue_task_fair+0xfc/0x18b0
  LR [c0000000000ed928] activate_task+0x78/0xe0
  Call Trace:
  [c000001ffc087a00] [c0000007f5409200] 0xc0000007f5409200 (unreliable)
  [c000001ffc087b10] [c0000000000ed928] activate_task+0x78/0xe0
  [c000001ffc087b50] [c0000000000ede58] ttwu_do_activate+0x68/0xc0
  [c000001ffc087b90] [c0000000000ef1b8] try_to_wake_up+0x208/0x4f0
  [c000001ffc087c10] [c0000000000d3484] create_worker+0x144/0x250
  [c000001ffc087cb0] [c000000000cd72d0] workqueue_init+0x124/0x150
  [c000001ffc087d00] [c000000000cc0e74] kernel_init_freeable+0x158/0x360
  [c000001ffc087dc0] [c00000000000e0e4] kernel_init+0x24/0x160
  [c000001ffc087e30] [c00000000000bfa0] ret_from_kernel_thread+0x5c/0xbc
  Instruction dump:
  62940401 3b800000 3aa00000 7f17c378 3a600001 3b600001 60000000 60000000
  60420000 72490021 ebfe0150 2f890001 <ebbf0038> 419e0de0 7fbee840 419e0e58
  ---[ end trace 0000000000000000 ]---

Fix it by moving wq_numa_init() to workqueue_init().  As this means
that the early intialization may not have full NUMA info for per-cpu
pools and ignores NUMA affinity for unbound pools, fix them up from
workqueue_init() after wq_numa_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Link: http://lkml.kernel.org/r/87twck5wqo.fsf@concordia.ellerman.id.au
Fixes: ac8f73400782 ("workqueue: make workqueue available early during boot")
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ad0cd439223b..c56479b4884b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5495,8 +5495,6 @@ int __init workqueue_init_early(void)
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
 
-	wq_numa_init();
-
 	/* initialize CPU pools */
 	for_each_possible_cpu(cpu) {
 		struct worker_pool *pool;
@@ -5566,9 +5564,32 @@ int __init workqueue_init_early(void)
  */
 int __init workqueue_init(void)
 {
+	struct workqueue_struct *wq;
 	struct worker_pool *pool;
 	int cpu, bkt;
 
+	/*
+	 * It'd be simpler to initialize NUMA in workqueue_init_early() but
+	 * CPU to node mapping may not be available that early on some
+	 * archs such as power and arm64.  As per-cpu pools created
+	 * previously could be missing node hint and unbound pools NUMA
+	 * affinity, fix them up.
+	 */
+	wq_numa_init();
+
+	mutex_lock(&wq_pool_mutex);
+
+	for_each_possible_cpu(cpu) {
+		for_each_cpu_worker_pool(pool, cpu) {
+			pool->node = cpu_to_node(cpu);
+		}
+	}
+
+	list_for_each_entry(wq, &workqueues, list)
+		wq_update_unbound_numa(wq, smp_processor_id(), true);
+
+	mutex_unlock(&wq_pool_mutex);
+
 	/* create the initial workers */
 	for_each_online_cpu(cpu) {
 		for_each_cpu_worker_pool(pool, cpu) {
-- 
cgit v1.2.3