summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--block/blk-cgroup.c53
-rw-r--r--include/linux/blk-cgroup.h44
-rw-r--r--mm/backing-dev.c5
3 files changed, 94 insertions, 8 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2998e4f095d1b..c19f9078da1ed 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = {
{ } /* terminate */
};
+/*
+ * blkcg destruction is a three-stage process.
+ *
+ * 1. Destruction starts. The blkcg_css_offline() callback is invoked
+ * which offlines writeback. Here we tie the next stage of blkg destruction
+ * to the completion of writeback associated with the blkcg. This lets us
+ * avoid punting potentially large amounts of outstanding writeback to root
+ * while maintaining any ongoing policies. The next stage is triggered when
+ * the nr_cgwbs count goes to zero.
+ *
+ * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
+ * and handles the destruction of blkgs. Here the css reference held by
+ * the blkg is put back eventually allowing blkcg_css_free() to be called.
+ * This work may occur in cgwb_release_workfn() on the cgwb_release
+ * workqueue. Any submitted ios that fail to get the blkg ref will be
+ * punted to the root_blkg.
+ *
+ * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
+ * This finally frees the blkcg.
+ */
+
/**
* blkcg_css_offline - cgroup css_offline callback
* @css: css of interest
*
- * This function is called when @css is about to go away and responsible
- * for shooting down all blkgs associated with @css. blkgs should be
- * removed while holding both q and blkcg locks. As blkcg lock is nested
- * inside q lock, this function performs reverse double lock dancing.
- *
- * This is the blkcg counterpart of ioc_release_fn().
+ * This function is called when @css is about to go away. Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
+ /* this prevents anyone from attaching or migrating to this blkcg */
+ wb_blkcg_offline(blkcg);
+
+ /* put the base cgwb reference allowing step 2 to be triggered */
+ blkcg_cgwb_put(blkcg);
+}
+
+/**
+ * blkcg_destroy_blkgs - responsible for shooting down blkgs
+ * @blkcg: blkcg of interest
+ *
+ * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
+ * is nested inside q lock, this function performs reverse double lock dancing.
+ * Destroying the blkgs releases the reference held on the blkcg's css allowing
+ * blkcg_css_free to eventually be called.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
+ */
+void blkcg_destroy_blkgs(struct blkcg *blkcg)
+{
spin_lock_irq(&blkcg->lock);
while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
}
spin_unlock_irq(&blkcg->lock);
-
- wb_blkcg_offline(blkcg);
}
static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
INIT_HLIST_HEAD(&blkcg->blkg_list);
#ifdef CONFIG_CGROUP_WRITEBACK
INIT_LIST_HEAD(&blkcg->cgwb_list);
+ refcount_set(&blkcg->cgwb_refcnt, 1);
#endif
list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1615cdd4c7979..6d766a19f2bbb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
struct list_head cgwb_list;
+ refcount_t cgwb_refcnt;
#endif
};
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
return cpd ? cpd->blkcg : NULL;
}
+extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ */
+static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+{
+ refcount_inc(&blkcg->cgwb_refcnt);
+}
+
+/**
+ * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ * When this count goes to zero, all active wb has finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ * This work may occur in cgwb_release_workfn() on the cgwb_release
+ * workqueue.
+ */
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+ if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+ blkcg_destroy_blkgs(blkcg);
+}
+
+#else
+
+static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
+
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+ /* wb isn't being accounted, so trigger destruction right away */
+ blkcg_destroy_blkgs(blkcg);
+}
+
+#endif
+
/**
* blkg_path - format cgroup path of blkg
* @blkg: blkg of interest
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f5981e9d6ae2e..8a8bb8796c6c4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
release_work);
+ struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
mutex_lock(&wb->bdi->cgwb_release_mutex);
wb_shutdown(wb);
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
css_put(wb->blkcg_css);
mutex_unlock(&wb->bdi->cgwb_release_mutex);
+ /* triggers blkg destruction if cgwb_refcnt becomes zero */
+ blkcg_cgwb_put(blkcg);
+
fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
+ blkcg_cgwb_get(blkcg);
css_get(memcg_css);
css_get(blkcg_css);
}