From 17534c6f2c065ad8e34ff6f013e5afaa90428512 Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Mon, 11 Dec 2017 22:56:25 +0800 Subject: blk-throttle: export io_serviced_recursive, io_service_bytes_recursive export these two interface for cgroup-v1. Acked-by: Tejun Heo Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-throttle.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 825bc29767e66..e8428417ac0a7 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1510,11 +1510,21 @@ static struct cftype throtl_legacy_files[] = { .private = (unsigned long)&blkcg_policy_throtl, .seq_show = blkg_print_stat_bytes, }, + { + .name = "throttle.io_service_bytes_recursive", + .private = (unsigned long)&blkcg_policy_throtl, + .seq_show = blkg_print_stat_bytes_recursive, + }, { .name = "throttle.io_serviced", .private = (unsigned long)&blkcg_policy_throtl, .seq_show = blkg_print_stat_ios, }, + { + .name = "throttle.io_serviced_recursive", + .private = (unsigned long)&blkcg_policy_throtl, + .seq_show = blkg_print_stat_ios_recursive, + }, { } /* terminate */ }; -- cgit v1.2.3 From b889bf66d001a46a95deef18ddbe6db84645ed24 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 21 Nov 2017 09:38:30 +0800 Subject: blk-throttle: track read and write request individually In mixed read/write workload on SSD, write latency is much lower than read. But now we only track and record read latency and then use it as threshold base for both read and write io latency accounting. As a result, write io latency will always be considered as good and bad_bio_cnt is much smaller than 20% of bio_cnt. That is to mean, the tg to be checked will be treated as idle most of the time and still let others dispatch more ios, even it is truly running under low limit and wants its low limit to be guaranteed, which is not we expected in fact. So track read and write request individually, which can bring more precise latency control for low limit idle detection. Signed-off-by: Joseph Qi Reviewed-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 134 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 79 insertions(+), 55 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e8428417ac0a7..e136f5ef95779 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -216,9 +216,9 @@ struct throtl_data unsigned int scale; - struct latency_bucket tmp_buckets[LATENCY_BUCKET_SIZE]; - struct avg_latency_bucket avg_buckets[LATENCY_BUCKET_SIZE]; - struct latency_bucket __percpu *latency_buckets; + struct latency_bucket tmp_buckets[2][LATENCY_BUCKET_SIZE]; + struct avg_latency_bucket avg_buckets[2][LATENCY_BUCKET_SIZE]; + struct latency_bucket __percpu *latency_buckets[2]; unsigned long last_calculate_time; unsigned long filtered_latency; @@ -2050,10 +2050,10 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg) #ifdef CONFIG_BLK_DEV_THROTTLING_LOW static void throtl_update_latency_buckets(struct throtl_data *td) { - struct avg_latency_bucket avg_latency[LATENCY_BUCKET_SIZE]; - int i, cpu; - unsigned long last_latency = 0; - unsigned long latency; + struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE]; + int i, cpu, rw; + unsigned long last_latency[2] = { 0 }; + unsigned long latency[2]; if (!blk_queue_nonrot(td->queue)) return; @@ -2062,56 +2062,67 @@ static void throtl_update_latency_buckets(struct throtl_data *td) td->last_calculate_time = jiffies; memset(avg_latency, 0, sizeof(avg_latency)); - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { - struct latency_bucket *tmp = &td->tmp_buckets[i]; - - for_each_possible_cpu(cpu) { - struct latency_bucket *bucket; - - /* this isn't race free, but ok in practice */ - bucket = per_cpu_ptr(td->latency_buckets, cpu); - tmp->total_latency += bucket[i].total_latency; - tmp->samples += bucket[i].samples; - bucket[i].total_latency = 0; - bucket[i].samples = 0; - } + for (rw = READ; rw <= WRITE; rw++) { + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + struct latency_bucket *tmp = &td->tmp_buckets[rw][i]; + + for_each_possible_cpu(cpu) { + struct latency_bucket *bucket; + + /* this isn't race free, but ok in practice */ + bucket = per_cpu_ptr(td->latency_buckets[rw], + cpu); + tmp->total_latency += bucket[i].total_latency; + tmp->samples += bucket[i].samples; + bucket[i].total_latency = 0; + bucket[i].samples = 0; + } - if (tmp->samples >= 32) { - int samples = tmp->samples; + if (tmp->samples >= 32) { + int samples = tmp->samples; - latency = tmp->total_latency; + latency[rw] = tmp->total_latency; - tmp->total_latency = 0; - tmp->samples = 0; - latency /= samples; - if (latency == 0) - continue; - avg_latency[i].latency = latency; + tmp->total_latency = 0; + tmp->samples = 0; + latency[rw] /= samples; + if (latency[rw] == 0) + continue; + avg_latency[rw][i].latency = latency[rw]; + } } } - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { - if (!avg_latency[i].latency) { - if (td->avg_buckets[i].latency < last_latency) - td->avg_buckets[i].latency = last_latency; - continue; - } + for (rw = READ; rw <= WRITE; rw++) { + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + if (!avg_latency[rw][i].latency) { + if (td->avg_buckets[rw][i].latency < last_latency[rw]) + td->avg_buckets[rw][i].latency = + last_latency[rw]; + continue; + } - if (!td->avg_buckets[i].valid) - latency = avg_latency[i].latency; - else - latency = (td->avg_buckets[i].latency * 7 + - avg_latency[i].latency) >> 3; + if (!td->avg_buckets[rw][i].valid) + latency[rw] = avg_latency[rw][i].latency; + else + latency[rw] = (td->avg_buckets[rw][i].latency * 7 + + avg_latency[rw][i].latency) >> 3; - td->avg_buckets[i].latency = max(latency, last_latency); - td->avg_buckets[i].valid = true; - last_latency = td->avg_buckets[i].latency; + td->avg_buckets[rw][i].latency = max(latency[rw], + last_latency[rw]); + td->avg_buckets[rw][i].valid = true; + last_latency[rw] = td->avg_buckets[rw][i].latency; + } } for (i = 0; i < LATENCY_BUCKET_SIZE; i++) throtl_log(&td->service_queue, - "Latency bucket %d: latency=%ld, valid=%d", i, - td->avg_buckets[i].latency, td->avg_buckets[i].valid); + "Latency bucket %d: read latency=%ld, read valid=%d, " + "write latency=%ld, write valid=%d", i, + td->avg_buckets[READ][i].latency, + td->avg_buckets[READ][i].valid, + td->avg_buckets[WRITE][i].latency, + td->avg_buckets[WRITE][i].valid); } #else static inline void throtl_update_latency_buckets(struct throtl_data *td) @@ -2258,16 +2269,17 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size, struct latency_bucket *latency; int index; - if (!td || td->limit_index != LIMIT_LOW || op != REQ_OP_READ || + if (!td || td->limit_index != LIMIT_LOW || + !(op == REQ_OP_READ || op == REQ_OP_WRITE) || !blk_queue_nonrot(td->queue)) return; index = request_bucket_index(size); - latency = get_cpu_ptr(td->latency_buckets); + latency = get_cpu_ptr(td->latency_buckets[op]); latency[index].total_latency += time; latency[index].samples++; - put_cpu_ptr(td->latency_buckets); + put_cpu_ptr(td->latency_buckets[op]); } void blk_throtl_stat_add(struct request *rq, u64 time_ns) @@ -2286,6 +2298,7 @@ void blk_throtl_bio_endio(struct bio *bio) unsigned long finish_time; unsigned long start_time; unsigned long lat; + int rw = bio_data_dir(bio); tg = bio->bi_cg_private; if (!tg) @@ -2314,7 +2327,7 @@ void blk_throtl_bio_endio(struct bio *bio) bucket = request_bucket_index( blk_stat_size(&bio->bi_issue_stat)); - threshold = tg->td->avg_buckets[bucket].latency + + threshold = tg->td->avg_buckets[rw][bucket].latency + tg->latency_target; if (lat > threshold) tg->bad_bio_cnt++; @@ -2407,9 +2420,16 @@ int blk_throtl_init(struct request_queue *q) td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) return -ENOMEM; - td->latency_buckets = __alloc_percpu(sizeof(struct latency_bucket) * + td->latency_buckets[READ] = __alloc_percpu(sizeof(struct latency_bucket) * + LATENCY_BUCKET_SIZE, __alignof__(u64)); + if (!td->latency_buckets[READ]) { + kfree(td); + return -ENOMEM; + } + td->latency_buckets[WRITE] = __alloc_percpu(sizeof(struct latency_bucket) * LATENCY_BUCKET_SIZE, __alignof__(u64)); - if (!td->latency_buckets) { + if (!td->latency_buckets[WRITE]) { + free_percpu(td->latency_buckets[READ]); kfree(td); return -ENOMEM; } @@ -2428,7 +2448,8 @@ int blk_throtl_init(struct request_queue *q) /* activate policy */ ret = blkcg_activate_policy(q, &blkcg_policy_throtl); if (ret) { - free_percpu(td->latency_buckets); + free_percpu(td->latency_buckets[READ]); + free_percpu(td->latency_buckets[WRITE]); kfree(td); } return ret; @@ -2439,7 +2460,8 @@ void blk_throtl_exit(struct request_queue *q) BUG_ON(!q->td); throtl_shutdown_wq(q); blkcg_deactivate_policy(q, &blkcg_policy_throtl); - free_percpu(q->td->latency_buckets); + free_percpu(q->td->latency_buckets[READ]); + free_percpu(q->td->latency_buckets[WRITE]); kfree(q->td); } @@ -2457,8 +2479,10 @@ void blk_throtl_register_queue(struct request_queue *q) } else { td->throtl_slice = DFL_THROTL_SLICE_HD; td->filtered_latency = LATENCY_FILTERED_HD; - for (i = 0; i < LATENCY_BUCKET_SIZE; i++) - td->avg_buckets[i].latency = DFL_HD_BASELINE_LATENCY; + for (i = 0; i < LATENCY_BUCKET_SIZE; i++) { + td->avg_buckets[READ][i].latency = DFL_HD_BASELINE_LATENCY; + td->avg_buckets[WRITE][i].latency = DFL_HD_BASELINE_LATENCY; + } } #ifndef CONFIG_BLK_DEV_THROTTLING_LOW /* if no low limit, use previous default */ -- cgit v1.2.3 From 475a055e62a1eb92f4358ad8a9059df973c190ac Mon Sep 17 00:00:00 2001 From: weiping zhang Date: Sat, 20 Jan 2018 07:34:25 +0800 Subject: blk-throttle: use queue_is_rq_based use queue_is_rq_based instead of open code. Signed-off-by: weiping zhang Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e136f5ef95779..c475f0fe35306 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2489,7 +2489,7 @@ void blk_throtl_register_queue(struct request_queue *q) td->throtl_slice = DFL_THROTL_SLICE_HD; #endif - td->track_bio_latency = !q->mq_ops && !q->request_fn; + td->track_bio_latency = !queue_is_rq_based(q); if (!td->track_bio_latency) blk_stat_enable_accounting(q); } -- cgit v1.2.3