// SPDX-License-Identifier: GPL-2.0
/*
 * The Kyber I/O scheduler. Controls latency by throttling queue depths using
 * scalable techniques.
 *
 * Copyright (C) 2017 Facebook
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/module.h>
#include <linux/sbitmap.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"

#define CREATE_TRACE_POINTS
#include <trace/events/kyber.h>

/*
 * Scheduling domains: the device is divided into multiple domains based on the
 * request type.
 */
enum {
        KYBER_READ,
        KYBER_WRITE,
        KYBER_DISCARD,
        KYBER_OTHER,
        KYBER_NUM_DOMAINS,
};

static const char *kyber_domain_names[] = {
        [KYBER_READ] = "READ",
        [KYBER_WRITE] = "WRITE",
        [KYBER_DISCARD] = "DISCARD",
        [KYBER_OTHER] = "OTHER",
};

enum {
        /*
         * In order to prevent starvation of synchronous requests by a flood of
         * asynchronous requests, we reserve 25% of the scheduler tags for
         * synchronous operations.
         */
        KYBER_ASYNC_PERCENT = 75,
};

/*
 * Maximum device-wide depth of each scheduling domain.
 *
 * Even for fast devices with lots of tags like NVMe, you can saturate the
 * device with only a fraction of the maximum possible queue depth. So, we cap
 * these to a reasonable value.
 */
static const unsigned int kyber_depth[] = {
        [KYBER_READ] = 256,
        [KYBER_WRITE] = 128,
        [KYBER_DISCARD] = 64,
        [KYBER_OTHER] = 16,
};

/*
 * Default latency targets for each scheduling domain.
 */
static const u64 kyber_latency_targets[] = {
        [KYBER_READ] = 2ULL * NSEC_PER_MSEC,
        [KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
        [KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
};

/*
 * Batch size (number of requests we'll dispatch in a row) for each scheduling
 * domain.
 */
static const unsigned int kyber_batch_size[] = {
        [KYBER_READ] = 16,
        [KYBER_WRITE] = 8,
        [KYBER_DISCARD] = 1,
        [KYBER_OTHER] = 1,
};

/*
 * Request latencies are recorded in a histogram with buckets defined relative
 * to the target latency:
 *
 * <= 1/4 * target latency
 * <= 1/2 * target latency
 * <= 3/4 * target latency
 * <= target latency
 * <= 1 1/4 * target latency
 * <= 1 1/2 * target latency
 * <= 1 3/4 * target latency
 * > 1 3/4 * target latency
 */
enum {
        /*
         * The width of the latency histogram buckets is
         * 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
         */
        KYBER_LATENCY_SHIFT = 2,
        /*
         * The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
         * i.e., "good" buckets.
         */
        KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
        /* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
        KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
};
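
/*
 * Worked example with the defaults above: a read has a 2 ms target and
 * KYBER_LATENCY_SHIFT = 2, so each bucket is 2 ms / 4 = 500 us wide. Buckets
 * 0-3 hold samples at or below the target, buckets 4-7 hold samples up to
 * twice the target, and the last bucket also absorbs anything slower.
 */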

/*
 * We measure both the total latency and the I/O latency (i.e., the latency
 * after the request was issued to the device).
 */
enum {
        KYBER_TOTAL_LATENCY,
        KYBER_IO_LATENCY,
};

static const char *kyber_latency_type_names[] = {
        [KYBER_TOTAL_LATENCY] = "total",
        [KYBER_IO_LATENCY] = "I/O",
};

/*
 * Per-cpu latency histograms: total latency and I/O latency for each
 * scheduling domain except for KYBER_OTHER.
 */
struct kyber_cpu_latency {
        atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
};

/*
 * There is a one-to-one mapping between ctx & hctx and kcq & khd; we use
 * request->mq_ctx->index_hw to index the kcq in khd.
 */
struct kyber_ctx_queue {
        /*
         * Protects rq_list and the corresponding bit in kcq_map, as well as
         * the requests on rq_list during merging.
         */
        spinlock_t lock;
        struct list_head rq_list[KYBER_NUM_DOMAINS];
} ____cacheline_aligned_in_smp;

struct kyber_queue_data {
        struct request_queue *q;
        dev_t dev;

        /*
         * Each scheduling domain has a limited number of in-flight requests
         * device-wide, limited by these tokens.
         */
        struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];

        /*
         * Async request percentage, converted to per-word depth for
         * sbitmap_get_shallow().
         */
        unsigned int async_depth;

        struct kyber_cpu_latency __percpu *cpu_latency;

        /* Timer for stats aggregation and adjusting domain tokens. */
        struct timer_list timer;

        unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];

        unsigned long latency_timeout[KYBER_OTHER];

        int domain_p99[KYBER_OTHER];

        /* Target latencies in nanoseconds. */
        u64 latency_targets[KYBER_OTHER];
};

struct kyber_hctx_data {
        spinlock_t lock;
        struct list_head rqs[KYBER_NUM_DOMAINS];
        unsigned int cur_domain;
        unsigned int batching;
        struct kyber_ctx_queue *kcqs;
        struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
        struct sbq_wait domain_wait[KYBER_NUM_DOMAINS];
        struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
};

static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
                             void *key);

static unsigned int kyber_sched_domain(blk_opf_t opf)
{
        switch (opf & REQ_OP_MASK) {
        case REQ_OP_READ:
                return KYBER_READ;
        case REQ_OP_WRITE:
                return KYBER_WRITE;
        case REQ_OP_DISCARD:
                return KYBER_DISCARD;
        default:
                return KYBER_OTHER;
        }
}

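/*
 * Fold one CPU's latency histogram for the given domain and latency type into
 * the queue-wide histogram, zeroing the per-cpu counters as we go.
 */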
static void flush_latency_buckets(struct kyber_queue_data *kqd,
                                  struct kyber_cpu_latency *cpu_latency,
                                  unsigned int sched_domain, unsigned int type)
{
        unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
        atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
        unsigned int bucket;

        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
                buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
}

/*
 * Calculate the histogram bucket with the given percentile rank, or -1 if
 * there aren't enough samples yet.
 */
static int calculate_percentile(struct kyber_queue_data *kqd,
                                unsigned int sched_domain, unsigned int type,
                                unsigned int percentile)
{
        unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
        unsigned int bucket, samples = 0, percentile_samples;

        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
                samples += buckets[bucket];

        if (!samples)
                return -1;

        /*
         * We do the calculation once we have 500 samples or one second passes
         * since the first sample was recorded, whichever comes first.
         */
        if (!kqd->latency_timeout[sched_domain])
                kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
        if (samples < 500 &&
            time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
                return -1;
        }
        kqd->latency_timeout[sched_domain] = 0;

        percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
        for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
                if (buckets[bucket] >= percentile_samples)
                        break;
                percentile_samples -= buckets[bucket];
        }
        memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));

        trace_kyber_latency(kqd->dev, kyber_domain_names[sched_domain],
                            kyber_latency_type_names[type], percentile,
                            bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);

        return bucket;
}

static void kyber_resize_domain(struct kyber_queue_data *kqd,
                                unsigned int sched_domain, unsigned int depth)
{
        depth = clamp(depth, 1U, kyber_depth[sched_domain]);
        if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
                sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
                trace_kyber_adjust(kqd->dev, kyber_domain_names[sched_domain],
                                   depth);
        }
}

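/*
 * Periodic stats timer: fold the per-cpu histograms into the queue-wide ones,
 * check the p90 I/O latencies for signs of device congestion, and rescale the
 * domain token depths based on the p99 total latencies.
 */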
static void kyber_timer_fn(struct timer_list *t)
{
        struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
        unsigned int sched_domain;
        int cpu;
        bool bad = false;

        /* Sum all of the per-cpu latency histograms. */
        for_each_online_cpu(cpu) {
                struct kyber_cpu_latency *cpu_latency;

                cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
                for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
                        flush_latency_buckets(kqd, cpu_latency, sched_domain,
                                              KYBER_TOTAL_LATENCY);
                        flush_latency_buckets(kqd, cpu_latency, sched_domain,
                                              KYBER_IO_LATENCY);
                }
        }

        /*
         * Check if any domains have a high I/O latency, which might indicate
         * congestion in the device. Note that we use the p90; we don't want
         * to be too sensitive to outliers here.
         */
        for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
                int p90;

                p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
                                           90);
                if (p90 >= KYBER_GOOD_BUCKETS)
                        bad = true;
        }

        /*
         * Adjust the scheduling domain depths. If we determined that there
         * was congestion, we throttle all domains with good latencies, and
         * either way we ease up on throttling domains with bad latencies.
         */
        for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
                unsigned int orig_depth, depth;
                int p99;

                p99 = calculate_percentile(kqd, sched_domain,
                                           KYBER_TOTAL_LATENCY, 99);
                /*
                 * A domain may not have enough samples to compute a p99 in
                 * the same window that another domain reports congestion, so
                 * remember the last good p99 and fall back to it when we do
                 * throttle; once we throttle, reset it to -1 so that we wait
                 * for fresh data before throttling again.
                 */
                if (bad) {
                        if (p99 < 0)
                                p99 = kqd->domain_p99[sched_domain];
                        kqd->domain_p99[sched_domain] = -1;
                } else if (p99 >= 0) {
                        kqd->domain_p99[sched_domain] = p99;
                }
                if (p99 < 0)
                        continue;

                /*
                 * If this domain has bad latency, throttle less. Otherwise,
                 * throttle more iff we determined that there is congestion.
                 *
                 * The new depth is scaled linearly with the p99 latency vs
                 * the latency target. E.g., if the p99 is 3/4 of the target,
                 * we throttle down to 3/4 of the current depth, and if the
                 * p99 is 2x the target, we double the depth.
                 */
                if (bad || p99 >= KYBER_GOOD_BUCKETS) {
                        orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
                        depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
                        kyber_resize_domain(kqd, sched_domain, depth);
                }
        }
}

static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
        struct kyber_queue_data *kqd;
        int ret = -ENOMEM;
        int i;

        kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
        if (!kqd)
                goto err;

        kqd->q = q;
        kqd->dev = disk_devt(q->disk);

        kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
                                            GFP_KERNEL | __GFP_ZERO);
        if (!kqd->cpu_latency)
                goto err_kqd;

        timer_setup(&kqd->timer, kyber_timer_fn, 0);

        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                WARN_ON(!kyber_depth[i]);
                WARN_ON(!kyber_batch_size[i]);
                ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
                                              kyber_depth[i], -1, false,
                                              GFP_KERNEL, q->node);
                if (ret) {
                        while (--i >= 0)
                                sbitmap_queue_free(&kqd->domain_tokens[i]);
                        goto err_buckets;
                }
        }

        for (i = 0; i < KYBER_OTHER; i++) {
                kqd->domain_p99[i] = -1;
                kqd->latency_targets[i] = kyber_latency_targets[i];
        }

        return kqd;

err_buckets:
        free_percpu(kqd->cpu_latency);
err_kqd:
        kfree(kqd);
err:
        return ERR_PTR(ret);
}

static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
{
        struct kyber_queue_data *kqd;
        struct elevator_queue *eq;

        eq = elevator_alloc(q, e);
        if (!eq)
                return -ENOMEM;

        kqd = kyber_queue_data_alloc(q);
        if (IS_ERR(kqd)) {
                kobject_put(&eq->kobj);
                return PTR_ERR(kqd);
        }

        blk_stat_enable_accounting(q);

        blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);

        eq->elevator_data = kqd;
        q->elevator = eq;

        return 0;
}

static void kyber_exit_sched(struct elevator_queue *e)
{
        struct kyber_queue_data *kqd = e->elevator_data;
        int i;

        del_timer_sync(&kqd->timer);
        blk_stat_disable_accounting(kqd->q);

        for (i = 0; i < KYBER_NUM_DOMAINS; i++)
                sbitmap_queue_free(&kqd->domain_tokens[i]);
        free_percpu(kqd->cpu_latency);
        kfree(kqd);
}

static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
{
        unsigned int i;

        spin_lock_init(&kcq->lock);
        for (i = 0; i < KYBER_NUM_DOMAINS; i++)
                INIT_LIST_HEAD(&kcq->rq_list[i]);
}

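/*
 * Recompute the async (shallow) depth from the scheduler tag bitmap's word
 * size whenever the tag depth changes: async requests may only use
 * KYBER_ASYNC_PERCENT of the bits in each sbitmap word.
 */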
static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
        struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
        struct blk_mq_tags *tags = hctx->sched_tags;
        unsigned int shift = tags->bitmap_tags.sb.shift;

        kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;

        sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, kqd->async_depth);
}

static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
        struct kyber_hctx_data *khd;
        int i;

        khd = kmalloc_node(sizeof(*khd), GFP_KERNEL, hctx->numa_node);
        if (!khd)
                return -ENOMEM;

        khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
                                       sizeof(struct kyber_ctx_queue),
                                       GFP_KERNEL, hctx->numa_node);
        if (!khd->kcqs)
                goto err_khd;

        for (i = 0; i < hctx->nr_ctx; i++)
                kyber_ctx_queue_init(&khd->kcqs[i]);

        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
                                      ilog2(8), GFP_KERNEL, hctx->numa_node,
                                      false, false)) {
                        while (--i >= 0)
                                sbitmap_free(&khd->kcq_map[i]);
                        goto err_kcqs;
                }
        }

        spin_lock_init(&khd->lock);

        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
                khd->domain_wait[i].sbq = NULL;
                init_waitqueue_func_entry(&khd->domain_wait[i].wait,
                                          kyber_domain_wake);
                khd->domain_wait[i].wait.private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].wait.entry);
                atomic_set(&khd->wait_index[i], 0);
        }

        khd->cur_domain = 0;
        khd->batching = 0;

        hctx->sched_data = khd;
        kyber_depth_updated(hctx);

        return 0;

err_kcqs:
        kfree(khd->kcqs);
err_khd:
        kfree(khd);
        return -ENOMEM;
}

static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
{
        struct kyber_hctx_data *khd = hctx->sched_data;
        int i;

        for (i = 0; i < KYBER_NUM_DOMAINS; i++)
                sbitmap_free(&khd->kcq_map[i]);
        kfree(khd->kcqs);
        kfree(hctx->sched_data);
}

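/*
 * The domain token for a request is stashed in rq->elv.priv[0]; -1 means the
 * request does not currently hold a token.
 */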
static int rq_get_domain_token(struct request *rq)
{
        return (long)rq->elv.priv[0];
}

static void rq_set_domain_token(struct request *rq, int token)
{
        rq->elv.priv[0] = (void *)(long)token;
}

static void rq_clear_domain_token(struct kyber_queue_data *kqd,
                                  struct request *rq)
{
        unsigned int sched_domain;
        int nr;

        nr = rq_get_domain_token(rq);
        if (nr != -1) {
                sched_domain = kyber_sched_domain(rq->cmd_flags);
                sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
                                    rq->mq_ctx->cpu);
        }
}

static void kyber_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
{
        /*
         * We use the scheduler tags as per-hardware-queue queueing tokens.
         * Async requests can be limited here.
         */
        if (!op_is_sync(opf)) {
                struct kyber_queue_data *kqd = data->q->elevator->elevator_data;

                data->shallow_depth = kqd->async_depth;
        }
}

static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
                            unsigned int nr_segs)
{
        struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
        struct kyber_hctx_data *khd = hctx->sched_data;
        struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
        unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
        struct list_head *rq_list = &kcq->rq_list[sched_domain];
        bool merged;

        spin_lock(&kcq->lock);
        merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs);
        spin_unlock(&kcq->lock);

        return merged;
}

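/* Newly prepared requests start out without a domain token. */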
static void kyber_prepare_request(struct request *rq)
{
        rq_set_domain_token(rq, -1);
}

static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
                                  struct list_head *rq_list, bool at_head)
{
        struct kyber_hctx_data *khd = hctx->sched_data;
        struct request *rq, *next;

        list_for_each_entry_safe(rq, next, rq_list, queuelist) {
                unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
                struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw[hctx->type]];
                struct list_head *head = &kcq->rq_list[sched_domain];

                spin_lock(&kcq->lock);
                trace_block_rq_insert(rq);
                if (at_head)
                        list_move(&rq->queuelist, head);
                else
                        list_move_tail(&rq->queuelist, head);
                sbitmap_set_bit(&khd->kcq_map[sched_domain],
                                rq->mq_ctx->index_hw[hctx->type]);
                spin_unlock(&kcq->lock);
        }
}

static void kyber_finish_request(struct request *rq)
{
        struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;

        rq_clear_domain_token(kqd, rq);
}

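/*
 * Record one latency sample in the per-cpu histogram. The bucket width is
 * target >> KYBER_LATENCY_SHIFT, and the last bucket also catches everything
 * beyond twice the target.
 */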
static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
                               unsigned int sched_domain, unsigned int type,
                               u64 target, u64 latency)
{
        unsigned int bucket;
        u64 divisor;

        if (latency > 0) {
                divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
                bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
                               KYBER_LATENCY_BUCKETS - 1);
        } else {
                bucket = 0;
        }

        atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
}

static void kyber_completed_request(struct request *rq, u64 now)
{
        struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
        struct kyber_cpu_latency *cpu_latency;
        unsigned int sched_domain;
        u64 target;

        sched_domain = kyber_sched_domain(rq->cmd_flags);
        if (sched_domain == KYBER_OTHER)
                return;

        cpu_latency = get_cpu_ptr(kqd->cpu_latency);
        target = kqd->latency_targets[sched_domain];
        add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
                           target, now - rq->start_time_ns);
        add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
                           now - rq->io_start_time_ns);
        put_cpu_ptr(kqd->cpu_latency);

        timer_reduce(&kqd->timer, jiffies + HZ / 10);
}

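/*
 * Context passed to flush_busy_kcq() when moving requests from the per-ctx
 * queues onto the hardware queue's dispatch list for one scheduling domain.
 */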
struct flush_kcq_data {
        struct kyber_hctx_data *khd;
        unsigned int sched_domain;
        struct list_head *list;
};

static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
{
        struct flush_kcq_data *flush_data = data;
        struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];

        spin_lock(&kcq->lock);
        list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
                              flush_data->list);
        sbitmap_clear_bit(sb, bitnr);
        spin_unlock(&kcq->lock);

        return true;
}

static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
                                  unsigned int sched_domain,
                                  struct list_head *list)
{
        struct flush_kcq_data data = {
                .khd = khd,
                .sched_domain = sched_domain,
                .list = list,
        };

        sbitmap_for_each_set(&khd->kcq_map[sched_domain],
                             flush_busy_kcq, &data);
}

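/*
 * Wait queue callback: a domain token was freed, so take ourselves off the
 * wait queue and rerun the hardware queue.
 */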
static int kyber_domain_wake(wait_queue_entry_t *wqe, unsigned mode, int flags,
                             void *key)
{
        struct blk_mq_hw_ctx *hctx = READ_ONCE(wqe->private);
        struct sbq_wait *wait = container_of(wqe, struct sbq_wait, wait);

        sbitmap_del_wait_queue(wait);
        blk_mq_run_hw_queue(hctx, true);
        return 1;
}

static int kyber_get_domain_token(struct kyber_queue_data *kqd,
                                  struct kyber_hctx_data *khd,
                                  struct blk_mq_hw_ctx *hctx)
{
        unsigned int sched_domain = khd->cur_domain;
        struct sbitmap_queue *domain_tokens = &kqd->domain_tokens[sched_domain];
        struct sbq_wait *wait = &khd->domain_wait[sched_domain];
        struct sbq_wait_state *ws;
        int nr;

        nr = __sbitmap_queue_get(domain_tokens);

        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
        if (nr < 0 && list_empty_careful(&wait->wait.entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
                khd->domain_ws[sched_domain] = ws;
                sbitmap_add_wait_queue(domain_tokens, ws, wait);

                /*
                 * Try again in case a token was freed before we got on the
                 * wait queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
        }

        /*
         * If we got a token while we were on the wait queue, remove ourselves
         * from the wait queue to ensure that all wake ups make forward
         * progress. It's possible that the waker already deleted the entry
         * between the !list_empty_careful() check and us grabbing the lock,
         * but list_del_init() is okay with that.
         */
        if (nr >= 0 && !list_empty_careful(&wait->wait.entry)) {
                ws = khd->domain_ws[sched_domain];
                spin_lock_irq(&ws->wait.lock);
                sbitmap_del_wait_queue(wait);
                spin_unlock_irq(&ws->wait.lock);
        }

        return nr;
}

static struct request *
kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
                          struct kyber_hctx_data *khd,
                          struct blk_mq_hw_ctx *hctx)
{
        struct list_head *rqs;
        struct request *rq;
        int nr;

        rqs = &khd->rqs[khd->cur_domain];

        /*
         * If we already have a flushed request, then we just need to get a
         * token for it. Otherwise, if there are pending requests in the kcqs,
         * flush the kcqs, but only if we can get a token. If not, we should
         * leave the requests in the kcqs so that they can be merged. Note
         * that khd->lock serializes the flushes, so if we observed any bit
         * set in the kcq_map, we will always get a request.
         */
        rq = list_first_entry_or_null(rqs, struct request, queuelist);
        if (rq) {
                nr = kyber_get_domain_token(kqd, khd, hctx);
                if (nr >= 0) {
                        khd->batching++;
                        rq_set_domain_token(rq, nr);
                        list_del_init(&rq->queuelist);
                        return rq;
                } else {
                        trace_kyber_throttled(kqd->dev,
                                              kyber_domain_names[khd->cur_domain]);
                }
        } else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
                nr = kyber_get_domain_token(kqd, khd, hctx);
                if (nr >= 0) {
                        kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
                        rq = list_first_entry(rqs, struct request, queuelist);
                        khd->batching++;
                        rq_set_domain_token(rq, nr);
                        list_del_init(&rq->queuelist);
                        return rq;
                } else {
                        trace_kyber_throttled(kqd->dev,
                                              kyber_domain_names[khd->cur_domain]);
                }
        }

        /* There were either no pending requests or no tokens. */
        return NULL;
}

static struct request *kyber_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
        struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
        struct kyber_hctx_data *khd = hctx->sched_data;
        struct request *rq;
        int i;

        spin_lock(&khd->lock);

        /*
         * First, if we are still entitled to batch, try to dispatch a request
         * from the batch.
         */
        if (khd->batching < kyber_batch_size[khd->cur_domain]) {
                rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
                if (rq)
                        goto out;
        }

        /*
         * Either,
         * 1. We were no longer entitled to a batch.
         * 2. The domain we were batching didn't have any requests.
         * 3. The domain we were batching was out of tokens.
         *
         * Start another batch. Note that this wraps back around to the
         * original domain if no other domains have requests or tokens.
         */
        khd->batching = 0;
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                if (khd->cur_domain == KYBER_NUM_DOMAINS - 1)
                        khd->cur_domain = 0;
                else
                        khd->cur_domain++;

                rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
                if (rq)
                        goto out;
        }

        rq = NULL;
out:
        spin_unlock(&khd->lock);
        return rq;
}

static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
{
        struct kyber_hctx_data *khd = hctx->sched_data;
        int i;

        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                if (!list_empty_careful(&khd->rqs[i]) ||
                    sbitmap_any_bit_set(&khd->kcq_map[i]))
                        return true;
        }

        return false;
}

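/*
 * sysfs: expose the per-domain latency targets (in nanoseconds) as the
 * read_lat_nsec and write_lat_nsec attributes of the elevator.
 */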
#define KYBER_LAT_SHOW_STORE(domain, name) \
static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
                                       char *page) \
{ \
        struct kyber_queue_data *kqd = e->elevator_data; \
 \
        return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
} \
 \
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
                                        const char *page, size_t count) \
{ \
        struct kyber_queue_data *kqd = e->elevator_data; \
        unsigned long long nsec; \
        int ret; \
 \
        ret = kstrtoull(page, 10, &nsec); \
        if (ret) \
                return ret; \
 \
        kqd->latency_targets[domain] = nsec; \
 \
        return count; \
}
KYBER_LAT_SHOW_STORE(KYBER_READ, read);
KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
#undef KYBER_LAT_SHOW_STORE

#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
static struct elv_fs_entry kyber_sched_attrs[] = {
        KYBER_LAT_ATTR(read),
        KYBER_LAT_ATTR(write),
        __ATTR_NULL
};
#undef KYBER_LAT_ATTR

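/*
 * debugfs: per-queue token bitmaps and async depth, plus per-hctx pending
 * request lists, wait state, current domain, and batch count.
 */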
#ifdef CONFIG_BLK_DEBUG_FS
#define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \
static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \
{ \
        struct request_queue *q = data; \
        struct kyber_queue_data *kqd = q->elevator->elevator_data; \
 \
        sbitmap_queue_show(&kqd->domain_tokens[domain], m); \
        return 0; \
} \
 \
static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos) \
        __acquires(&khd->lock) \
{ \
        struct blk_mq_hw_ctx *hctx = m->private; \
        struct kyber_hctx_data *khd = hctx->sched_data; \
 \
        spin_lock(&khd->lock); \
        return seq_list_start(&khd->rqs[domain], *pos); \
} \
 \
static void *kyber_##name##_rqs_next(struct seq_file *m, void *v, \
                                     loff_t *pos) \
{ \
        struct blk_mq_hw_ctx *hctx = m->private; \
        struct kyber_hctx_data *khd = hctx->sched_data; \
 \
        return seq_list_next(v, &khd->rqs[domain], pos); \
} \
 \
static void kyber_##name##_rqs_stop(struct seq_file *m, void *v) \
        __releases(&khd->lock) \
{ \
        struct blk_mq_hw_ctx *hctx = m->private; \
        struct kyber_hctx_data *khd = hctx->sched_data; \
 \
        spin_unlock(&khd->lock); \
} \
 \
static const struct seq_operations kyber_##name##_rqs_seq_ops = { \
        .start = kyber_##name##_rqs_start, \
        .next = kyber_##name##_rqs_next, \
        .stop = kyber_##name##_rqs_stop, \
        .show = blk_mq_debugfs_rq_show, \
}; \
 \
static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
{ \
        struct blk_mq_hw_ctx *hctx = data; \
        struct kyber_hctx_data *khd = hctx->sched_data; \
        wait_queue_entry_t *wait = &khd->domain_wait[domain].wait; \
 \
        seq_printf(m, "%d\n", !list_empty_careful(&wait->entry)); \
        return 0; \
}
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
#undef KYBER_DEBUGFS_DOMAIN_ATTRS

static int kyber_async_depth_show(void *data, struct seq_file *m)
{
        struct request_queue *q = data;
        struct kyber_queue_data *kqd = q->elevator->elevator_data;

        seq_printf(m, "%u\n", kqd->async_depth);
        return 0;
}

static int kyber_cur_domain_show(void *data, struct seq_file *m)
{
        struct blk_mq_hw_ctx *hctx = data;
        struct kyber_hctx_data *khd = hctx->sched_data;

        seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
        return 0;
}

static int kyber_batching_show(void *data, struct seq_file *m)
{
        struct blk_mq_hw_ctx *hctx = data;
        struct kyber_hctx_data *khd = hctx->sched_data;

        seq_printf(m, "%u\n", khd->batching);
        return 0;
}

#define KYBER_QUEUE_DOMAIN_ATTRS(name) \
        {#name "_tokens", 0400, kyber_##name##_tokens_show}
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
        KYBER_QUEUE_DOMAIN_ATTRS(read),
        KYBER_QUEUE_DOMAIN_ATTRS(write),
        KYBER_QUEUE_DOMAIN_ATTRS(discard),
        KYBER_QUEUE_DOMAIN_ATTRS(other),
        {"async_depth", 0400, kyber_async_depth_show},
        {},
};
#undef KYBER_QUEUE_DOMAIN_ATTRS

#define KYBER_HCTX_DOMAIN_ATTRS(name) \
        {#name "_rqs", 0400, .seq_ops = &kyber_##name##_rqs_seq_ops}, \
        {#name "_waiting", 0400, kyber_##name##_waiting_show}
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
        KYBER_HCTX_DOMAIN_ATTRS(read),
        KYBER_HCTX_DOMAIN_ATTRS(write),
        KYBER_HCTX_DOMAIN_ATTRS(discard),
        KYBER_HCTX_DOMAIN_ATTRS(other),
        {"cur_domain", 0400, kyber_cur_domain_show},
        {"batching", 0400, kyber_batching_show},
        {},
};
#undef KYBER_HCTX_DOMAIN_ATTRS
#endif

static struct elevator_type kyber_sched = {
        .ops = {
                .init_sched = kyber_init_sched,
                .exit_sched = kyber_exit_sched,
                .init_hctx = kyber_init_hctx,
                .exit_hctx = kyber_exit_hctx,
                .limit_depth = kyber_limit_depth,
                .bio_merge = kyber_bio_merge,
                .prepare_request = kyber_prepare_request,
                .insert_requests = kyber_insert_requests,
                .finish_request = kyber_finish_request,
                .requeue_request = kyber_finish_request,
                .completed_request = kyber_completed_request,
                .dispatch_request = kyber_dispatch_request,
                .has_work = kyber_has_work,
                .depth_updated = kyber_depth_updated,
        },
#ifdef CONFIG_BLK_DEBUG_FS
        .queue_debugfs_attrs = kyber_queue_debugfs_attrs,
        .hctx_debugfs_attrs = kyber_hctx_debugfs_attrs,
#endif
        .elevator_attrs = kyber_sched_attrs,
        .elevator_name = "kyber",
        .elevator_owner = THIS_MODULE,
};

static int __init kyber_init(void)
{
        return elv_register(&kyber_sched);
}

static void __exit kyber_exit(void)
{
        elv_unregister(&kyber_sched);
}

module_init(kyber_init);
module_exit(kyber_exit);

MODULE_AUTHOR("Omar Sandoval");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Kyber I/O scheduler");