// SPDX-License-Identifier: GPL-2.0
/*
 *  Block device elevator/IO-scheduler.
 *
 *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/blktrace_api.h>
#include <linux/hash.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>

#include <trace/events/block.h>

#include "elevator.h"
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-pm.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

/*
 * Merge hash stuff.
 */
#define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))

/*
 * Query io scheduler to see if the current process issuing bio may be
 * merged with rq.
 */
static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
	struct request_queue *q = rq->q;
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.allow_merge)
		return e->type->ops.allow_merge(q, rq, bio);

	return 1;
}

/*
 * can we safely merge with this request?
 */
bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
	if (!blk_rq_merge_ok(rq, bio))
		return false;

	if (!elv_iosched_allow_bio_merge(rq, bio))
		return false;

	return true;
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static inline bool elv_support_features(unsigned int elv_features,
					unsigned int required_features)
{
	return (required_features & elv_features) == required_features;
}

/**
 * elevator_match - Test an elevator name and features
 * @e: Scheduler to test
 * @name: Elevator name to test
 * @required_features: Features that the elevator must provide
 *
 * Return true if the elevator @e name matches @name and if @e provides all
 * the features specified by @required_features.
 */
static bool elevator_match(const struct elevator_type *e, const char *name,
			   unsigned int required_features)
{
	if (!elv_support_features(e->elevator_features, required_features))
		return false;
	if (!strcmp(e->elevator_name, name))
		return true;
	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
		return true;

	return false;
}

/**
 * elevator_find - Find an elevator
 * @name: Name of the elevator to find
 * @required_features: Features that the elevator must provide
 *
 * Return the first registered scheduler with name @name and supporting the
 * features @required_features, or NULL if none is found.
 */
static struct elevator_type *elevator_find(const char *name,
					   unsigned int required_features)
{
	struct elevator_type *e;

	list_for_each_entry(e, &elv_list, list) {
		if (elevator_match(e, name, required_features))
			return e;
	}

	return NULL;
}

static void elevator_put(struct elevator_type *e)
{
	module_put(e->elevator_owner);
}

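/*
 * Look up an elevator by name, optionally loading its module if it is not
 * registered yet, and take a reference on its owning module. The caller
 * must drop that reference with elevator_put().
 */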
static struct elevator_type *elevator_get(struct request_queue *q,
					  const char *name, bool try_loading)
{
	struct elevator_type *e;

	spin_lock(&elv_list_lock);

	e = elevator_find(name, q->required_elevator_features);
	if (!e && try_loading) {
		spin_unlock(&elv_list_lock);
		request_module("%s-iosched", name);
		spin_lock(&elv_list_lock);
		e = elevator_find(name, q->required_elevator_features);
	}

	if (e && !try_module_get(e->elevator_owner))
		e = NULL;

	spin_unlock(&elv_list_lock);
	return e;
}

static struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
				      struct elevator_type *e)
{
	struct elevator_queue *eq;

	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
	if (unlikely(!eq))
		return NULL;

	eq->type = e;
	kobject_init(&eq->kobj, &elv_ktype);
	mutex_init(&eq->sysfs_lock);
	hash_init(eq->hash);

	return eq;
}
EXPORT_SYMBOL(elevator_alloc);

static void elevator_release(struct kobject *kobj)
{
	struct elevator_queue *e;

	e = container_of(kobj, struct elevator_queue, kobj);
	elevator_put(e->type);
	kfree(e);
}

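/*
 * Tear down the current I/O scheduler for @q: clear per-process io_context
 * links, free scheduler-owned requests, exit the scheduler, and drop the
 * kobject reference, which frees the elevator_queue via elevator_release().
 */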
void elevator_exit(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;

	ioc_clear_queue(q);
	blk_mq_sched_free_rqs(q);

	mutex_lock(&e->sysfs_lock);
	blk_mq_exit_sched(q, e);
	mutex_unlock(&e->sysfs_lock);

	kobject_put(&e->kobj);
}

static inline void __elv_rqhash_del(struct request *rq)
{
	hash_del(&rq->hash);
	rq->rq_flags &= ~RQF_HASHED;
}

void elv_rqhash_del(struct request_queue *q, struct request *rq)
{
	if (ELV_ON_HASH(rq))
		__elv_rqhash_del(rq);
}
EXPORT_SYMBOL_GPL(elv_rqhash_del);

void elv_rqhash_add(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	BUG_ON(ELV_ON_HASH(rq));
	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
	rq->rq_flags |= RQF_HASHED;
}
EXPORT_SYMBOL_GPL(elv_rqhash_add);

void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
{
	__elv_rqhash_del(rq);
	elv_rqhash_add(q, rq);
}

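/*
 * Look up a request whose end sector matches @offset, pruning entries that
 * are no longer mergeable as we walk the hash chain. A hit is a candidate
 * for a back merge at @offset.
 */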
struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
{
	struct elevator_queue *e = q->elevator;
	struct hlist_node *next;
	struct request *rq;

	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
		BUG_ON(!ELV_ON_HASH(rq));

		if (unlikely(!rq_mergeable(rq))) {
			__elv_rqhash_del(rq);
			continue;
		}

		if (rq_hash_key(rq) == offset)
			return rq;
	}

	return NULL;
}

/*
 * RB-tree support functions for inserting/lookup/removal of requests
 * in a sorted RB tree.
 */
void elv_rb_add(struct rb_root *root, struct request *rq)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct request *__rq;

	while (*p) {
		parent = *p;
		__rq = rb_entry(parent, struct request, rb_node);

		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	rb_link_node(&rq->rb_node, parent, p);
	rb_insert_color(&rq->rb_node, root);
}
EXPORT_SYMBOL(elv_rb_add);

void elv_rb_del(struct rb_root *root, struct request *rq)
{
	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
	rb_erase(&rq->rb_node, root);
	RB_CLEAR_NODE(&rq->rb_node);
}
EXPORT_SYMBOL(elv_rb_del);

struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
	struct rb_node *n = root->rb_node;
	struct request *rq;

	while (n) {
		rq = rb_entry(n, struct request, rb_node);

		if (sector < blk_rq_pos(rq))
			n = n->rb_left;
		else if (sector > blk_rq_pos(rq))
			n = n->rb_right;
		else
			return rq;
	}

	return NULL;
}
EXPORT_SYMBOL(elv_rb_find);

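/*
 * Try to find a request that @bio can be merged into. On success, *@req is
 * set to that request and the merge type (front/back/discard) is returned;
 * otherwise ELEVATOR_NO_MERGE is returned.
 */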
enum elv_merge elv_merge(struct request_queue *q, struct request **req,
		struct bio *bio)
{
	struct elevator_queue *e = q->elevator;
	struct request *__rq;

	/*
	 * Levels of merges:
	 *	nomerges:  No merges at all attempted
	 *	noxmerges: Only simple one-hit cache try
	 *	merges:	   All merge tries attempted
	 */
	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
		return ELEVATOR_NO_MERGE;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
		enum elv_merge ret = blk_try_merge(q->last_merge, bio);

		if (ret != ELEVATOR_NO_MERGE) {
			*req = q->last_merge;
			return ret;
		}
	}

	if (blk_queue_noxmerges(q))
		return ELEVATOR_NO_MERGE;

	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
	if (__rq && elv_bio_merge_ok(__rq, bio)) {
		*req = __rq;

		if (blk_discard_mergable(__rq))
			return ELEVATOR_DISCARD_MERGE;
		return ELEVATOR_BACK_MERGE;
	}

	if (e->type->ops.request_merge)
		return e->type->ops.request_merge(q, req, bio);

	return ELEVATOR_NO_MERGE;
}

/*
 * Attempt to do an insertion back merge. Only check for the case where
 * we can append 'rq' to an existing request, so we can throw 'rq' away
 * afterwards.
 *
 * Returns true if we merged, false otherwise. 'free' will contain all
 * requests that need to be freed.
 */
bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
			      struct list_head *free)
{
	struct request *__rq;
	bool ret;

	if (blk_queue_nomerges(q))
		return false;

	/*
	 * First try one-hit cache.
	 */
	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
		list_add(&rq->queuelist, free);
		return true;
	}

	if (blk_queue_noxmerges(q))
		return false;

	ret = false;
	/*
	 * See if our hash lookup can find a potential backmerge.
	 */
	while (1) {
		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
			break;

		list_add(&rq->queuelist, free);
		/* The merged request could be merged with others, try again */
		ret = true;
		rq = __rq;
	}

	return ret;
}

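/*
 * @rq just grew by merging a bio into it. Let the scheduler update its
 * state, rehash @rq if its end sector changed (back merge), and remember
 * it as the one-hit merge cache.
 */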
void elv_merged_request(struct request_queue *q, struct request *rq,
		enum elv_merge type)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.request_merged)
		e->type->ops.request_merged(q, rq, type);

	if (type == ELEVATOR_BACK_MERGE)
		elv_rqhash_reposition(q, rq);

	q->last_merge = rq;
}

void elv_merge_requests(struct request_queue *q, struct request *rq,
			struct request *next)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.requests_merged)
		e->type->ops.requests_merged(q, rq, next);

	elv_rqhash_reposition(q, rq);
	q->last_merge = rq;
}

struct request *elv_latter_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.next_request)
		return e->type->ops.next_request(q, rq);

	return NULL;
}

struct request *elv_former_request(struct request_queue *q, struct request *rq)
{
	struct elevator_queue *e = q->elevator;

	if (e->type->ops.former_request)
		return e->type->ops.former_request(q, rq);

	return NULL;
}

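/*
 * sysfs plumbing: expose the scheduler's elv_fs_entry attributes under
 * /sys/block/<dev>/queue/iosched, serialized against scheduler teardown
 * by e->sysfs_lock.
 */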
#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)

static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->show)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->show(e, page) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
{
	struct elv_fs_entry *entry = to_elv(attr);
	struct elevator_queue *e;
	ssize_t error;

	if (!entry->store)
		return -EIO;

	e = container_of(kobj, struct elevator_queue, kobj);
	mutex_lock(&e->sysfs_lock);
	error = e->type ? entry->store(e, page, length) : -ENOENT;
	mutex_unlock(&e->sysfs_lock);
	return error;
}

static const struct sysfs_ops elv_sysfs_ops = {
	.show = elv_attr_show,
	.store = elv_attr_store,
};

static struct kobj_type elv_ktype = {
	.sysfs_ops = &elv_sysfs_ops,
	.release = elevator_release,
};

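/*
 * Create the "iosched" kobject under the queue's kobject and populate it
 * with the scheduler's attributes. Attribute creation stops at the first
 * failure, but the elevator is still marked registered.
 */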
int elv_register_queue(struct request_queue *q, bool uevent)
{
	struct elevator_queue *e = q->elevator;
	int error;

	lockdep_assert_held(&q->sysfs_lock);

	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
	if (!error) {
		struct elv_fs_entry *attr = e->type->elevator_attrs;

		if (attr) {
			while (attr->attr.name) {
				if (sysfs_create_file(&e->kobj, &attr->attr))
					break;
				attr++;
			}
		}
		if (uevent)
			kobject_uevent(&e->kobj, KOBJ_ADD);

		e->registered = 1;
	}
	return error;
}

void elv_unregister_queue(struct request_queue *q)
{
	struct elevator_queue *e = q->elevator;

	lockdep_assert_held(&q->sysfs_lock);

	if (e && e->registered) {
		kobject_uevent(&e->kobj, KOBJ_REMOVE);
		kobject_del(&e->kobj);

		e->registered = 0;
	}
}

int elv_register(struct elevator_type *e)
{
	/* insert_requests and dispatch_request are mandatory */
	if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
		return -EINVAL;

	/* create icq_cache if requested */
	if (e->icq_size) {
		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
			return -EINVAL;

		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
			 "%s_io_cq", e->elevator_name);
		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
						 e->icq_align, 0, NULL);
		if (!e->icq_cache)
			return -ENOMEM;
	}

	/* register, don't allow duplicate names */
	spin_lock(&elv_list_lock);
	if (elevator_find(e->elevator_name, 0)) {
		spin_unlock(&elv_list_lock);
		kmem_cache_destroy(e->icq_cache);
		return -EBUSY;
	}
	list_add_tail(&e->list, &elv_list);
	spin_unlock(&elv_list_lock);

	printk(KERN_INFO "io scheduler %s registered\n", e->elevator_name);

	return 0;
}
EXPORT_SYMBOL_GPL(elv_register);

void elv_unregister(struct elevator_type *e)
{
	/* unregister */
	spin_lock(&elv_list_lock);
	list_del_init(&e->list);
	spin_unlock(&elv_list_lock);

	/*
	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
	 * sure all RCU operations are complete, then proceed.
	 */
	if (e->icq_cache) {
		rcu_barrier();
		kmem_cache_destroy(e->icq_cache);
		e->icq_cache = NULL;
	}
}
EXPORT_SYMBOL_GPL(elv_unregister);

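/*
 * Replace the current scheduler of @q with @new_e (or none, if @new_e is
 * NULL). The caller must hold q->sysfs_lock and have the queue frozen and
 * quiesced; see elevator_switch() below.
 */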
int elevator_switch_mq(struct request_queue *q,
		       struct elevator_type *new_e)
{
	int ret;

	lockdep_assert_held(&q->sysfs_lock);

	if (q->elevator) {
		elv_unregister_queue(q);
		elevator_exit(q);
	}

	ret = blk_mq_init_sched(q, new_e);
	if (ret)
		goto out;

	if (new_e) {
		ret = elv_register_queue(q, true);
		if (ret) {
			elevator_exit(q);
			goto out;
		}
	}

	if (new_e)
		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
	else
		blk_add_trace_msg(q, "elv switch: none");

out:
	return ret;
}

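/*
 * I/O schedulers only make sense for blk-mq queues whose tag set does not
 * opt out of scheduling with BLK_MQ_F_NO_SCHED.
 */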
static inline bool elv_support_iosched(struct request_queue *q)
{
	if (!queue_is_mq(q) ||
	    (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
		return false;
	return true;
}

/*
 * For single queue devices, default to using mq-deadline. If we have multiple
 * queues or mq-deadline is not available, default to "none".
 */
static struct elevator_type *elevator_get_default(struct request_queue *q)
{
	if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
		return NULL;

	if (q->nr_hw_queues != 1 &&
	    !blk_mq_is_shared_tags(q->tag_set->flags))
		return NULL;

	return elevator_get(q, "mq-deadline", false);
}

/*
 * Get the first elevator providing the features required by the request queue.
 * Default to "none" if no matching elevator is found.
 */
static struct elevator_type *elevator_get_by_features(struct request_queue *q)
{
	struct elevator_type *e, *found = NULL;

	spin_lock(&elv_list_lock);

	list_for_each_entry(e, &elv_list, list) {
		if (elv_support_features(e->elevator_features,
					 q->required_elevator_features)) {
			found = e;
			break;
		}
	}

	if (found && !try_module_get(found->elevator_owner))
		found = NULL;

	spin_unlock(&elv_list_lock);
	return found;
}

/*
 * For a device queue that has no required features, use the default elevator
 * settings. Otherwise, use the first elevator available matching the required
 * features. If no suitable elevator is found, fall back to the default
 * elevator settings (no elevator).
 */
void elevator_init_mq(struct request_queue *q)
{
	struct elevator_type *e;
	int err;

	if (!elv_support_iosched(q))
		return;

	WARN_ON_ONCE(blk_queue_registered(q));

	if (unlikely(q->elevator))
		return;

	if (!q->required_elevator_features)
		e = elevator_get_default(q);
	else
		e = elevator_get_by_features(q);
	if (!e)
		return;

	/*
	 * We are called before adding disk, when there isn't any FS I/O,
	 * so freezing queue plus canceling dispatch work is enough to
	 * drain any dispatch activities originated from passthrough
	 * requests, then no need to quiesce queue which may add long boot
	 * latency, especially when lots of disks are involved.
	 */
	blk_mq_freeze_queue(q);
	blk_mq_cancel_work_sync(q);

	err = blk_mq_init_sched(q, e);

	blk_mq_unfreeze_queue(q);

	if (err) {
		pr_warn("\"%s\" elevator initialization failed, "
			"falling back to \"none\"\n", e->elevator_name);
		elevator_put(e);
	}
}

/*
 * Switch to the new_e io scheduler. Be careful not to introduce deadlocks -
 * we don't free the old io scheduler before we have allocated what we need
 * for the new one. This way we have a chance of going back to the old one,
 * if the new one fails init for some reason.
 */
static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
{
	int err;

	lockdep_assert_held(&q->sysfs_lock);

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	err = elevator_switch_mq(q, new_e);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return err;
}

/*
 * Switch this queue to the given IO scheduler.
 */
static int __elevator_change(struct request_queue *q, const char *name)
{
	char elevator_name[ELV_NAME_MAX];
	struct elevator_type *e;

	/* Make sure queue is not in the middle of being removed */
	if (!blk_queue_registered(q))
		return -ENOENT;

	/*
	 * Special case for mq, turn off scheduling
	 */
	if (!strncmp(name, "none", 4)) {
		if (!q->elevator)
			return 0;
		return elevator_switch(q, NULL);
	}

	strlcpy(elevator_name, name, sizeof(elevator_name));
	e = elevator_get(q, strstrip(elevator_name), true);
	if (!e)
		return -EINVAL;

	if (q->elevator &&
	    elevator_match(q->elevator->type, elevator_name, 0)) {
		elevator_put(e);
		return 0;
	}

	return elevator_switch(q, e);
}

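/*
 * sysfs "scheduler" store handler: parse the requested scheduler name and
 * switch to it. Returns @count on success (including a no-op switch) or a
 * negative errno.
 */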
ssize_t elv_iosched_store(struct request_queue *q, const char *name,
			  size_t count)
{
	int ret;

	if (!elv_support_iosched(q))
		return count;

	ret = __elevator_change(q, name);
	if (!ret)
		return count;

	return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
{
	struct elevator_queue *e = q->elevator;
	struct elevator_type *elv = NULL;
	struct elevator_type *__e;
	int len = 0;

	if (!queue_is_mq(q))
		return sprintf(name, "none\n");

	if (!q->elevator)
		len += sprintf(name+len, "[none] ");
	else
		elv = e->type;

	spin_lock(&elv_list_lock);
	list_for_each_entry(__e, &elv_list, list) {
		if (elv && elevator_match(elv, __e->elevator_name, 0)) {
			len += sprintf(name+len, "[%s] ", elv->elevator_name);
			continue;
		}
		if (elv_support_iosched(q) &&
		    elevator_match(__e, __e->elevator_name,
				   q->required_elevator_features))
			len += sprintf(name+len, "%s ", __e->elevator_name);
	}
	spin_unlock(&elv_list_lock);

	if (q->elevator)
		len += sprintf(name+len, "none");

	len += sprintf(name+len, "\n");
	return len;
}

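/*
 * RB-tree helpers for schedulers that keep requests sorted by sector:
 * return the request immediately before/after @rq in the tree, or NULL.
 */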
struct request *elv_rb_former_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbprev = rb_prev(&rq->rb_node);

	if (rbprev)
		return rb_entry_rq(rbprev);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_former_request);

struct request *elv_rb_latter_request(struct request_queue *q,
				      struct request *rq)
{
	struct rb_node *rbnext = rb_next(&rq->rb_node);

	if (rbnext)
		return rb_entry_rq(rbnext);

	return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);

static int __init elevator_setup(char *str)
{
	pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
		"Please use sysfs to set IO scheduler for individual devices.\n");
	return 1;
}

__setup("elevator=", elevator_setup);