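/*
 * Functions related to io context handling
 */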
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/sched/task.h>

#include "blk.h"
#include "blk-mq-sched.h"

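/*
 * Slab cache for struct io_context allocations.
 */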
static struct kmem_cache *iocontext_cachep;

#ifdef CONFIG_BLK_ICQ
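/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */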
static void get_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	atomic_long_inc(&ioc->refcount);
}

static void icq_free_icq_rcu(struct rcu_head *head)
{
	struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

	kmem_cache_free(icq->__rcu_icq_cache, icq);
}

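/*
 * Exit an icq.  Called with the owning io_context locked.
 */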
static void ioc_exit_icq(struct io_cq *icq)
{
	struct elevator_type *et = icq->q->elevator->type;

	if (icq->flags & ICQ_EXITED)
		return;

	if (et->ops.exit_icq)
		et->ops.exit_icq(icq);

	icq->flags |= ICQ_EXITED;
}

static void ioc_exit_icqs(struct io_context *ioc)
{
	struct io_cq *icq;

	spin_lock_irq(&ioc->lock);
	hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
		ioc_exit_icq(icq);
	spin_unlock_irq(&ioc->lock);
}

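/*
 * Release an icq.  Called with the owning io_context locked.
 */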
static void ioc_destroy_icq(struct io_cq *icq)
{
	struct io_context *ioc = icq->ioc;
	struct request_queue *q = icq->q;
	struct elevator_type *et = q->elevator->type;

	lockdep_assert_held(&ioc->lock);

	radix_tree_delete(&ioc->icq_tree, icq->q->id);
	hlist_del_init(&icq->ioc_node);
	list_del_init(&icq->q_node);

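	/*
	 * Clear the lookup hint if it points to this icq so that stale
	 * pointers are not followed once the icq is freed.
	 */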
	if (rcu_access_pointer(ioc->icq_hint) == icq)
		rcu_assign_pointer(ioc->icq_hint, NULL);

	ioc_exit_icq(icq);

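	/*
	 * @icq->q may be gone by the time the RCU callback runs, making it
	 * impossible to determine the icq_cache.  Record it in @icq now.
	 */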
	icq->__rcu_icq_cache = et->icq_cache;
	icq->flags |= ICQ_DESTROYED;
	call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

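/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees the ioc.
 */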
static void ioc_release_fn(struct work_struct *work)
{
	struct io_context *ioc = container_of(work, struct io_context,
					      release_work);
	spin_lock_irq(&ioc->lock);

	while (!hlist_empty(&ioc->icq_list)) {
		struct io_cq *icq = hlist_entry(ioc->icq_list.first,
						struct io_cq, ioc_node);
		struct request_queue *q = icq->q;

		if (spin_trylock(&q->queue_lock)) {
			ioc_destroy_icq(icq);
			spin_unlock(&q->queue_lock);
		} else {
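			/* Make sure q and icq cannot be freed. */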
			rcu_read_lock();

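			/* Re-acquire the locks in the correct order. */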
			spin_unlock(&ioc->lock);
			spin_lock(&q->queue_lock);
			spin_lock(&ioc->lock);

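			/*
			 * The icq may have been destroyed while the ioc lock
			 * was temporarily released above.
			 */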
			if (!(icq->flags & ICQ_DESTROYED))
				ioc_destroy_icq(icq);

			spin_unlock(&q->queue_lock);
			rcu_read_unlock();
		}
	}

	spin_unlock_irq(&ioc->lock);

	kmem_cache_free(iocontext_cachep, ioc);
}

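/*
 * Releasing icqs requires reverse order double locking and we may already be
 * holding a queue_lock.  Do it asynchronously from a workqueue.
 */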
static bool ioc_delay_free(struct io_context *ioc)
{
	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);
	if (!hlist_empty(&ioc->icq_list)) {
		queue_work(system_power_efficient_wq, &ioc->release_work);
		spin_unlock_irqrestore(&ioc->lock, flags);
		return true;
	}
	spin_unlock_irqrestore(&ioc->lock, flags);
	return false;
}

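/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and destroy all io_cq's.
 */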
void ioc_clear_queue(struct request_queue *q)
{
	LIST_HEAD(icq_list);

	spin_lock_irq(&q->queue_lock);
	list_splice_init(&q->icq_list, &icq_list);
	spin_unlock_irq(&q->queue_lock);

	rcu_read_lock();
	while (!list_empty(&icq_list)) {
		struct io_cq *icq =
			list_entry(icq_list.next, struct io_cq, q_node);

		spin_lock_irq(&icq->ioc->lock);
		if (!(icq->flags & ICQ_DESTROYED))
			ioc_destroy_icq(icq);
		spin_unlock_irq(&icq->ioc->lock);
	}
	rcu_read_unlock();
}
#else /* CONFIG_BLK_ICQ */
static inline void ioc_exit_icqs(struct io_context *ioc)
{
}
static inline bool ioc_delay_free(struct io_context *ioc)
{
	return false;
}
#endif /* CONFIG_BLK_ICQ */

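/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */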
void put_io_context(struct io_context *ioc)
{
	BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
	if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
		kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL_GPL(put_io_context);

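/* Called by the exiting task to drop its io_context reference. */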
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;

	task_lock(task);
	ioc = task->io_context;
	task->io_context = NULL;
	task_unlock(task);

	if (atomic_dec_and_test(&ioc->active_ref)) {
		ioc_exit_icqs(ioc);
		put_io_context(ioc);
	}
}

static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
	struct io_context *ioc;

	ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
				    node);
	if (unlikely(!ioc))
		return NULL;

	atomic_long_set(&ioc->refcount, 1);
	atomic_set(&ioc->active_ref, 1);
#ifdef CONFIG_BLK_ICQ
	spin_lock_init(&ioc->lock);
	INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
	INIT_HLIST_HEAD(&ioc->icq_list);
	INIT_WORK(&ioc->release_work, ioc_release_fn);
#endif
	ioc->ioprio = IOPRIO_DEFAULT;

	return ioc;
}

int set_task_ioprio(struct task_struct *task, int ioprio)
{
	int err;
	const struct cred *cred = current_cred(), *tcred;

	rcu_read_lock();
	tcred = __task_cred(task);
	if (!uid_eq(tcred->uid, cred->euid) &&
	    !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
		rcu_read_unlock();
		return -EPERM;
	}
	rcu_read_unlock();

	err = security_task_setioprio(task, ioprio);
	if (err)
		return err;

	task_lock(task);
	if (unlikely(!task->io_context)) {
		struct io_context *ioc;

		task_unlock(task);

		ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
		if (!ioc)
			return -ENOMEM;

		task_lock(task);
		if (task->flags & PF_EXITING) {
			kmem_cache_free(iocontext_cachep, ioc);
			goto out;
		}
		if (task->io_context)
			kmem_cache_free(iocontext_cachep, ioc);
		else
			task->io_context = ioc;
	}
	task->io_context->ioprio = ioprio;
out:
	task_unlock(task);
	return 0;
}
EXPORT_SYMBOL_GPL(set_task_ioprio);

int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
	struct io_context *ioc = current->io_context;

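	/*
	 * Share the io context with the parent if CLONE_IO is set.
	 */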
	if (clone_flags & CLONE_IO) {
		atomic_inc(&ioc->active_ref);
		tsk->io_context = ioc;
	} else if (ioprio_valid(ioc->ioprio)) {
		tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
		if (!tsk->io_context)
			return -ENOMEM;
		tsk->io_context->ioprio = ioc->ioprio;
	}

	return 0;
}

#ifdef CONFIG_BLK_ICQ
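/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the current task's io_context and @q.
 * Must be called with @q->queue_lock held.
 */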
struct io_cq *ioc_lookup_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct io_cq *icq;

	lockdep_assert_held(&q->queue_lock);

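	/*
	 * icq's are indexed from @ioc using a radix tree and a hint pointer,
	 * both of which are protected with RCU.  Check the hint first and
	 * fall back to the radix tree lookup, updating the hint on a hit.
	 */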
	rcu_read_lock();
	icq = rcu_dereference(ioc->icq_hint);
	if (icq && icq->q == q)
		goto out;

	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
	if (icq && icq->q == q)
		rcu_assign_pointer(ioc->icq_hint, icq);
	else
		icq = NULL;
out:
	rcu_read_unlock();
	return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

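/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 *
 * Make sure an io_cq linking the current task's io_context and @q exists.
 * The caller is responsible for ensuring @q is alive and will stay alive
 * until this function returns.
 */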
static struct io_cq *ioc_create_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct elevator_type *et = q->elevator->type;
	struct io_cq *icq;

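	/* allocate the icq */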
	icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
				    q->node);
	if (!icq)
		return NULL;

	if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
		kmem_cache_free(et->icq_cache, icq);
		return NULL;
	}

	icq->ioc = ioc;
	icq->q = q;
	INIT_LIST_HEAD(&icq->q_node);
	INIT_HLIST_NODE(&icq->ioc_node);

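	/* lock both q and ioc and try to link @icq */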
	spin_lock_irq(&q->queue_lock);
	spin_lock(&ioc->lock);

	if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
		hlist_add_head(&icq->ioc_node, &ioc->icq_list);
		list_add(&icq->q_node, &q->icq_list);
		if (et->ops.init_icq)
			et->ops.init_icq(icq);
	} else {
		kmem_cache_free(et->icq_cache, icq);
		icq = ioc_lookup_icq(q);
		if (!icq)
			printk(KERN_ERR "cfq: icq link failed!\n");
	}

	spin_unlock(&ioc->lock);
	spin_unlock_irq(&q->queue_lock);
	radix_tree_preload_end();
	return icq;
}

struct io_cq *ioc_find_get_icq(struct request_queue *q)
{
	struct io_context *ioc = current->io_context;
	struct io_cq *icq = NULL;

	if (unlikely(!ioc)) {
		ioc = alloc_io_context(GFP_ATOMIC, q->node);
		if (!ioc)
			return NULL;

		task_lock(current);
		if (current->io_context) {
			kmem_cache_free(iocontext_cachep, ioc);
			ioc = current->io_context;
		} else {
			current->io_context = ioc;
		}

		get_io_context(ioc);
		task_unlock(current);
	} else {
		get_io_context(ioc);

		spin_lock_irq(&q->queue_lock);
		icq = ioc_lookup_icq(q);
		spin_unlock_irq(&q->queue_lock);
	}

	if (!icq) {
		icq = ioc_create_icq(q);
		if (!icq) {
			put_io_context(ioc);
			return NULL;
		}
	}
	return icq;
}
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
#endif /* CONFIG_BLK_ICQ */

static int __init blk_ioc_init(void)
{
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
	return 0;
}
subsys_initcall(blk_ioc_init);