// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/sched/task.h>

#include "blk.h"
#include "blk-mq-sched.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

#ifdef CONFIG_BLK_ICQ
/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
static void get_io_context(struct io_context *ioc)
{
    BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
    atomic_long_inc(&ioc->refcount);
}

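/*
 * RCU callback that frees an icq once readers that may still be
 * dereferencing it have finished.  The owning icq_cache is recorded in
 * @icq by ioc_destroy_icq() because icq->q may be gone by the time the
 * callback runs.
 */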
static void icq_free_icq_rcu(struct rcu_head *head)
{
    struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

    kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/*
 * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
    struct elevator_type *et = icq->q->elevator->type;

    if (icq->flags & ICQ_EXITED)
        return;

    if (et->ops.exit_icq)
        et->ops.exit_icq(icq);

    icq->flags |= ICQ_EXITED;
}

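/*
 * Exit all icqs linked from @ioc.  Called when the last task using the
 * io_context detaches from it.
 */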
static void ioc_exit_icqs(struct io_context *ioc)
{
    struct io_cq *icq;

    spin_lock_irq(&ioc->lock);
    hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
        ioc_exit_icq(icq);
    spin_unlock_irq(&ioc->lock);
}

/*
 * Release an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_destroy_icq(struct io_cq *icq)
{
    struct io_context *ioc = icq->ioc;
    struct request_queue *q = icq->q;
    struct elevator_type *et = q->elevator->type;

    lockdep_assert_held(&ioc->lock);

    radix_tree_delete(&ioc->icq_tree, icq->q->id);
    hlist_del_init(&icq->ioc_node);
    list_del_init(&icq->q_node);

    /*
     * Both setting the lookup hint to @icq and clearing it are done
     * under queue_lock.  If the hint isn't pointing to @icq now, it
     * never will.  Hint assignment itself can race safely.
     */
    if (rcu_access_pointer(ioc->icq_hint) == icq)
        rcu_assign_pointer(ioc->icq_hint, NULL);

    ioc_exit_icq(icq);

    /*
     * @icq->q might have gone away by the time RCU callback runs
     * making it impossible to determine icq_cache.  Record it in @icq.
     */
    icq->__rcu_icq_cache = et->icq_cache;
    icq->flags |= ICQ_DESTROYED;
    call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
    struct io_context *ioc = container_of(work, struct io_context,
                          release_work);
    spin_lock_irq(&ioc->lock);

    while (!hlist_empty(&ioc->icq_list)) {
        struct io_cq *icq = hlist_entry(ioc->icq_list.first,
                        struct io_cq, ioc_node);
        struct request_queue *q = icq->q;

        if (spin_trylock(&q->queue_lock)) {
            ioc_destroy_icq(icq);
            spin_unlock(&q->queue_lock);
        } else {
            /* Make sure q and icq cannot be freed. */
            rcu_read_lock();

            /* Re-acquire the locks in the correct order. */
            spin_unlock(&ioc->lock);
            spin_lock(&q->queue_lock);
            spin_lock(&ioc->lock);

            /*
             * The icq may have been destroyed when the ioc lock
             * was released.
             */
            if (!(icq->flags & ICQ_DESTROYED))
                ioc_destroy_icq(icq);

            spin_unlock(&q->queue_lock);
            rcu_read_unlock();
        }
    }

    spin_unlock_irq(&ioc->lock);

    kmem_cache_free(iocontext_cachep, ioc);
}

/*
 * Releasing icqs requires reverse order double locking and we may already be
 * holding a queue_lock.  Do it asynchronously from a workqueue.
 */
static bool ioc_delay_free(struct io_context *ioc)
{
    unsigned long flags;

    spin_lock_irqsave(&ioc->lock, flags);
    if (!hlist_empty(&ioc->icq_list)) {
        queue_work(system_power_efficient_wq, &ioc->release_work);
        spin_unlock_irqrestore(&ioc->lock, flags);
        return true;
    }
    spin_unlock_irqrestore(&ioc->lock, flags);
    return false;
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and destroy all io_cq's associated with @q.
 */
void ioc_clear_queue(struct request_queue *q)
{
    LIST_HEAD(icq_list);

    spin_lock_irq(&q->queue_lock);
    list_splice_init(&q->icq_list, &icq_list);
    spin_unlock_irq(&q->queue_lock);

    rcu_read_lock();
    while (!list_empty(&icq_list)) {
        struct io_cq *icq =
            list_entry(icq_list.next, struct io_cq, q_node);

        spin_lock_irq(&icq->ioc->lock);
        if (!(icq->flags & ICQ_DESTROYED))
            ioc_destroy_icq(icq);
        spin_unlock_irq(&icq->ioc->lock);
    }
    rcu_read_unlock();
}
#else /* CONFIG_BLK_ICQ */
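/* No-op stubs for kernels built without icq support. */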
static inline void ioc_exit_icqs(struct io_context *ioc)
{
}
static inline bool ioc_delay_free(struct io_context *ioc)
{
    return false;
}
#endif /* CONFIG_BLK_ICQ */

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
    BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
    if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
        kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL_GPL(put_io_context);

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
    struct io_context *ioc;

    task_lock(task);
    ioc = task->io_context;
    task->io_context = NULL;
    task_unlock(task);

    if (atomic_dec_and_test(&ioc->active_ref)) {
        ioc_exit_icqs(ioc);
        put_io_context(ioc);
    }
}

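/*
 * Allocate and initialize a new io_context with a single reference, a
 * single active reference, and the default I/O priority.
 */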
static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
    struct io_context *ioc;

    ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
                    node);
    if (unlikely(!ioc))
        return NULL;

    atomic_long_set(&ioc->refcount, 1);
    atomic_set(&ioc->active_ref, 1);
#ifdef CONFIG_BLK_ICQ
    spin_lock_init(&ioc->lock);
    INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
    INIT_HLIST_HEAD(&ioc->icq_list);
    INIT_WORK(&ioc->release_work, ioc_release_fn);
#endif
    ioc->ioprio = IOPRIO_DEFAULT;

    return ioc;
}

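/*
 * Set @task's I/O priority after checking that the caller is allowed to
 * do so, lazily allocating an io_context for @task if it doesn't have
 * one yet.
 */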
int set_task_ioprio(struct task_struct *task, int ioprio)
{
    int err;
    const struct cred *cred = current_cred(), *tcred;

    rcu_read_lock();
    tcred = __task_cred(task);
    if (!uid_eq(tcred->uid, cred->euid) &&
        !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
        rcu_read_unlock();
        return -EPERM;
    }
    rcu_read_unlock();

    err = security_task_setioprio(task, ioprio);
    if (err)
        return err;

    task_lock(task);
    if (unlikely(!task->io_context)) {
        struct io_context *ioc;

        task_unlock(task);

        ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
        if (!ioc)
            return -ENOMEM;

        task_lock(task);
        if (task->flags & PF_EXITING) {
            kmem_cache_free(iocontext_cachep, ioc);
            goto out;
        }
        if (task->io_context)
            kmem_cache_free(iocontext_cachep, ioc);
        else
            task->io_context = ioc;
    }
    task->io_context->ioprio = ioprio;
out:
    task_unlock(task);
    return 0;
}
EXPORT_SYMBOL_GPL(set_task_ioprio);

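/*
 * Called on fork: share the parent's io_context if CLONE_IO is set,
 * otherwise copy just the I/O priority into a fresh io_context when one
 * is set on the parent.
 */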
int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
    struct io_context *ioc = current->io_context;

    /*
     * Share io context with parent, if CLONE_IO is set
     */
    if (clone_flags & CLONE_IO) {
        atomic_inc(&ioc->active_ref);
        tsk->io_context = ioc;
    } else if (ioprio_valid(ioc->ioprio)) {
        tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
        if (!tsk->io_context)
            return -ENOMEM;
        tsk->io_context->ioprio = ioc->ioprio;
    }

    return 0;
}

#ifdef CONFIG_BLK_ICQ
/**
 * ioc_lookup_icq - lookup io_cq from the current task's io_context
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the current task's io_context and @q.
 * Must be called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct request_queue *q)
{
    struct io_context *ioc = current->io_context;
    struct io_cq *icq;

    lockdep_assert_held(&q->queue_lock);

    /*
     * icq's are indexed from the io_context using a radix tree and a
     * hint pointer, both of which are protected with RCU.  All removals
     * are done holding both q and ioc locks, and we're holding q lock -
     * if we find an icq which points to us, it's guaranteed to be valid.
     */
    rcu_read_lock();
    icq = rcu_dereference(ioc->icq_hint);
    if (icq && icq->q == q)
        goto out;

    icq = radix_tree_lookup(&ioc->icq_tree, q->id);
    if (icq && icq->q == q)
        rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
    else
        icq = NULL;
out:
    rcu_read_unlock();
    return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 *
 * Make sure an io_cq linking the current task's io_context and @q exists.
 * If it doesn't, a new icq is allocated (with GFP_ATOMIC) and linked.
 *
 * The caller is responsible for ensuring the io_context won't go away and
 * that @q is alive and will stay alive until this function returns.
 */
static struct io_cq *ioc_create_icq(struct request_queue *q)
{
    struct io_context *ioc = current->io_context;
    struct elevator_type *et = q->elevator->type;
    struct io_cq *icq;

    /* allocate stuff */
    icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
                    q->node);
    if (!icq)
        return NULL;

    if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
        kmem_cache_free(et->icq_cache, icq);
        return NULL;
    }

    icq->ioc = ioc;
    icq->q = q;
    INIT_LIST_HEAD(&icq->q_node);
    INIT_HLIST_NODE(&icq->ioc_node);

    /* lock both q and ioc and try to link @icq */
    spin_lock_irq(&q->queue_lock);
    spin_lock(&ioc->lock);

    if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
        hlist_add_head(&icq->ioc_node, &ioc->icq_list);
        list_add(&icq->q_node, &q->icq_list);
        if (et->ops.init_icq)
            et->ops.init_icq(icq);
    } else {
        kmem_cache_free(et->icq_cache, icq);
        icq = ioc_lookup_icq(q);
        if (!icq)
            printk(KERN_ERR "cfq: icq link failed!\n");
    }

    spin_unlock(&ioc->lock);
    spin_unlock_irq(&q->queue_lock);
    radix_tree_preload_end();
    return icq;
}

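/*
 * Grab a reference to the current task's io_context (allocating one if
 * needed) and return the io_cq linking it to @q, creating the icq if it
 * doesn't exist yet.  Returns NULL on allocation failure.
 */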
struct io_cq *ioc_find_get_icq(struct request_queue *q)
{
    struct io_context *ioc = current->io_context;
    struct io_cq *icq = NULL;

    if (unlikely(!ioc)) {
        ioc = alloc_io_context(GFP_ATOMIC, q->node);
        if (!ioc)
            return NULL;

        task_lock(current);
        if (current->io_context) {
            kmem_cache_free(iocontext_cachep, ioc);
            ioc = current->io_context;
        } else {
            current->io_context = ioc;
        }

        get_io_context(ioc);
        task_unlock(current);
    } else {
        get_io_context(ioc);

        spin_lock_irq(&q->queue_lock);
        icq = ioc_lookup_icq(q);
        spin_unlock_irq(&q->queue_lock);
    }

    if (!icq) {
        icq = ioc_create_icq(q);
        if (!icq) {
            put_io_context(ioc);
            return NULL;
        }
    }
    return icq;
}
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
#endif /* CONFIG_BLK_ICQ */

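/* Create the slab cache used for io_context allocations at boot. */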
static int __init blk_ioc_init(void)
{
    iocontext_cachep = kmem_cache_create("blkdev_ioc",
            sizeof(struct io_context), 0, SLAB_PANIC, NULL);
    return 0;
}
subsys_initcall(blk_ioc_init);