/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

#include "blk.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
void get_io_context(struct io_context *ioc)
{
    BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
    atomic_long_inc(&ioc->refcount);
}
EXPORT_SYMBOL(get_io_context);

static void icq_free_icq_rcu(struct rcu_head *head)
{
    struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

    kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/* Exit an icq.  Called with both ioc and q locked. */
static void ioc_exit_icq(struct io_cq *icq)
{
    struct elevator_type *et = icq->q->elevator->type;

    if (icq->flags & ICQ_EXITED)
        return;

    if (et->ops.elevator_exit_icq_fn)
        et->ops.elevator_exit_icq_fn(icq);

    icq->flags |= ICQ_EXITED;
}

/* Release an icq.  Called with both ioc and q locked. */
static void ioc_destroy_icq(struct io_cq *icq)
{
    struct io_context *ioc = icq->ioc;
    struct request_queue *q = icq->q;
    struct elevator_type *et = q->elevator->type;

    lockdep_assert_held(&ioc->lock);
    lockdep_assert_held(q->queue_lock);

    radix_tree_delete(&ioc->icq_tree, icq->q->id);
    hlist_del_init(&icq->ioc_node);
    list_del_init(&icq->q_node);

    /*
     * Both setting lookup hint to and clearing it from @icq are done
     * under queue_lock.  If it's not pointing to @icq now, it never
     * will.  Hint assignment itself can race safely.
     */
    if (rcu_access_pointer(ioc->icq_hint) == icq)
        rcu_assign_pointer(ioc->icq_hint, NULL);

    ioc_exit_icq(icq);

    /*
     * @icq->q might have gone away by the time RCU callback runs
     * making it impossible to determine icq_cache.  Record it in @icq.
     */
    icq->__rcu_icq_cache = et->icq_cache;
    call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
    struct io_context *ioc = container_of(work, struct io_context,
                          release_work);
    unsigned long flags;

    /*
     * Exiting icq may call into put_io_context() through elevator
     * which will trigger lockdep warning.  The ioc's are guaranteed to
     * be different, so use a different locking subclass here.  Use the
     * irqsave variant as there's no spin_lock_irq_nested().
     */
    spin_lock_irqsave_nested(&ioc->lock, flags, 1);

    while (!hlist_empty(&ioc->icq_list)) {
        struct io_cq *icq = hlist_entry(ioc->icq_list.first,
                        struct io_cq, ioc_node);
        struct request_queue *q = icq->q;

        if (spin_trylock(q->queue_lock)) {
            ioc_destroy_icq(icq);
            spin_unlock(q->queue_lock);
        } else {
            spin_unlock_irqrestore(&ioc->lock, flags);
            cpu_relax();
            spin_lock_irqsave_nested(&ioc->lock, flags, 1);
        }
    }

    spin_unlock_irqrestore(&ioc->lock, flags);

    kmem_cache_free(iocontext_cachep, ioc);
}

/**
 * put_io_context - put a reference of io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
    unsigned long flags;
    bool free_ioc = false;

    if (ioc == NULL)
        return;

    BUG_ON(atomic_long_read(&ioc->refcount) <= 0);

    /*
     * Releasing ioc requires reverse order double locking and we may
     * already be holding a queue_lock.  Do it asynchronously from wq.
     */
    if (atomic_long_dec_and_test(&ioc->refcount)) {
        spin_lock_irqsave(&ioc->lock, flags);
        if (!hlist_empty(&ioc->icq_list))
            queue_work(system_power_efficient_wq,
                    &ioc->release_work);
        else
            free_ioc = true;
        spin_unlock_irqrestore(&ioc->lock, flags);
    }

    if (free_ioc)
        kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL(put_io_context);
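
/*
 * Illustrative sketch, not part of the original file: a caller that wants
 * to hold on to an io_context it reached through %current pairs
 * get_io_context() with a later put_io_context().  The helper name is
 * hypothetical.
 */
static struct io_context *example_pin_current_ioc(void)
{
    struct io_context *ioc = current->io_context;

    /* %current's io_context can be read without task_lock() */
    if (ioc)
        get_io_context(ioc);

    /* caller drops the reference with put_io_context() when done */
    return ioc;
}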

/**
 * put_io_context_active - put active reference on ioc
 * @ioc: ioc of interest
 *
 * Undo get_io_context_active().  If active reference reaches zero after
 * put, @ioc can never issue further IOs and ioscheds are notified.
 */
void put_io_context_active(struct io_context *ioc)
{
    unsigned long flags;
    struct io_cq *icq;

    if (!atomic_dec_and_test(&ioc->active_ref)) {
        put_io_context(ioc);
        return;
    }

    /*
     * Need ioc lock to walk icq_list and q lock to exit icq.  Perform
     * reverse double locking.  Read comment in ioc_release_fn() for
     * explanation on the nested locking annotation.
     */
retry:
    spin_lock_irqsave_nested(&ioc->lock, flags, 1);
    hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
        if (icq->flags & ICQ_EXITED)
            continue;
        if (spin_trylock(icq->q->queue_lock)) {
            ioc_exit_icq(icq);
            spin_unlock(icq->q->queue_lock);
        } else {
            spin_unlock_irqrestore(&ioc->lock, flags);
            cpu_relax();
            goto retry;
        }
    }
    spin_unlock_irqrestore(&ioc->lock, flags);

    put_io_context(ioc);
}

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
    struct io_context *ioc;

    task_lock(task);
    ioc = task->io_context;
    task->io_context = NULL;
    task_unlock(task);

    atomic_dec(&ioc->nr_tasks);
    put_io_context_active(ioc);
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.  Must be called with @q locked.
 */
void ioc_clear_queue(struct request_queue *q)
{
    lockdep_assert_held(q->queue_lock);

    while (!list_empty(&q->icq_list)) {
        struct io_cq *icq = list_entry(q->icq_list.next,
                           struct io_cq, q_node);
        struct io_context *ioc = icq->ioc;

        spin_lock(&ioc->lock);
        ioc_destroy_icq(icq);
        spin_unlock(&ioc->lock);
    }
}
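
/*
 * Illustrative sketch of a hypothetical caller: queue teardown paths are
 * expected to call ioc_clear_queue() with @q->queue_lock held so that
 * every icq attached to the queue is destroyed before the elevator is
 * released.
 */
static void example_release_queue_icqs(struct request_queue *q)
{
    spin_lock_irq(q->queue_lock);
    ioc_clear_queue(q);
    spin_unlock_irq(q->queue_lock);
}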

int create_task_io_context(struct task_struct *task, gfp_t gfp_flags, int node)
{
    struct io_context *ioc;
    int ret;

    ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
                    node);
    if (unlikely(!ioc))
        return -ENOMEM;

    /* initialize */
    atomic_long_set(&ioc->refcount, 1);
    atomic_set(&ioc->nr_tasks, 1);
    atomic_set(&ioc->active_ref, 1);
    spin_lock_init(&ioc->lock);
    INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC | __GFP_HIGH);
    INIT_HLIST_HEAD(&ioc->icq_list);
    INIT_WORK(&ioc->release_work, ioc_release_fn);

    /*
     * Try to install.  ioc shouldn't be installed if someone else
     * already did or @task, which isn't %current, is exiting.  Note
     * that we need to allow ioc creation on exiting %current as exit
     * path may issue IOs from e.g. exit_files().  The exit path is
     * responsible for not issuing IO after exit_io_context().
     */
    task_lock(task);
    if (!task->io_context &&
        (task == current || !(task->flags & PF_EXITING)))
        task->io_context = ioc;
    else
        kmem_cache_free(iocontext_cachep, ioc);

    ret = task->io_context ? 0 : -EBUSY;

    task_unlock(task);

    return ret;
}

/**
 * get_task_io_context - get io_context of a task
 * @task: task of interest
 * @gfp_flags: allocation flags, used if allocation is necessary
 * @node: allocation node, used if allocation is necessary
 *
 * Return io_context of @task.  If it doesn't exist, it is created with
 * @gfp_flags and @node.  The returned io_context has its reference count
 * incremented.
 *
 * This function always goes through task_lock() and it's better to use
 * %current->io_context + get_io_context() for %current.
 */
struct io_context *get_task_io_context(struct task_struct *task,
                       gfp_t gfp_flags, int node)
{
    struct io_context *ioc;

    might_sleep_if(gfpflags_allow_blocking(gfp_flags));

    do {
        task_lock(task);
        ioc = task->io_context;
        if (likely(ioc)) {
            get_io_context(ioc);
            task_unlock(task);
            return ioc;
        }
        task_unlock(task);
    } while (!create_task_io_context(task, gfp_flags, node));

    return NULL;
}
EXPORT_SYMBOL(get_task_io_context);
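
/*
 * Illustrative sketch of a hypothetical caller (e.g. an ioprio-style path):
 * take a reference on another task's io_context, creating it if necessary,
 * and drop the reference when done.  For %current it is cheaper to read
 * current->io_context directly and call get_io_context(), as noted above.
 * GFP_NOIO and the helper name are assumptions of this sketch.
 */
static int example_touch_task_ioc(struct task_struct *task)
{
    struct io_context *ioc;

    ioc = get_task_io_context(task, GFP_NOIO, NUMA_NO_NODE);
    if (!ioc)
        return -ENOMEM;

    /* ... inspect or modify ioc here ... */

    put_io_context(ioc);
    return 0;
}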

/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @ioc: the associated io_context
 * @q: the associated request_queue
 *
 * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
 * with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q)
{
    struct io_cq *icq;

    lockdep_assert_held(q->queue_lock);

    /*
     * icq's are indexed from @ioc using radix tree and hint pointer,
     * both of which are protected with RCU.  All removals are done
     * holding both q and ioc locks, and we're holding q lock - if we
     * find an icq which points to us, it's guaranteed to be valid.
     */
    rcu_read_lock();
    icq = rcu_dereference(ioc->icq_hint);
    if (icq && icq->q == q)
        goto out;

    icq = radix_tree_lookup(&ioc->icq_tree, q->id);
    if (icq && icq->q == q)
        rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
    else
        icq = NULL;
out:
    rcu_read_unlock();
    return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);
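
/*
 * Illustrative sketch of a hypothetical caller: ioc_lookup_icq() must run
 * under @q->queue_lock, so a lookup from process context takes the lock
 * around the call.
 */
static bool example_icq_exists(struct io_context *ioc, struct request_queue *q)
{
    struct io_cq *icq;

    spin_lock_irq(q->queue_lock);
    icq = ioc_lookup_icq(ioc, q);
    spin_unlock_irq(q->queue_lock);

    return icq != NULL;
}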

/**
 * ioc_create_icq - create and link io_cq
 * @ioc: io_context of interest
 * @q: request_queue of interest
 * @gfp_mask: allocation mask
 *
 * Make sure an io_cq linking @ioc and @q exists.  If the icq doesn't exist,
 * it will be created using @gfp_mask.
 *
 * The caller is responsible for ensuring @ioc won't go away and @q is
 * alive and will stay alive until this function returns.
 */
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
                 gfp_t gfp_mask)
{
    struct elevator_type *et = q->elevator->type;
    struct io_cq *icq;

    /* allocate stuff */
    icq = kmem_cache_alloc_node(et->icq_cache, gfp_mask | __GFP_ZERO,
                    q->node);
    if (!icq)
        return NULL;

    if (radix_tree_maybe_preload(gfp_mask) < 0) {
        kmem_cache_free(et->icq_cache, icq);
        return NULL;
    }

    icq->ioc = ioc;
    icq->q = q;
    INIT_LIST_HEAD(&icq->q_node);
    INIT_HLIST_NODE(&icq->ioc_node);

    /* lock both q and ioc and try to link @icq */
    spin_lock_irq(q->queue_lock);
    spin_lock(&ioc->lock);

    if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
        hlist_add_head(&icq->ioc_node, &ioc->icq_list);
        list_add(&icq->q_node, &q->icq_list);
        if (et->ops.elevator_init_icq_fn)
            et->ops.elevator_init_icq_fn(icq);
    } else {
        kmem_cache_free(et->icq_cache, icq);
        icq = ioc_lookup_icq(ioc, q);
        if (!icq)
            printk(KERN_ERR "cfq: icq link failed!\n");
    }

    spin_unlock(&ioc->lock);
    spin_unlock_irq(q->queue_lock);
    radix_tree_preload_end();
    return icq;
}
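
/*
 * Illustrative sketch, loosely modelled on what a request-allocation path
 * does: look up the icq linking %current's io_context with @q and create
 * one if it is missing.  ioc_create_icq() takes @q->queue_lock itself, so
 * it is called with the lock dropped; if it loses a creation race it falls
 * back to ioc_lookup_icq() internally.  The helper name is hypothetical.
 */
static struct io_cq *example_get_icq(struct request_queue *q, gfp_t gfp_mask)
{
    struct io_context *ioc = current->io_context;
    struct io_cq *icq;

    if (!ioc)
        return NULL;    /* a real caller would create the io_context first */

    spin_lock_irq(q->queue_lock);
    icq = ioc_lookup_icq(ioc, q);
    spin_unlock_irq(q->queue_lock);

    if (!icq)
        icq = ioc_create_icq(ioc, q, gfp_mask);

    return icq;
}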

static int __init blk_ioc_init(void)
{
    iocontext_cachep = kmem_cache_create("blkdev_ioc",
            sizeof(struct io_context), 0, SLAB_PANIC, NULL);
    return 0;
}
subsys_initcall(blk_ioc_init);