// SPDX-License-Identifier: GPL-2.0-only
/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *  Gregory Haskins <ghaskins@novell.com>
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
    return true;
}

static void
irqfd_inject(struct work_struct *work)
{
    struct kvm_kernel_irqfd *irqfd =
        container_of(work, struct kvm_kernel_irqfd, inject);
    struct kvm *kvm = irqfd->kvm;

    if (!irqfd->resampler) {
        kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
                false);
        kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
                false);
    } else
        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                irqfd->gsi, 1, false);
}
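
/*
 * For context, a minimal sketch of the userspace side (not part of this
 * file, error handling omitted): a VMM binds an eventfd to a GSI with the
 * KVM_IRQFD ioctl, after which signalling that eventfd ends up here via
 * irqfd_wakeup()/irqfd_inject().  vm_fd and GSI 24 are assumptions for
 * illustration only.
 *
 *    int efd = eventfd(0, EFD_CLOEXEC);
 *    struct kvm_irqfd irqfd = {
 *        .fd  = efd,
 *        .gsi = 24,
 *    };
 *    ioctl(vm_fd, KVM_IRQFD, &irqfd);
 *
 *    uint64_t one = 1;
 *    write(efd, &one, sizeof(one));    // fires GSI 24 in the guest
 */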

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
    struct kvm_kernel_irqfd_resampler *resampler;
    struct kvm *kvm;
    struct kvm_kernel_irqfd *irqfd;
    int idx;

    resampler = container_of(kian,
            struct kvm_kernel_irqfd_resampler, notifier);
    kvm = resampler->kvm;

    kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
            resampler->notifier.gsi, 0, false);

    idx = srcu_read_lock(&kvm->irq_srcu);

    list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link,
        srcu_read_lock_held(&kvm->irq_srcu))
        eventfd_signal(irqfd->resamplefd, 1);

    srcu_read_unlock(&kvm->irq_srcu, idx);
}
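
/*
 * Illustrative only: resampling is requested from userspace by setting
 * KVM_IRQFD_FLAG_RESAMPLE and supplying a second eventfd.  The guest's EOI
 * arrives in irqfd_resampler_ack() above, which de-asserts the line and
 * signals resamplefd so the VMM (or e.g. VFIO) can re-check the device and
 * re-assert if it is still pending.  irq_efd, resample_efd and vm_fd are
 * assumed to have been set up elsewhere.
 *
 *    struct kvm_irqfd irqfd = {
 *        .fd         = irq_efd,
 *        .resamplefd = resample_efd,
 *        .gsi        = 24,
 *        .flags      = KVM_IRQFD_FLAG_RESAMPLE,
 *    };
 *    ioctl(vm_fd, KVM_IRQFD, &irqfd);
 */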

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
    struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
    struct kvm *kvm = resampler->kvm;

    mutex_lock(&kvm->irqfds.resampler_lock);

    list_del_rcu(&irqfd->resampler_link);
    synchronize_srcu(&kvm->irq_srcu);

    if (list_empty(&resampler->list)) {
        list_del(&resampler->link);
        kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
        kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
                resampler->notifier.gsi, 0, false);
        kfree(resampler);
    }

    mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
    struct kvm_kernel_irqfd *irqfd =
        container_of(work, struct kvm_kernel_irqfd, shutdown);
    struct kvm *kvm = irqfd->kvm;
    u64 cnt;

    /* Make sure irqfd has been initialized in assign path. */
    synchronize_srcu(&kvm->irq_srcu);

    /*
     * Synchronize with the wait-queue and unhook ourselves to prevent
     * further events.
     */
    eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

    /*
     * We know no new events will be scheduled at this point, so block
     * until all previously outstanding events have completed
     */
    flush_work(&irqfd->inject);

    if (irqfd->resampler) {
        irqfd_resampler_shutdown(irqfd);
        eventfd_ctx_put(irqfd->resamplefd);
    }

    /*
     * It is now safe to release the object's resources
     */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
    irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
    eventfd_ctx_put(irqfd->eventfd);
    kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
    return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
    BUG_ON(!irqfd_is_active(irqfd));

    list_del_init(&irqfd->list);

    queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
                struct kvm_kernel_irq_routing_entry *irq,
                struct kvm *kvm, int irq_source_id,
                int level,
                bool line_status)
{
    return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
    struct kvm_kernel_irqfd *irqfd =
        container_of(wait, struct kvm_kernel_irqfd, wait);
    __poll_t flags = key_to_poll(key);
    struct kvm_kernel_irq_routing_entry irq;
    struct kvm *kvm = irqfd->kvm;
    unsigned seq;
    int idx;
    int ret = 0;

    if (flags & EPOLLIN) {
        u64 cnt;
        eventfd_ctx_do_read(irqfd->eventfd, &cnt);

        idx = srcu_read_lock(&kvm->irq_srcu);
        do {
            seq = read_seqcount_begin(&irqfd->irq_entry_sc);
            irq = irqfd->irq_entry;
        } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
        /* An event has been signaled, inject an interrupt */
        if (kvm_arch_set_irq_inatomic(&irq, kvm,
                          KVM_USERSPACE_IRQ_SOURCE_ID, 1,
                          false) == -EWOULDBLOCK)
            schedule_work(&irqfd->inject);
        srcu_read_unlock(&kvm->irq_srcu, idx);
        ret = 1;
    }

    if (flags & EPOLLHUP) {
        /* The eventfd is closing, detach from KVM */
        unsigned long iflags;

        spin_lock_irqsave(&kvm->irqfds.lock, iflags);

        /*
         * We must check if someone deactivated the irqfd before
         * we could acquire the irqfds.lock since the item is
         * deactivated from the KVM side before it is unhooked from
         * the wait-queue.  If it is already deactivated, we can
         * simply return knowing the other side will cleanup for us.
         * We cannot race against the irqfd going away since the
         * other side is required to acquire wqh->lock, which we hold
         */
        if (irqfd_is_active(irqfd))
            irqfd_deactivate(irqfd);

        spin_unlock_irqrestore(&kvm->irqfds.lock, iflags);
    }

    return ret;
}

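/*
 * Registered through vfs_poll() in kvm_irqfd_assign(): hooks irqfd_wakeup()
 * into the eventfd's wait queue.  add_wait_queue_priority() places the
 * entry at the head of the queue, so the irqfd callback is invoked ahead of
 * ordinary pollers of the same eventfd.
 */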
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
            poll_table *pt)
{
    struct kvm_kernel_irqfd *irqfd =
        container_of(pt, struct kvm_kernel_irqfd, pt);
    add_wait_queue_priority(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
    struct kvm_kernel_irq_routing_entry *e;
    struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
    int n_entries;

    n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

    write_seqcount_begin(&irqfd->irq_entry_sc);

    e = entries;
    if (n_entries == 1)
        irqfd->irq_entry = *e;
    else
        irqfd->irq_entry.type = 0;

    write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
                struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
                struct irq_bypass_consumer *cons)
{
}

int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
                struct kvm *kvm, unsigned int host_irq,
                uint32_t guest_irq, bool set)
{
    return 0;
}

bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
                struct kvm_kernel_irq_routing_entry *old,
                struct kvm_kernel_irq_routing_entry *new)
{
    return true;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
    struct kvm_kernel_irqfd *irqfd, *tmp;
    struct fd f;
    struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
    int ret;
    __poll_t events;
    int idx;

    if (!kvm_arch_intc_initialized(kvm))
        return -EAGAIN;

    if (!kvm_arch_irqfd_allowed(kvm, args))
        return -EINVAL;

    irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
    if (!irqfd)
        return -ENOMEM;

    irqfd->kvm = kvm;
    irqfd->gsi = args->gsi;
    INIT_LIST_HEAD(&irqfd->list);
    INIT_WORK(&irqfd->inject, irqfd_inject);
    INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
    seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);

    f = fdget(args->fd);
    if (!f.file) {
        ret = -EBADF;
        goto out;
    }

    eventfd = eventfd_ctx_fileget(f.file);
    if (IS_ERR(eventfd)) {
        ret = PTR_ERR(eventfd);
        goto fail;
    }

    irqfd->eventfd = eventfd;

    if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
        struct kvm_kernel_irqfd_resampler *resampler;

        resamplefd = eventfd_ctx_fdget(args->resamplefd);
        if (IS_ERR(resamplefd)) {
            ret = PTR_ERR(resamplefd);
            goto fail;
        }

        irqfd->resamplefd = resamplefd;
        INIT_LIST_HEAD(&irqfd->resampler_link);

        mutex_lock(&kvm->irqfds.resampler_lock);

        list_for_each_entry(resampler,
                    &kvm->irqfds.resampler_list, link) {
            if (resampler->notifier.gsi == irqfd->gsi) {
                irqfd->resampler = resampler;
                break;
            }
        }

        if (!irqfd->resampler) {
            resampler = kzalloc(sizeof(*resampler),
                        GFP_KERNEL_ACCOUNT);
            if (!resampler) {
                ret = -ENOMEM;
                mutex_unlock(&kvm->irqfds.resampler_lock);
                goto fail;
            }

            resampler->kvm = kvm;
            INIT_LIST_HEAD(&resampler->list);
            resampler->notifier.gsi = irqfd->gsi;
            resampler->notifier.irq_acked = irqfd_resampler_ack;
            INIT_LIST_HEAD(&resampler->link);

            list_add(&resampler->link, &kvm->irqfds.resampler_list);
            kvm_register_irq_ack_notifier(kvm,
                              &resampler->notifier);
            irqfd->resampler = resampler;
        }

        list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
        synchronize_srcu(&kvm->irq_srcu);

        mutex_unlock(&kvm->irqfds.resampler_lock);
    }

    /*
     * Install our own custom wake-up handling so we are notified via
     * a callback whenever someone signals the underlying eventfd
     */
    init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
    init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

    spin_lock_irq(&kvm->irqfds.lock);

    ret = 0;
    list_for_each_entry(tmp, &kvm->irqfds.items, list) {
        if (irqfd->eventfd != tmp->eventfd)
            continue;
        /* This fd is used for another irq already. */
        ret = -EBUSY;
        spin_unlock_irq(&kvm->irqfds.lock);
        goto fail;
    }

    idx = srcu_read_lock(&kvm->irq_srcu);
    irqfd_update(kvm, irqfd);

    list_add_tail(&irqfd->list, &kvm->irqfds.items);

    spin_unlock_irq(&kvm->irqfds.lock);

    /*
     * Check if there was an event already pending on the eventfd
     * before we registered, and trigger it as if we didn't miss it.
     */
    events = vfs_poll(f.file, &irqfd->pt);

    if (events & EPOLLIN)
        schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
    if (kvm_arch_has_irq_bypass()) {
        irqfd->consumer.token = (void *)irqfd->eventfd;
        irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
        irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
        irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
        irqfd->consumer.start = kvm_arch_irq_bypass_start;
        ret = irq_bypass_register_consumer(&irqfd->consumer);
        if (ret)
            pr_info("irq bypass consumer (token %p) registration fails: %d\n",
                irqfd->consumer.token, ret);
    }
#endif

    srcu_read_unlock(&kvm->irq_srcu, idx);

    /*
     * do not drop the file until the irqfd is fully initialized, otherwise
     * we might race against the EPOLLHUP
     */
    fdput(f);
    return 0;

fail:
    if (irqfd->resampler)
        irqfd_resampler_shutdown(irqfd);

    if (resamplefd && !IS_ERR(resamplefd))
        eventfd_ctx_put(resamplefd);

    if (eventfd && !IS_ERR(eventfd))
        eventfd_ctx_put(eventfd);

    fdput(f);

out:
    kfree(irqfd);
    return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
    struct kvm_irq_ack_notifier *kian;
    int gsi, idx;

    idx = srcu_read_lock(&kvm->irq_srcu);
    gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
    if (gsi != -1)
        hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
                      link, srcu_read_lock_held(&kvm->irq_srcu))
            if (kian->gsi == gsi) {
                srcu_read_unlock(&kvm->irq_srcu, idx);
                return true;
            }

    srcu_read_unlock(&kvm->irq_srcu, idx);

    return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
    struct kvm_irq_ack_notifier *kian;

    hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list,
                  link, srcu_read_lock_held(&kvm->irq_srcu))
        if (kian->gsi == gsi)
            kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
    int gsi, idx;

    trace_kvm_ack_irq(irqchip, pin);

    idx = srcu_read_lock(&kvm->irq_srcu);
    gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
    if (gsi != -1)
        kvm_notify_acked_gsi(kvm, gsi);
    srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
                   struct kvm_irq_ack_notifier *kian)
{
    mutex_lock(&kvm->irq_lock);
    hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
    mutex_unlock(&kvm->irq_lock);
    kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
                    struct kvm_irq_ack_notifier *kian)
{
    mutex_lock(&kvm->irq_lock);
    hlist_del_init_rcu(&kian->link);
    mutex_unlock(&kvm->irq_lock);
    synchronize_srcu(&kvm->irq_srcu);
    kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
    spin_lock_init(&kvm->irqfds.lock);
    INIT_LIST_HEAD(&kvm->irqfds.items);
    INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
    mutex_init(&kvm->irqfds.resampler_lock);
#endif
    INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shutdown any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
    struct kvm_kernel_irqfd *irqfd, *tmp;
    struct eventfd_ctx *eventfd;

    eventfd = eventfd_ctx_fdget(args->fd);
    if (IS_ERR(eventfd))
        return PTR_ERR(eventfd);

    spin_lock_irq(&kvm->irqfds.lock);

    list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
        if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
            /*
             * This clearing of irq_entry.type is needed for when
             * another thread calls kvm_irq_routing_update before
             * we flush workqueue below (we synchronize with
             * kvm_irq_routing_update using irqfds.lock).
             */
            write_seqcount_begin(&irqfd->irq_entry_sc);
            irqfd->irq_entry.type = 0;
            write_seqcount_end(&irqfd->irq_entry_sc);
            irqfd_deactivate(irqfd);
        }
    }

    spin_unlock_irq(&kvm->irqfds.lock);
    eventfd_ctx_put(eventfd);

    /*
     * Block until we know all outstanding shutdown jobs have completed
     * so that we guarantee there will not be any more interrupts on this
     * gsi once this deassign function returns.
     */
    flush_workqueue(irqfd_cleanup_wq);

    return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
    if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
        return -EINVAL;

    if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
        return kvm_irqfd_deassign(kvm, args);

    return kvm_irqfd_assign(kvm, args);
}
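
/*
 * The matching userspace teardown (illustrative; assumes the same efd/gsi
 * pair used at assign time): KVM_IRQFD_FLAG_DEASSIGN routes the call to
 * kvm_irqfd_deassign() above.
 *
 *    struct kvm_irqfd irqfd = {
 *        .fd    = efd,
 *        .gsi   = 24,
 *        .flags = KVM_IRQFD_FLAG_DEASSIGN,
 *    };
 *    ioctl(vm_fd, KVM_IRQFD, &irqfd);
 */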

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
    struct kvm_kernel_irqfd *irqfd, *tmp;

    spin_lock_irq(&kvm->irqfds.lock);

    list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
        irqfd_deactivate(irqfd);

    spin_unlock_irq(&kvm->irqfds.lock);

    /*
     * Block until we know all outstanding shutdown jobs have completed
     * since we do not take a kvm* reference.
     */
    flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
    struct kvm_kernel_irqfd *irqfd;

    spin_lock_irq(&kvm->irqfds.lock);

    list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
        /* Under irqfds.lock, so can read irq_entry safely */
        struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
#endif

        irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
        if (irqfd->producer &&
            kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
            int ret = kvm_arch_update_irqfd_routing(
                    irqfd->kvm, irqfd->producer->irq,
                    irqfd->gsi, 1);
            WARN_ON(ret);
        }
#endif
    }

    spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
    irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
    if (!irqfd_cleanup_wq)
        return -ENOMEM;

    return 0;
}

void kvm_irqfd_exit(void)
{
    destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */

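/*
 * A minimal sketch of the userspace side (not part of this file, error
 * handling omitted): register a 2-byte doorbell so that a matching guest
 * write signals the eventfd instead of exiting to userspace.  vm_fd and the
 * doorbell address are assumptions for illustration only.
 *
 *    int efd = eventfd(0, EFD_CLOEXEC);
 *    struct kvm_ioeventfd ioev = {
 *        .addr      = 0xfe003000,    // assumed guest physical address
 *        .len       = 2,
 *        .fd        = efd,
 *        .flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *        .datamatch = 0,             // e.g. virtqueue index 0
 *    };
 *    ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
 */
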
struct _ioeventfd {
    struct list_head     list;
    u64                  addr;
    int                  length;
    struct eventfd_ctx  *eventfd;
    u64                  datamatch;
    struct kvm_io_device dev;
    u8                   bus_idx;
    bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
    return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
    eventfd_ctx_put(p->eventfd);
    list_del(&p->list);
    kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
    u64 _val;

    if (addr != p->addr)
        /* address must be precise for a hit */
        return false;

    if (!p->length)
        /* length = 0 means only look at the address, so always a hit */
        return true;

    if (len != p->length)
        /* address-range must be precise for a hit */
        return false;

    if (p->wildcard)
        /* all else equal, wildcard is always a hit */
        return true;

    /* otherwise, we have to actually compare the data */

    BUG_ON(!IS_ALIGNED((unsigned long)val, len));

    switch (len) {
    case 1:
        _val = *(u8 *)val;
        break;
    case 2:
        _val = *(u16 *)val;
        break;
    case 4:
        _val = *(u32 *)val;
        break;
    case 8:
        _val = *(u64 *)val;
        break;
    default:
        return false;
    }

    return _val == p->datamatch;
}
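
/*
 * Worked example of the rules above (values illustrative): with
 * p->addr == 0xfe003000, p->length == 2 and p->datamatch == 1, only a
 * 2-byte write of the value 1 to exactly 0xfe003000 is a hit; a wildcard
 * registration would accept any 2-byte value at that address, and a
 * zero-length registration matches on the address alone.
 */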

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
        int len, const void *val)
{
    struct _ioeventfd *p = to_ioeventfd(this);

    if (!ioeventfd_in_range(p, addr, len, val))
        return -EOPNOTSUPP;

    eventfd_signal(p->eventfd, 1);
    return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
    struct _ioeventfd *p = to_ioeventfd(this);

    ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
    .write      = ioeventfd_write,
    .destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
    struct _ioeventfd *_p;

    list_for_each_entry(_p, &kvm->ioeventfds, list)
        if (_p->bus_idx == p->bus_idx &&
            _p->addr == p->addr &&
            (!_p->length || !p->length ||
             (_p->length == p->length &&
              (_p->wildcard || p->wildcard ||
               _p->datamatch == p->datamatch))))
            return true;

    return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
    if (flags & KVM_IOEVENTFD_FLAG_PIO)
        return KVM_PIO_BUS;
    if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
        return KVM_VIRTIO_CCW_NOTIFY_BUS;
    return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
                enum kvm_bus bus_idx,
                struct kvm_ioeventfd *args)
{

    struct eventfd_ctx *eventfd;
    struct _ioeventfd *p;
    int ret;

    eventfd = eventfd_ctx_fdget(args->fd);
    if (IS_ERR(eventfd))
        return PTR_ERR(eventfd);

    p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
    if (!p) {
        ret = -ENOMEM;
        goto fail;
    }

    INIT_LIST_HEAD(&p->list);
    p->addr    = args->addr;
    p->bus_idx = bus_idx;
    p->length  = args->len;
    p->eventfd = eventfd;

    /* The datamatch feature is optional, otherwise this is a wildcard */
    if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
        p->datamatch = args->datamatch;
    else
        p->wildcard = true;

    mutex_lock(&kvm->slots_lock);

    /* Verify that there isn't a match already */
    if (ioeventfd_check_collision(kvm, p)) {
        ret = -EEXIST;
        goto unlock_fail;
    }

    kvm_iodevice_init(&p->dev, &ioeventfd_ops);

    ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
                      &p->dev);
    if (ret < 0)
        goto unlock_fail;

    kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
    list_add_tail(&p->list, &kvm->ioeventfds);

    mutex_unlock(&kvm->slots_lock);

    return 0;

unlock_fail:
    mutex_unlock(&kvm->slots_lock);

fail:
    kfree(p);
    eventfd_ctx_put(eventfd);

    return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
               struct kvm_ioeventfd *args)
{
    struct _ioeventfd        *p, *tmp;
    struct eventfd_ctx       *eventfd;
    struct kvm_io_bus    *bus;
    int                       ret = -ENOENT;
    bool                      wildcard;

    eventfd = eventfd_ctx_fdget(args->fd);
    if (IS_ERR(eventfd))
        return PTR_ERR(eventfd);

    wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

    mutex_lock(&kvm->slots_lock);

    list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {

        if (p->bus_idx != bus_idx ||
            p->eventfd != eventfd  ||
            p->addr != args->addr  ||
            p->length != args->len ||
            p->wildcard != wildcard)
            continue;

        if (!p->wildcard && p->datamatch != args->datamatch)
            continue;

        kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
        bus = kvm_get_bus(kvm, bus_idx);
        if (bus)
            bus->ioeventfd_count--;
        ioeventfd_release(p);
        ret = 0;
        break;
    }

    mutex_unlock(&kvm->slots_lock);

    eventfd_ctx_put(eventfd);

    return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
    enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
    int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

    if (!args->len && bus_idx == KVM_MMIO_BUS)
        kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

    return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
    enum kvm_bus              bus_idx;
    int ret;

    bus_idx = ioeventfd_bus_from_flags(args->flags);
    /* must be natural-word sized, or 0 to ignore length */
    switch (args->len) {
    case 0:
    case 1:
    case 2:
    case 4:
    case 8:
        break;
    default:
        return -EINVAL;
    }

    /* check for range overflow */
    if (args->addr + args->len < args->addr)
        return -EINVAL;

    /* check for extra flags that we don't understand */
    if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
        return -EINVAL;

    /* ioeventfd with no length can't be combined with DATAMATCH */
    if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
        return -EINVAL;

    ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
    if (ret)
        goto fail;

    /* When length is ignored, MMIO is also put on a separate bus, for
     * faster lookups.
     */
    if (!args->len && bus_idx == KVM_MMIO_BUS) {
        ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
        if (ret < 0)
            goto fast_fail;
    }

    return 0;

fast_fail:
    kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
    return ret;
}
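
/*
 * Illustrative note: a zero-length MMIO registration is the "fast MMIO"
 * case used for virtio notification doorbells; as implemented above it is
 * mirrored onto KVM_FAST_MMIO_BUS so the exit path can match on address
 * alone without decoding the written data.  doorbell_gpa, efd and vm_fd are
 * assumptions for illustration only.
 *
 *    struct kvm_ioeventfd ioev = {
 *        .addr = doorbell_gpa,
 *        .len  = 0,
 *        .fd   = efd,
 *    };
 *    ioctl(vm_fd, KVM_IOEVENTFD, &ioev);
 */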

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
    if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
        return kvm_deassign_ioeventfd(kvm, args);

    return kvm_assign_ioeventfd(kvm, args);
}