// SPDX-License-Identifier: GPL-2.0-or-later
/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd
*/

#define pr_fmt(fmt) "xen-blkback: " fmt

#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

/* The maximum length of the 'ring-ref%u' key name on the XenBus. */
#define RINGREF_NAME_LEN (20)
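
/*
 * Sizing note (my reading of the bound above): "ring-ref" is 8 characters
 * and a 32-bit index prints as at most 10 decimal digits, so the longest
 * key name plus its trailing NUL needs 19 bytes, within the 20 reserved.
 */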

struct backend_info {
    struct xenbus_device    *dev;
    struct xen_blkif    *blkif;
    struct xenbus_watch backend_watch;
    unsigned        major;
    unsigned        minor;
    char            *mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char *,
                const char *);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
    return be->dev;
}

/*
 * The last request could free the device from softirq context, but
 * xen_blkif_free() can sleep, so the actual free is deferred to a
 * work item.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
    struct xen_blkif *blkif;

    blkif = container_of(work, struct xen_blkif, free_work);
    xen_blkif_free(blkif);
}

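/*
 * Build the "<domid>.<name>" tag used to name this backend's kthreads.
 * For illustration (hypothetical values): a "dev" node of "/dev/xvda"
 * for domain 1 yields "1.xvda"; if the node has no "/dev/" prefix the
 * whole string is used.
 */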
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
    char *devpath, *devname;
    struct xenbus_device *dev = blkif->be->dev;

    devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
    if (IS_ERR(devpath))
        return PTR_ERR(devpath);

    devname = strstr(devpath, "/dev/");
    if (devname != NULL)
        devname += strlen("/dev/");
    else
        devname  = devpath;

    snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname);
    kfree(devpath);

    return 0;
}

static void xen_update_blkif_status(struct xen_blkif *blkif)
{
    int err;
    char name[TASK_COMM_LEN];
    struct xen_blkif_ring *ring;
    int i;

    /* Not ready to connect? */
    if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
        return;

    /* Already connected? */
    if (blkif->be->dev->state == XenbusStateConnected)
        return;

    /* Attempt to connect: exit if we fail to. */
    connect(blkif->be);
    if (blkif->be->dev->state != XenbusStateConnected)
        return;

    err = blkback_name(blkif, name);
    if (err) {
        xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
        return;
    }

    err = sync_blockdev(blkif->vbd.bdev);
    if (err) {
        xenbus_dev_error(blkif->be->dev, err, "block flush");
        return;
    }
    invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

    for (i = 0; i < blkif->nr_rings; i++) {
        ring = &blkif->rings[i];
        ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
        if (IS_ERR(ring->xenblkd)) {
            err = PTR_ERR(ring->xenblkd);
            ring->xenblkd = NULL;
            xenbus_dev_fatal(blkif->be->dev, err,
                    "start %s-%d xenblkd", name, i);
            goto out;
        }
    }
    return;

out:
    while (--i >= 0) {
        ring = &blkif->rings[i];
        kthread_stop(ring->xenblkd);
    }
    return;
}

static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
{
    unsigned int r;

    blkif->rings = kcalloc(blkif->nr_rings, sizeof(struct xen_blkif_ring),
                   GFP_KERNEL);
    if (!blkif->rings)
        return -ENOMEM;

    for (r = 0; r < blkif->nr_rings; r++) {
        struct xen_blkif_ring *ring = &blkif->rings[r];

        spin_lock_init(&ring->blk_ring_lock);
        init_waitqueue_head(&ring->wq);
        INIT_LIST_HEAD(&ring->pending_free);
        INIT_LIST_HEAD(&ring->persistent_purge_list);
        INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
        gnttab_page_cache_init(&ring->free_pages);

        spin_lock_init(&ring->pending_free_lock);
        init_waitqueue_head(&ring->pending_free_wq);
        init_waitqueue_head(&ring->shutdown_wq);
        ring->blkif = blkif;
        ring->st_print = jiffies;
        ring->active = true;
    }

    return 0;
}

/* Enable the persistent grants feature. */
static bool feature_persistent = true;
module_param(feature_persistent, bool, 0644);
MODULE_PARM_DESC(feature_persistent, "Enables the persistent grants feature");
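/*
 * With 0644 permissions the parameter should also be visible at runtime
 * under /sys/module/xen_blkback/parameters/feature_persistent, e.g.
 * after "modprobe xen-blkback feature_persistent=0" (usage sketch; the
 * value is sampled per connection in connect_ring() below).
 */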

static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
    struct xen_blkif *blkif;

    BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

    blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
    if (!blkif)
        return ERR_PTR(-ENOMEM);

    blkif->domid = domid;
    atomic_set(&blkif->refcnt, 1);
    init_completion(&blkif->drain_complete);

    /*
     * Because freeing back to the cache may be deferred, it is not
     * safe to unload the module (and hence destroy the cache) until
     * this has completed. To prevent premature unloading, take an
     * extra module reference here and release only when the object
     * has been freed back to the cache.
     */
    __module_get(THIS_MODULE);
    INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);

    return blkif;
}

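/*
 * Map the frontend's shared ring and bind its event channel. As a
 * summary of the steps below: map nr_grefs grant references into one
 * virtually contiguous area, attach the back ring using the negotiated
 * ABI, sanity-check the producer/consumer indices, then bind the
 * interdomain event channel with lateeoi handling.
 */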
static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
             unsigned int nr_grefs, unsigned int evtchn)
{
    int err;
    struct xen_blkif *blkif = ring->blkif;
    const struct blkif_common_sring *sring_common;
    RING_IDX rsp_prod, req_prod;
    unsigned int size;

    /* Already connected? */
    if (ring->irq)
        return 0;

    err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
                     &ring->blk_ring);
    if (err < 0)
        return err;

    sring_common = (struct blkif_common_sring *)ring->blk_ring;
    rsp_prod = READ_ONCE(sring_common->rsp_prod);
    req_prod = READ_ONCE(sring_common->req_prod);

    switch (blkif->blk_protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        struct blkif_sring *sring_native =
            (struct blkif_sring *)ring->blk_ring;

        BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
                 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
        size = __RING_SIZE(sring_native, XEN_PAGE_SIZE * nr_grefs);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        struct blkif_x86_32_sring *sring_x86_32 =
            (struct blkif_x86_32_sring *)ring->blk_ring;

        BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
                 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
        size = __RING_SIZE(sring_x86_32, XEN_PAGE_SIZE * nr_grefs);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        struct blkif_x86_64_sring *sring_x86_64 =
            (struct blkif_x86_64_sring *)ring->blk_ring;

        BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
                 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
        size = __RING_SIZE(sring_x86_64, XEN_PAGE_SIZE * nr_grefs);
        break;
    }
    default:
        BUG();
    }

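    /*
     * The shared indices come from guest memory; a buggy or malicious
     * frontend could publish values implying more than a full ring of
     * outstanding requests, so such rings are rejected here.
     */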
    err = -EIO;
    if (req_prod - rsp_prod > size)
        goto fail;

    err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->be->dev,
            evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
    if (err < 0)
        goto fail;
    ring->irq = err;

    return 0;

fail:
    xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
    ring->blk_rings.common.sring = NULL;
    return err;
}

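/*
 * Tear down every active ring: stop the kthreads, unbind event channels,
 * unmap the shared rings and drop the per-ring caches and pending
 * requests. Returns -EBUSY (leaving blkif->rings in place) while any
 * ring still has in-flight I/O, so callers may invoke it again later.
 */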
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
    struct pending_req *req, *n;
    unsigned int j, r;
    bool busy = false;

    for (r = 0; r < blkif->nr_rings; r++) {
        struct xen_blkif_ring *ring = &blkif->rings[r];
        unsigned int i = 0;

        if (!ring->active)
            continue;

        if (ring->xenblkd) {
            kthread_stop(ring->xenblkd);
            ring->xenblkd = NULL;
            wake_up(&ring->shutdown_wq);
        }

        /* The above kthread_stop() guarantees that at this point we
         * don't have any discard_io or other_io requests. So, checking
         * for inflight IO is enough.
         */
        if (atomic_read(&ring->inflight) > 0) {
            busy = true;
            continue;
        }

        if (ring->irq) {
            unbind_from_irqhandler(ring->irq, ring);
            ring->irq = 0;
        }

        if (ring->blk_rings.common.sring) {
            xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
            ring->blk_rings.common.sring = NULL;
        }

        /* Remove all persistent grants and the cache of ballooned pages. */
        xen_blkbk_free_caches(ring);

        /* Free the pending requests, counting them so the WARN_ON below
         * can verify that none are still in use.
         */
        list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
            list_del(&req->free_list);

            for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
                kfree(req->segments[j]);

            for (j = 0; j < MAX_INDIRECT_PAGES; j++)
                kfree(req->indirect_pages[j]);

            kfree(req);
            i++;
        }

        BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
        BUG_ON(!list_empty(&ring->persistent_purge_list));
        BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
        BUG_ON(ring->free_pages.num_pages != 0);
        BUG_ON(ring->persistent_gnt_c != 0);
        WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
        ring->active = false;
    }
    if (busy)
        return -EBUSY;

    blkif->nr_ring_pages = 0;
    /*
     * blkif->rings was allocated in connect_ring, so free it here once
     * every ring has been torn down.
     */
    kfree(blkif->rings);
    blkif->rings = NULL;
    blkif->nr_rings = 0;

    return 0;
}

static void xen_blkif_free(struct xen_blkif *blkif)
{
    WARN_ON(xen_blkif_disconnect(blkif));
    xen_vbd_free(&blkif->vbd);
    kfree(blkif->be->mode);
    kfree(blkif->be);

    /* Make sure everything is drained before shutting down */
    kmem_cache_free(xen_blkif_cachep, blkif);
    module_put(THIS_MODULE);
}

int __init xen_blkif_interface_init(void)
{
    xen_blkif_cachep = kmem_cache_create("blkif_cache",
                         sizeof(struct xen_blkif),
                         0, 0, NULL);
    if (!xen_blkif_cachep)
        return -ENOMEM;

    return 0;
}

void xen_blkif_interface_fini(void)
{
    kmem_cache_destroy(xen_blkif_cachep);
    xen_blkif_cachep = NULL;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW_ALLRING(name, format)                  \
    static ssize_t show_##name(struct device *_dev,         \
                   struct device_attribute *attr,   \
                   char *buf)               \
    {                               \
        struct xenbus_device *dev = to_xenbus_device(_dev); \
        struct backend_info *be = dev_get_drvdata(&dev->dev);   \
        struct xen_blkif *blkif = be->blkif;            \
        unsigned int i;                     \
        unsigned long long result = 0;              \
                                    \
        if (!blkif->rings)              \
            goto out;                   \
                                    \
        for (i = 0; i < blkif->nr_rings; i++) {     \
            struct xen_blkif_ring *ring = &blkif->rings[i]; \
                                    \
            result += ring->st_##name;          \
        }                           \
                                    \
out:                                    \
        return sprintf(buf, format, result);            \
    }                               \
    static DEVICE_ATTR(name, 0444, show_##name, NULL)

VBD_SHOW_ALLRING(oo_req,  "%llu\n");
VBD_SHOW_ALLRING(rd_req,  "%llu\n");
VBD_SHOW_ALLRING(wr_req,  "%llu\n");
VBD_SHOW_ALLRING(f_req,  "%llu\n");
VBD_SHOW_ALLRING(ds_req,  "%llu\n");
VBD_SHOW_ALLRING(rd_sect, "%llu\n");
VBD_SHOW_ALLRING(wr_sect, "%llu\n");

static struct attribute *xen_vbdstat_attrs[] = {
    &dev_attr_oo_req.attr,
    &dev_attr_rd_req.attr,
    &dev_attr_wr_req.attr,
    &dev_attr_f_req.attr,
    &dev_attr_ds_req.attr,
    &dev_attr_rd_sect.attr,
    &dev_attr_wr_sect.attr,
    NULL
};

static const struct attribute_group xen_vbdstat_group = {
    .name = "statistics",
    .attrs = xen_vbdstat_attrs,
};
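
/*
 * The per-VBD counters land in the backend device's "statistics" sysfs
 * group, e.g. (hypothetical domid 1 / handle 51712):
 * /sys/bus/xen-backend/devices/vbd-1-51712/statistics/rd_req
 */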

#define VBD_SHOW(name, format, args...)                 \
    static ssize_t show_##name(struct device *_dev,         \
                   struct device_attribute *attr,   \
                   char *buf)               \
    {                               \
        struct xenbus_device *dev = to_xenbus_device(_dev); \
        struct backend_info *be = dev_get_drvdata(&dev->dev);   \
                                    \
        return sprintf(buf, format, ##args);            \
    }                               \
    static DEVICE_ATTR(name, 0444, show_##name, NULL)

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
    int error;

    error = device_create_file(&dev->dev, &dev_attr_physical_device);
    if (error)
        goto fail1;

    error = device_create_file(&dev->dev, &dev_attr_mode);
    if (error)
        goto fail2;

    error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
    if (error)
        goto fail3;

    return 0;

fail3:  sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:  device_remove_file(&dev->dev, &dev_attr_mode);
fail1:  device_remove_file(&dev->dev, &dev_attr_physical_device);
    return error;
}

static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
    sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
    device_remove_file(&dev->dev, &dev_attr_mode);
    device_remove_file(&dev->dev, &dev_attr_physical_device);
}

static void xen_vbd_free(struct xen_vbd *vbd)
{
    if (vbd->bdev)
        blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
    vbd->bdev = NULL;
}

static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
              unsigned major, unsigned minor, int readonly,
              int cdrom)
{
    struct xen_vbd *vbd;
    struct block_device *bdev;

    vbd = &blkif->vbd;
    vbd->handle   = handle;
    vbd->readonly = readonly;
    vbd->type     = 0;

    vbd->pdevice  = MKDEV(major, minor);

    bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
                 FMODE_READ : FMODE_WRITE, NULL);

    if (IS_ERR(bdev)) {
        pr_warn("xen_vbd_create: device %08x could not be opened\n",
            vbd->pdevice);
        return -ENOENT;
    }

    vbd->bdev = bdev;
    if (vbd->bdev->bd_disk == NULL) {
        pr_warn("xen_vbd_create: device %08x doesn't exist\n",
            vbd->pdevice);
        xen_vbd_free(vbd);
        return -ENOENT;
    }
    vbd->size = vbd_sz(vbd);

    if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
        vbd->type |= VDISK_CDROM;
    if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
        vbd->type |= VDISK_REMOVABLE;

    if (bdev_write_cache(bdev))
        vbd->flush_support = true;
    if (bdev_max_secure_erase_sectors(bdev))
        vbd->discard_secure = true;

    pr_debug("Successful creation of handle=%04x (dom=%u)\n",
        handle, blkif->domid);
    return 0;
}

static int xen_blkbk_remove(struct xenbus_device *dev)
{
    struct backend_info *be = dev_get_drvdata(&dev->dev);

    pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

    if (be->major || be->minor)
        xenvbd_sysfs_delif(dev);

    if (be->backend_watch.node) {
        unregister_xenbus_watch(&be->backend_watch);
        kfree(be->backend_watch.node);
        be->backend_watch.node = NULL;
    }

    dev_set_drvdata(&dev->dev, NULL);

    if (be->blkif) {
        xen_blkif_disconnect(be->blkif);

        /* Put the reference we set in xen_blkif_alloc(). */
        xen_blkif_put(be->blkif);
    }

    return 0;
}

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                  struct backend_info *be, int state)
{
    struct xenbus_device *dev = be->dev;
    int err;

    err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
                "%d", state);
    if (err)
        dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);

    return err;
}

static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
    struct xenbus_device *dev = be->dev;
    struct xen_blkif *blkif = be->blkif;
    int err;
    int state = 0;
    struct block_device *bdev = be->blkif->vbd.bdev;

    if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
        return;

    if (bdev_max_discard_sectors(bdev)) {
        err = xenbus_printf(xbt, dev->nodename,
            "discard-granularity", "%u",
            bdev_discard_granularity(bdev));
        if (err) {
            dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
            return;
        }
        err = xenbus_printf(xbt, dev->nodename,
            "discard-alignment", "%u",
            bdev_discard_alignment(bdev));
        if (err) {
            dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
            return;
        }
        state = 1;
        /* Optional. */
        err = xenbus_printf(xbt, dev->nodename,
                    "discard-secure", "%d",
                    blkif->vbd.discard_secure);
        if (err) {
            dev_warn(&dev->dev, "writing discard-secure (%d)", err);
            return;
        }
    }
    err = xenbus_printf(xbt, dev->nodename, "feature-discard",
                "%d", state);
    if (err)
        dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}

int xen_blkbk_barrier(struct xenbus_transaction xbt,
              struct backend_info *be, int state)
{
    struct xenbus_device *dev = be->dev;
    int err;

    err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
                "%d", state);
    if (err)
        dev_warn(&dev->dev, "writing feature-barrier (%d)", err);

    return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
               const struct xenbus_device_id *id)
{
    int err;
    struct backend_info *be = kzalloc(sizeof(struct backend_info),
                      GFP_KERNEL);

    /* match the pr_debug in xen_blkbk_remove */
    pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

    if (!be) {
        xenbus_dev_fatal(dev, -ENOMEM,
                 "allocating backend structure");
        return -ENOMEM;
    }
    be->dev = dev;
    dev_set_drvdata(&dev->dev, be);

    be->blkif = xen_blkif_alloc(dev->otherend_id);
    if (IS_ERR(be->blkif)) {
        err = PTR_ERR(be->blkif);
        be->blkif = NULL;
        xenbus_dev_fatal(dev, err, "creating block interface");
        goto fail;
    }

    err = xenbus_printf(XBT_NIL, dev->nodename,
                "feature-max-indirect-segments", "%u",
                MAX_INDIRECT_SEGMENTS);
    if (err)
        dev_warn(&dev->dev,
             "writing %s/feature-max-indirect-segments (%d)",
             dev->nodename, err);

    /* Multi-queue: advertise how many queues we support. */
    err = xenbus_printf(XBT_NIL, dev->nodename,
                "multi-queue-max-queues", "%u", xenblk_max_queues);
    if (err)
        pr_warn("Error writing multi-queue-max-queues\n");

    /* setup back pointer */
    be->blkif->be = be;

    err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL,
                   backend_changed,
                   "%s/%s", dev->nodename, "physical-device");
    if (err)
        goto fail;

    err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
                xen_blkif_max_ring_order);
    if (err)
        pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

    err = xenbus_switch_state(dev, XenbusStateInitWait);
    if (err)
        goto fail;

    return 0;

fail:
    pr_warn("%s failed\n", __func__);
    xen_blkbk_remove(dev);
    return err;
}

/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
                const char *path, const char *token)
{
    int err;
    unsigned major;
    unsigned minor;
    struct backend_info *be
        = container_of(watch, struct backend_info, backend_watch);
    struct xenbus_device *dev = be->dev;
    int cdrom = 0;
    unsigned long handle;
    char *device_type;

    pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

    err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
               &major, &minor);
    if (XENBUS_EXIST_ERR(err)) {
        /*
         * Since this watch will fire once immediately after it is
         * registered, we expect this.  Ignore it, and wait for the
         * hotplug scripts.
         */
        return;
    }
    if (err != 2) {
        xenbus_dev_fatal(dev, err, "reading physical-device");
        return;
    }

    if (be->major | be->minor) {
        if (be->major != major || be->minor != minor)
            pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
                be->major, be->minor, major, minor);
        return;
    }

    be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
    if (IS_ERR(be->mode)) {
        err = PTR_ERR(be->mode);
        be->mode = NULL;
        xenbus_dev_fatal(dev, err, "reading mode");
        return;
    }

    device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
    if (!IS_ERR(device_type)) {
        cdrom = strcmp(device_type, "cdrom") == 0;
        kfree(device_type);
    }

    /* Front end dir is a number, which is used as the handle. */
    err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
    if (err) {
        kfree(be->mode);
        be->mode = NULL;
        return;
    }

    be->major = major;
    be->minor = minor;

    err = xen_vbd_create(be->blkif, handle, major, minor,
                 !strchr(be->mode, 'w'), cdrom);

    if (err)
        xenbus_dev_fatal(dev, err, "creating vbd structure");
    else {
        err = xenvbd_sysfs_addif(dev);
        if (err) {
            xen_vbd_free(&be->blkif->vbd);
            xenbus_dev_fatal(dev, err, "creating sysfs entries");
        }
    }

    if (err) {
        kfree(be->mode);
        be->mode = NULL;
        be->major = 0;
        be->minor = 0;
    } else {
        /* We're potentially connected now */
        xen_update_blkif_status(be->blkif);
    }
}

/*
 * Callback received when the frontend's state changes.
 */
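/*
 * For orientation, a sketch of the common handshake as handled below:
 * the frontend moves Initialising -> Initialised/Connected, upon which
 * we set up the rings (connect_ring) and publish the disk (connect);
 * Closing/Closed tear the connection down, and an offline Closed
 * device is unregistered entirely.
 */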
static void frontend_changed(struct xenbus_device *dev,
                 enum xenbus_state frontend_state)
{
    struct backend_info *be = dev_get_drvdata(&dev->dev);
    int err;

    pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

    switch (frontend_state) {
    case XenbusStateInitialising:
        if (dev->state == XenbusStateClosed) {
            pr_info("%s: prepare for reconnect\n", dev->nodename);
            xenbus_switch_state(dev, XenbusStateInitWait);
        }
        break;

    case XenbusStateInitialised:
    case XenbusStateConnected:
        /*
         * Ensure we connect even when two watches fire in
         * close succession and we miss the intermediate value
         * of frontend_state.
         */
        if (dev->state == XenbusStateConnected)
            break;

        /*
         * Enforce precondition before potential leak point.
         * xen_blkif_disconnect() is idempotent.
         */
        err = xen_blkif_disconnect(be->blkif);
        if (err) {
            xenbus_dev_fatal(dev, err, "pending I/O");
            break;
        }

        err = connect_ring(be);
        if (err) {
            /*
             * Clean up so that memory resources can be used by
             * other devices. connect_ring() has already reported
             * the error.
             */
            xen_blkif_disconnect(be->blkif);
            break;
        }
        xen_update_blkif_status(be->blkif);
        break;

    case XenbusStateClosing:
        xenbus_switch_state(dev, XenbusStateClosing);
        break;

    case XenbusStateClosed:
        xen_blkif_disconnect(be->blkif);
        xenbus_switch_state(dev, XenbusStateClosed);
        if (xenbus_dev_is_online(dev))
            break;
        fallthrough;
        /* if not online */
    case XenbusStateUnknown:
        /* implies xen_blkif_disconnect() via xen_blkbk_remove() */
        device_unregister(&dev->dev);
        break;

    default:
        xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
                 frontend_state);
        break;
    }
}

/* Once memory pressure is detected, squeeze free page pools for a while. */
static unsigned int buffer_squeeze_duration_ms = 10;
module_param_named(buffer_squeeze_duration_ms,
        buffer_squeeze_duration_ms, uint, 0644);
MODULE_PARM_DESC(buffer_squeeze_duration_ms,
"Duration in ms to squeeze pages buffer when memory pressure is detected");

/*
 * Callback received when memory pressure is detected.
 */
static void reclaim_memory(struct xenbus_device *dev)
{
    struct backend_info *be = dev_get_drvdata(&dev->dev);

    if (!be)
        return;
    be->blkif->buffer_squeeze_end = jiffies +
        msecs_to_jiffies(buffer_squeeze_duration_ms);
}

/* ** Connection ** */

/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
    struct xenbus_transaction xbt;
    int err;
    struct xenbus_device *dev = be->dev;

    pr_debug("%s %s\n", __func__, dev->otherend);

    /* Supply the information about the device that the frontend needs. */
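    /*
     * All of the keys below are written in a single xenbus transaction;
     * if the commit returns -EAGAIN (another writer raced with us) the
     * whole transaction is restarted from "again".
     */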
again:
    err = xenbus_transaction_start(&xbt);
    if (err) {
        xenbus_dev_fatal(dev, err, "starting transaction");
        return;
    }

    /* It is OK if we can't advertise these features. */
    xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

    xen_blkbk_discard(xbt, be);

    xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

    err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
            be->blkif->vbd.feature_gnt_persistent_parm);
    if (err) {
        xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
                 dev->nodename);
        goto abort;
    }

    err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
                (unsigned long long)vbd_sz(&be->blkif->vbd));
    if (err) {
        xenbus_dev_fatal(dev, err, "writing %s/sectors",
                 dev->nodename);
        goto abort;
    }

    /* FIXME: use a typename instead */
    err = xenbus_printf(xbt, dev->nodename, "info", "%u",
                be->blkif->vbd.type |
                (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
    if (err) {
        xenbus_dev_fatal(dev, err, "writing %s/info",
                 dev->nodename);
        goto abort;
    }
    err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
                (unsigned long)
                bdev_logical_block_size(be->blkif->vbd.bdev));
    if (err) {
        xenbus_dev_fatal(dev, err, "writing %s/sector-size",
                 dev->nodename);
        goto abort;
    }
    err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
                bdev_physical_block_size(be->blkif->vbd.bdev));
    if (err)
        xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
                 dev->nodename);

    err = xenbus_transaction_end(xbt, 0);
    if (err == -EAGAIN)
        goto again;
    if (err)
        xenbus_dev_fatal(dev, err, "ending transaction");

    err = xenbus_switch_state(dev, XenbusStateConnected);
    if (err)
        xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
                 dev->nodename);

    return;
 abort:
    xenbus_transaction_end(xbt, 1);
}

/*
 * Each ring may span multiple pages, depending on "ring-page-order".
 */
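/*
 * A sketch of the per-ring nodes read below, relative to "dir":
 * "event-channel", plus either a single "ring-ref" (one-page rings from
 * frontends that did not negotiate "ring-page-order") or
 * "ring-ref0".."ring-refN-1" when multiple grant references are used.
 */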
static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
{
    unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
    struct pending_req *req, *n;
    int err, i, j;
    struct xen_blkif *blkif = ring->blkif;
    struct xenbus_device *dev = blkif->be->dev;
    unsigned int nr_grefs, evtchn;

    err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
              &evtchn);
    if (err != 1) {
        err = -EINVAL;
        xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
        return err;
    }

    nr_grefs = blkif->nr_ring_pages;

    if (unlikely(!nr_grefs)) {
        WARN_ON(true);
        return -EINVAL;
    }

    for (i = 0; i < nr_grefs; i++) {
        char ring_ref_name[RINGREF_NAME_LEN];

        if (blkif->multi_ref)
            snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
        else {
            WARN_ON(i != 0);
            snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref");
        }

        err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
                   "%u", &ring_ref[i]);

        if (err != 1) {
            err = -EINVAL;
            xenbus_dev_fatal(dev, err, "reading %s/%s",
                     dir, ring_ref_name);
            return err;
        }
    }

    err = -ENOMEM;
    for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
        req = kzalloc(sizeof(*req), GFP_KERNEL);
        if (!req)
            goto fail;
        list_add_tail(&req->free_list, &ring->pending_free);
        for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
            req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
            if (!req->segments[j])
                goto fail;
        }
        for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
            req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
                             GFP_KERNEL);
            if (!req->indirect_pages[j])
                goto fail;
        }
    }

    /* Map the shared frame, irq etc. */
    err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
    if (err) {
        xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
        goto fail;
    }

    return 0;

fail:
    list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
        list_del(&req->free_list);
        for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
            if (!req->segments[j])
                break;
            kfree(req->segments[j]);
        }
        for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
            if (!req->indirect_pages[j])
                break;
            kfree(req->indirect_pages[j]);
        }
        kfree(req);
    }
    return err;
}

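/*
 * Negotiate the connection parameters the frontend published under its
 * directory: the ring ABI ("protocol"), persistent grants, the number
 * of queues and the ring page order, then read each ring's grant
 * references and event channel (from per-queue "queue-%u" subdirectories
 * when more than one queue is used).
 */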
static int connect_ring(struct backend_info *be)
{
    struct xenbus_device *dev = be->dev;
    struct xen_blkif *blkif = be->blkif;
    char protocol[64] = "";
    int err, i;
    char *xspath;
    size_t xspathsize;
    const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
    unsigned int requested_num_queues = 0;
    unsigned int ring_page_order;

    pr_debug("%s %s\n", __func__, dev->otherend);

    blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
    err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
               "%63s", protocol);
    if (err <= 0)
        strcpy(protocol, "unspecified, assuming default");
    else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
        blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
    else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
        blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
    else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
        blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
    else {
        xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
        return -ENOSYS;
    }

    blkif->vbd.feature_gnt_persistent_parm = feature_persistent;
    blkif->vbd.feature_gnt_persistent =
        blkif->vbd.feature_gnt_persistent_parm &&
        xenbus_read_unsigned(dev->otherend, "feature-persistent", 0);

    blkif->vbd.overflow_max_grants = 0;

    /*
     * Read the number of hardware queues from frontend.
     */
    requested_num_queues = xenbus_read_unsigned(dev->otherend,
                            "multi-queue-num-queues",
                            1);
    if (requested_num_queues > xenblk_max_queues
        || requested_num_queues == 0) {
        /* Buggy or malicious guest. Pass a real errno, not the stale
         * result of the protocol read above.
         */
        xenbus_dev_fatal(dev, -EINVAL,
                "guest requested %u queues, exceeding the maximum of %u.",
                requested_num_queues, xenblk_max_queues);
        return -ENOSYS;
    }
    blkif->nr_rings = requested_num_queues;
    if (xen_blkif_alloc_rings(blkif))
        return -ENOMEM;

    pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
         blkif->nr_rings, blkif->blk_protocol, protocol,
         blkif->vbd.feature_gnt_persistent ? "persistent grants" : "");

    err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
               &ring_page_order);
    if (err != 1) {
        blkif->nr_ring_pages = 1;
        blkif->multi_ref = false;
    } else if (ring_page_order <= xen_blkif_max_ring_order) {
        blkif->nr_ring_pages = 1 << ring_page_order;
        blkif->multi_ref = true;
    } else {
        err = -EINVAL;
        xenbus_dev_fatal(dev, err,
                 "requested ring page order %d exceeds max of %d",
                 ring_page_order,
                 xen_blkif_max_ring_order);
        return err;
    }

    if (blkif->nr_rings == 1)
        return read_per_ring_refs(&blkif->rings[0], dev->otherend);
    else {
        xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
        xspath = kmalloc(xspathsize, GFP_KERNEL);
        if (!xspath) {
            xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
            return -ENOMEM;
        }

        for (i = 0; i < blkif->nr_rings; i++) {
            memset(xspath, 0, xspathsize);
            snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
            err = read_per_ring_refs(&blkif->rings[i], xspath);
            if (err) {
                kfree(xspath);
                return err;
            }
        }
        kfree(xspath);
    }
    return 0;
}

static const struct xenbus_device_id xen_blkbk_ids[] = {
    { "vbd" },
    { "" }
};

static struct xenbus_driver xen_blkbk_driver = {
    .ids  = xen_blkbk_ids,
    .probe = xen_blkbk_probe,
    .remove = xen_blkbk_remove,
    .otherend_changed = frontend_changed,
    .allow_rebind = true,
    .reclaim_memory = reclaim_memory,
};

int xen_blkif_xenbus_init(void)
{
    return xenbus_register_backend(&xen_blkbk_driver);
}

void xen_blkif_xenbus_fini(void)
{
    xenbus_unregister_driver(&xen_blkbk_driver);
}