/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

extern unsigned int xen_blkif_max_ring_order;
extern unsigned int xenblk_max_queues;
/*
 * This is the maximum number of segments allowed in an indirect request.
 * This value is also advertised to the frontend.
 */
#define MAX_INDIRECT_SEGMENTS 256

/*
 * Xen uses 4K pages. The guest may use a different page size (4K or 64K).
 * Number of Xen pages per segment:
 */
#define XEN_PAGES_PER_SEGMENT   (PAGE_SIZE / XEN_PAGE_SIZE)

#define XEN_PAGES_PER_INDIRECT_FRAME \
    (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
#define SEGS_PER_INDIRECT_FRAME \
    (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)

#define MAX_INDIRECT_PAGES \
    ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)

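/*
 * Worked example (illustrative, not part of the original header): assuming
 * 4K Xen and guest pages and an 8-byte struct blkif_request_segment,
 * XEN_PAGES_PER_SEGMENT = 1, XEN_PAGES_PER_INDIRECT_FRAME = 4096 / 8 = 512
 * and SEGS_PER_INDIRECT_FRAME = 512, so MAX_INDIRECT_PAGES =
 * (256 + 511) / 512 = 1: a request carrying MAX_INDIRECT_SEGMENTS segments
 * fits its descriptors in a single indirect grant page.
 */
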
/* Not a real protocol.  Used to generate ring structs which contain
 * the elements common to all protocols only.  This way we get a
 * compiler-checkable way to use common struct elements, so we can
 * avoid using switch(protocol) in a number of places.  */
struct blkif_common_request {
    char dummy;
};

/* i386 protocol version */

struct blkif_x86_32_request_rw {
    uint8_t        nr_segments;  /* number of segments                   */
    blkif_vdev_t   handle;       /* only for read/write requests         */
    uint64_t       id;           /* private guest value, echoed in resp  */
    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_32_request_discard {
    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
    blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
    uint64_t       id;           /* private guest value, echoed in resp  */
    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
    uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_32_request_other {
    uint8_t        _pad1;
    blkif_vdev_t   _pad2;
    uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_32_request_indirect {
    uint8_t        indirect_op;
    uint16_t       nr_segments;
    uint64_t       id;
    blkif_sector_t sector_number;
    blkif_vdev_t   handle;
    uint16_t       _pad1;
    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
    /*
     * The maximum number of indirect segments (and pages) that will
     * be used is determined by MAX_INDIRECT_SEGMENTS; this value
     * is also exported to the guest (via the xenstore
     * feature-max-indirect-segments entry), so the frontend knows how
     * many indirect segments the backend supports.
     */
    uint64_t       _pad2;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_32_request {
    uint8_t        operation;    /* BLKIF_OP_???                         */
    union {
        struct blkif_x86_32_request_rw rw;
        struct blkif_x86_32_request_discard discard;
        struct blkif_x86_32_request_other other;
        struct blkif_x86_32_request_indirect indirect;
    } u;
} __attribute__((__packed__));

/* x86_64 protocol version */

struct blkif_x86_64_request_rw {
    uint8_t        nr_segments;  /* number of segments                   */
    blkif_vdev_t   handle;       /* only for read/write requests         */
    uint32_t       _pad1;        /* offsetof(blkif_request..,u.rw.id)==8 */
    uint64_t       id;
    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_x86_64_request_discard {
    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
    blkif_vdev_t   _pad1;        /* was "handle" for read/write requests */
    uint32_t       _pad2;        /* offsetof(blkif_..,u.discard.id)==8   */
    uint64_t       id;
    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
    uint64_t       nr_sectors;
} __attribute__((__packed__));

struct blkif_x86_64_request_other {
    uint8_t        _pad1;
    blkif_vdev_t   _pad2;
    uint32_t       _pad3;        /* offsetof(blkif_..,u.other.id)==8     */
    uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_x86_64_request_indirect {
    uint8_t        indirect_op;
    uint16_t       nr_segments;
    uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8  */
    uint64_t       id;
    blkif_sector_t sector_number;
    blkif_vdev_t   handle;
    uint16_t       _pad2;
    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
    /*
     * The maximum number of indirect segments (and pages) that will
     * be used is determined by MAX_INDIRECT_SEGMENTS; this value
     * is also exported to the guest (via the xenstore
     * feature-max-indirect-segments entry), so the frontend knows how
     * many indirect segments the backend supports.
     */
    uint32_t       _pad3;        /* make it 64 byte aligned */
} __attribute__((__packed__));

struct blkif_x86_64_request {
    uint8_t        operation;    /* BLKIF_OP_???                         */
    union {
        struct blkif_x86_64_request_rw rw;
        struct blkif_x86_64_request_discard discard;
        struct blkif_x86_64_request_other other;
        struct blkif_x86_64_request_indirect indirect;
    } u;
} __attribute__((__packed__));

DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
          struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
          struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
          struct blkif_response);

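/*
 * Illustrative note (not part of the original header): DEFINE_RING_TYPES()
 * from <xen/interface/io/ring.h> expands, for each name, to a shared-ring
 * struct plus front- and back-ring bookkeeping types; e.g. "blkif_x86_32"
 * yields struct blkif_x86_32_sring and struct blkif_x86_32_back_ring.  A
 * backend would typically attach to an already-mapped shared page roughly
 * like this (shared_ring_addr and ring_size are placeholder names):
 *
 *     struct blkif_x86_32_sring *sring = shared_ring_addr;
 *     BACK_RING_INIT(&blk_rings->x86_32, sring, ring_size);
 */
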
union blkif_back_rings {
    struct blkif_back_ring        native;
    struct blkif_common_back_ring common;
    struct blkif_x86_32_back_ring x86_32;
    struct blkif_x86_64_back_ring x86_64;
};

enum blkif_protocol {
    BLKIF_PROTOCOL_NATIVE = 1,
    BLKIF_PROTOCOL_X86_32 = 2,
    BLKIF_PROTOCOL_X86_64 = 3,
};

/*
 * Default protocol if the frontend doesn't specify one.
 */
#ifdef CONFIG_X86
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_X86_32
#else
#  define BLKIF_PROTOCOL_DEFAULT BLKIF_PROTOCOL_NATIVE
#endif
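
/*
 * Illustrative note (not part of the original header): the frontend usually
 * advertises its ABI through the xenstore "protocol" node using the strings
 * from <xen/interface/io/protocols.h> (e.g. XEN_IO_PROTO_ABI_X86_32, i.e.
 * "x86_32-abi"); when that node is absent the backend falls back to
 * BLKIF_PROTOCOL_DEFAULT above.
 */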

struct xen_vbd {
    /* What the domain refers to this vbd as. */
    blkif_vdev_t        handle;
    /* Non-zero -> read-only */
    unsigned char       readonly;
    /* VDISK_xxx */
    unsigned char       type;
    /* phys device that this vbd maps to. */
    u32         pdevice;
    struct block_device *bdev;
    /* Cached size parameter. */
    sector_t        size;
    unsigned int        flush_support:1;
    unsigned int        discard_secure:1;
    /* Connect-time cached feature_persistent parameter value */
    unsigned int        feature_gnt_persistent_parm:1;
    /* Persistent grants feature negotiation result */
    unsigned int        feature_gnt_persistent:1;
    unsigned int        overflow_max_grants:1;
};

struct backend_info;

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE     32

struct persistent_gnt {
    struct page *page;
    grant_ref_t gnt;
    grant_handle_t handle;
    unsigned long last_used;
    bool active;
    struct rb_node node;
    struct list_head remove_node;
};

/* Per-ring information. */
struct xen_blkif_ring {
    /* Physical parameters of the comms window. */
    unsigned int        irq;
    union blkif_back_rings  blk_rings;
    void            *blk_ring;
    /* Private fields. */
    spinlock_t      blk_ring_lock;

    wait_queue_head_t   wq;
    atomic_t        inflight;
    bool            active;
    /* One thread per blkif ring. */
    struct task_struct  *xenblkd;
    unsigned int        waiting_reqs;

    /* List of all 'pending_req' available */
    struct list_head    pending_free;
    /* And its spinlock. */
    spinlock_t      pending_free_lock;
    wait_queue_head_t   pending_free_wq;

    /* Tree to store persistent grants. */
    struct rb_root      persistent_gnts;
    unsigned int        persistent_gnt_c;
    atomic_t        persistent_gnt_in_use;
    unsigned long           next_lru;

    /* Statistics. */
    unsigned long       st_print;
    unsigned long long  st_rd_req;
    unsigned long long  st_wr_req;
    unsigned long long  st_oo_req;
    unsigned long long  st_f_req;
    unsigned long long  st_ds_req;
    unsigned long long  st_rd_sect;
    unsigned long long  st_wr_sect;

    /* Used by the kworker that offloads work from the persistent purge. */
    struct list_head    persistent_purge_list;
    struct work_struct  persistent_purge_work;

    /* Buffer of free pages to map grant refs. */
    struct gnttab_page_cache free_pages;

    struct work_struct  free_work;
    /* Thread shutdown wait queue. */
    wait_queue_head_t   shutdown_wq;
    struct xen_blkif    *blkif;
};

struct xen_blkif {
    /* Unique identifier for this interface. */
    domid_t         domid;
    unsigned int        handle;
    /* Comms information. */
    enum blkif_protocol blk_protocol;
    /* The VBD attached to this interface. */
    struct xen_vbd      vbd;
    /* Back pointer to the backend_info. */
    struct backend_info *be;
    atomic_t        refcnt;
    /* for barrier (drain) requests */
    struct completion   drain_complete;
    atomic_t        drain;

    struct work_struct  free_work;
    unsigned int        nr_ring_pages;
    bool            multi_ref;
    /* All rings for this device. */
    struct xen_blkif_ring   *rings;
    unsigned int        nr_rings;
    unsigned long       buffer_squeeze_end;
};

struct seg_buf {
    unsigned long offset;
    unsigned int nsec;
};

struct grant_page {
    struct page         *page;
    struct persistent_gnt   *persistent_gnt;
    grant_handle_t      handle;
    grant_ref_t     gref;
};

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each buffer_head that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
struct pending_req {
    struct xen_blkif_ring   *ring;
    u64         id;
    int         nr_segs;
    atomic_t        pendcnt;
    unsigned short      operation;
    int         status;
    struct list_head    free_list;
    struct grant_page   *segments[MAX_INDIRECT_SEGMENTS];
    /* Indirect descriptors */
    struct grant_page   *indirect_pages[MAX_INDIRECT_PAGES];
    struct seg_buf      seg[MAX_INDIRECT_SEGMENTS];
    struct bio      *biolist[MAX_INDIRECT_SEGMENTS];
    struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
    struct page                   *unmap_pages[MAX_INDIRECT_SEGMENTS];
    struct gntab_unmap_queue_data gnttab_unmap_data;
};


#define vbd_sz(_v)  bdev_nr_sectors((_v)->bdev)

#define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define xen_blkif_put(_b)               \
    do {                        \
        if (atomic_dec_and_test(&(_b)->refcnt)) \
            schedule_work(&(_b)->free_work);\
    } while (0)
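
/*
 * Illustrative usage (not part of the original header): the get/put pair
 * brackets any code path that keeps the blkif alive beyond the caller's
 * own reference, e.g.:
 *
 *     xen_blkif_get(blkif);
 *     ... hand blkif to deferred/asynchronous work ...
 *     xen_blkif_put(blkif);    -- schedules free_work once refcnt hits 0
 */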

struct phys_req {
    unsigned short      dev;
    blkif_sector_t      nr_sects;
    struct block_device *bdev;
    blkif_sector_t      sector_number;
};

int xen_blkif_interface_init(void);
void xen_blkif_interface_fini(void);

int xen_blkif_xenbus_init(void);
void xen_blkif_xenbus_fini(void);

irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif_ring *ring);

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
                  struct backend_info *be, int state);

int xen_blkbk_barrier(struct xenbus_transaction xbt,
              struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);

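/*
 * Explanatory note (added, not part of the original header): the two helpers
 * below copy a request from the guest-shared ring into a local, native-layout
 * struct blkif_request.  READ_ONCE() and barrier() appear to be there so the
 * untrusted operation and segment counts are read once and are not re-fetched
 * from shared memory after they have been range-checked on the local copy.
 */
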
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
                    struct blkif_x86_32_request *src)
{
    int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
    dst->operation = READ_ONCE(src->operation);
    switch (dst->operation) {
    case BLKIF_OP_READ:
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
    case BLKIF_OP_FLUSH_DISKCACHE:
        dst->u.rw.nr_segments = src->u.rw.nr_segments;
        dst->u.rw.handle = src->u.rw.handle;
        dst->u.rw.id = src->u.rw.id;
        dst->u.rw.sector_number = src->u.rw.sector_number;
        barrier();
        if (n > dst->u.rw.nr_segments)
            n = dst->u.rw.nr_segments;
        for (i = 0; i < n; i++)
            dst->u.rw.seg[i] = src->u.rw.seg[i];
        break;
    case BLKIF_OP_DISCARD:
        dst->u.discard.flag = src->u.discard.flag;
        dst->u.discard.id = src->u.discard.id;
        dst->u.discard.sector_number = src->u.discard.sector_number;
        dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
        break;
    case BLKIF_OP_INDIRECT:
        dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
        dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
        dst->u.indirect.handle = src->u.indirect.handle;
        dst->u.indirect.id = src->u.indirect.id;
        dst->u.indirect.sector_number = src->u.indirect.sector_number;
        barrier();
        j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
        for (i = 0; i < j; i++)
            dst->u.indirect.indirect_grefs[i] =
                src->u.indirect.indirect_grefs[i];
        break;
    default:
        /*
         * Don't know how to translate this op. Only get the
         * ID so failure can be reported to the frontend.
         */
        dst->u.other.id = src->u.other.id;
        break;
    }
}

static inline void blkif_get_x86_64_req(struct blkif_request *dst,
                    struct blkif_x86_64_request *src)
{
    int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
    dst->operation = READ_ONCE(src->operation);
    switch (dst->operation) {
    case BLKIF_OP_READ:
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
    case BLKIF_OP_FLUSH_DISKCACHE:
        dst->u.rw.nr_segments = src->u.rw.nr_segments;
        dst->u.rw.handle = src->u.rw.handle;
        dst->u.rw.id = src->u.rw.id;
        dst->u.rw.sector_number = src->u.rw.sector_number;
        barrier();
        if (n > dst->u.rw.nr_segments)
            n = dst->u.rw.nr_segments;
        for (i = 0; i < n; i++)
            dst->u.rw.seg[i] = src->u.rw.seg[i];
        break;
    case BLKIF_OP_DISCARD:
        dst->u.discard.flag = src->u.discard.flag;
        dst->u.discard.id = src->u.discard.id;
        dst->u.discard.sector_number = src->u.discard.sector_number;
        dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
        break;
    case BLKIF_OP_INDIRECT:
        dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
        dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
        dst->u.indirect.handle = src->u.indirect.handle;
        dst->u.indirect.id = src->u.indirect.id;
        dst->u.indirect.sector_number = src->u.indirect.sector_number;
        barrier();
        j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
        for (i = 0; i < j; i++)
            dst->u.indirect.indirect_grefs[i] =
                src->u.indirect.indirect_grefs[i];
        break;
    default:
        /*
         * Don't know how to translate this op. Only get the
         * ID so failure can be reported to the frontend.
         */
        dst->u.other.id = src->u.other.id;
        break;
    }
}

#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */