Back to home page

OSCL-LXR

 
 

    


0001 /******************************************************************************
0002  * xenbus_comms.c
0003  *
0004  * Low level code to talk to Xen Store: ringbuffer and event channel.
0005  *
0006  * Copyright (C) 2005 Rusty Russell, IBM Corporation
0007  *
0008  * This program is free software; you can redistribute it and/or
0009  * modify it under the terms of the GNU General Public License version 2
0010  * as published by the Free Software Foundation; or, when distributed
0011  * separately from the Linux kernel or incorporated into other
0012  * software packages, subject to the following license:
0013  *
0014  * Permission is hereby granted, free of charge, to any person obtaining a copy
0015  * of this source file (the "Software"), to deal in the Software without
0016  * restriction, including without limitation the rights to use, copy, modify,
0017  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
0018  * and to permit persons to whom the Software is furnished to do so, subject to
0019  * the following conditions:
0020  *
0021  * The above copyright notice and this permission notice shall be included in
0022  * all copies or substantial portions of the Software.
0023  *
0024  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0025  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0026  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0027  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0028  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
0029  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
0030  * IN THE SOFTWARE.
0031  */
0032 
0033 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0034 
0035 #include <linux/wait.h>
0036 #include <linux/interrupt.h>
0037 #include <linux/kthread.h>
0038 #include <linux/sched.h>
0039 #include <linux/err.h>
0040 #include <xen/xenbus.h>
0041 #include <asm/xen/hypervisor.h>
0042 #include <xen/events.h>
0043 #include <xen/page.h>
0044 #include "xenbus.h"
0045 
/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
/* Woken by the event-channel interrupt (wake_waiting) and by new work. */
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
/* Serialises writers and protects xb_write_list / xs_reply_list membership. */
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

/* IRQ bound to the xenstore event channel; 0 while unbound. */
static int xenbus_irq;
/* Kernel thread running xenbus_thread(); NULL when not running. */
static struct task_struct *xenbus_task;
0059 
0060 static irqreturn_t wake_waiting(int irq, void *unused)
0061 {
0062     wake_up(&xb_waitq);
0063     return IRQ_HANDLED;
0064 }
0065 
0066 static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
0067 {
0068     return ((prod - cons) <= XENSTORE_RING_SIZE);
0069 }
0070 
0071 static void *get_output_chunk(XENSTORE_RING_IDX cons,
0072                   XENSTORE_RING_IDX prod,
0073                   char *buf, uint32_t *len)
0074 {
0075     *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
0076     if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
0077         *len = XENSTORE_RING_SIZE - (prod - cons);
0078     return buf + MASK_XENSTORE_IDX(prod);
0079 }
0080 
0081 static const void *get_input_chunk(XENSTORE_RING_IDX cons,
0082                    XENSTORE_RING_IDX prod,
0083                    const char *buf, uint32_t *len)
0084 {
0085     *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
0086     if ((prod - cons) < *len)
0087         *len = prod - cons;
0088     return buf + MASK_XENSTORE_IDX(cons);
0089 }
0090 
0091 static int xb_data_to_write(void)
0092 {
0093     struct xenstore_domain_interface *intf = xen_store_interface;
0094 
0095     return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
0096         !list_empty(&xb_write_list);
0097 }
0098 
/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Copies up to @len bytes into the shared request ring, notifying the
 * remote end via the store event channel when it may have been waiting
 * for data. May return before all of @data is written (e.g. the ring
 * filled up); the caller is expected to retry with the remainder.
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			/* Indexes from the (untrusted) peer are bogus:
			 * reset the ring rather than index out of bounds. */
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		/* Ring full or nothing queued: report progress so far. */
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		/* avail == 0 means the snapshot was stale; re-read indexes. */
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer.
		 * Only notify if the peer may have caught up and gone to
		 * sleep waiting for more data. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
0151 
0152 static int xb_data_to_read(void)
0153 {
0154     struct xenstore_domain_interface *intf = xen_store_interface;
0155     return (intf->rsp_cons != intf->rsp_prod);
0156 }
0157 
/*
 * xb_read - low level read
 * @data: buffer to fill
 * @len: maximum number of bytes to copy
 *
 * Copies up to @len bytes out of the shared response ring, notifying
 * the remote end when space has been freed in a previously-full ring.
 * May return fewer bytes than requested (including 0 when the ring is
 * empty); returns the byte count consumed or -EIO on a corrupt ring.
 */
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		/* Ring empty: report what we have so far. */
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			/* Indexes from the (untrusted) peer are bogus:
			 * reset the ring rather than index out of bounds. */
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		/* avail == 0 means the snapshot was stale; re-read indexes. */
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer.
		 * Only notify if the ring was full (producer may be blocked
		 * waiting for space). */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
0204 
/*
 * process_msg - read and dispatch (part of) one message from xenstored
 *
 * Resumable state machine: the function-local static @state survives
 * across calls, so a message may arrive in several chunks over several
 * invocations from the xenbus thread. Returns 0 when it made progress
 * (possibly with the message still incomplete), or a -errno on failure.
 *
 * xs_response_mutex is acquired when a message starts and is held for
 * as long as state.in_msg is true, so save/restore cannot interleave
 * with a partially-read message.
 */
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;		/* header, read first */
		char *body;			/* payload destination */
		union {
			void *alloc;		/* raw allocation for kfree() */
			struct xs_watch_event *watch;	/* same memory, watch view */
		};
		bool in_msg;	/* currently inside a message (mutex held) */
		bool in_hdr;	/* still reading the header */
		unsigned int read;	/* bytes of hdr/body read so far */
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			/* Continue reading the header where we left off. */
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			/* Header still incomplete: come back later. */
			if (state.read != sizeof(state.msg))
				return 0;
			/* Reject oversized payloads from the peer. */
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		/* +1 for the NUL terminator appended below. */
		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		/* For watch events the body follows the xs_watch_event
		 * header inside the same allocation. */
		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	/* Read (the next chunk of) the payload. */
	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	/* Body still incomplete: come back later. */
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		/* Watch events are handed off whole; xs_watch_msg takes
		 * ownership of the allocation. */
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		/* A reply: find the request it answers by req_id. */
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		/* Unknown req_id: drop the message (frees body at out:). */
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			/* Restore the id the caller used before handing back. */
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			/* Requester gave up (aborted): nobody wants this. */
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

 out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}
0329 
/*
 * process_writes - push (part of) the oldest queued request to xenstored
 *
 * Resumable state machine: @state persists across calls, so one request
 * (header followed by its iovec segments) can be transmitted across
 * several invocations when the ring fills up. On full transmission the
 * request migrates from xb_write_list to xs_reply_list to await its
 * reply. Returns 0 on progress, or a -errno on a write failure (the
 * failed request is completed with XS_ERROR).
 */
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;	/* request in flight, or NULL */
		int idx;	/* -1 = header, else index into req->vec[] */
		unsigned int written;	/* bytes of current segment sent */
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	/* Start on the oldest queued request if none is in flight. */
	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	/* Requester gave up while we were (partially) sending. */
	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		/* idx == -1 sends the header; >= 0 sends payload segments. */
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		/* Ring full mid-segment: resume here next call. */
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	/* Fully sent: move to the reply list to await the response. */
	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

 out:
	mutex_unlock(&xb_write_mutex);

	return 0;

 out_err:
	/* Complete the request with an error (or free it if aborted). */
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}
0404 
/* Wait condition for the xenbus thread: anything to read or to write? */
static int xb_thread_work(void)
{
	if (xb_data_to_read())
		return 1;

	return xb_data_to_write();
}
0409 
0410 static int xenbus_thread(void *unused)
0411 {
0412     int err;
0413 
0414     while (!kthread_should_stop()) {
0415         if (wait_event_interruptible(xb_waitq, xb_thread_work()))
0416             continue;
0417 
0418         err = process_msg();
0419         if (err == -ENOMEM)
0420             schedule();
0421         else if (err)
0422             pr_warn_ratelimited("error %d while reading message\n",
0423                         err);
0424 
0425         err = process_writes();
0426         if (err)
0427             pr_warn_ratelimited("error %d while writing message\n",
0428                         err);
0429     }
0430 
0431     xenbus_task = NULL;
0432     return 0;
0433 }
0434 
0435 /**
0436  * xb_init_comms - Set up interrupt handler off store event channel.
0437  */
0438 int xb_init_comms(void)
0439 {
0440     struct xenstore_domain_interface *intf = xen_store_interface;
0441 
0442     if (intf->req_prod != intf->req_cons)
0443         pr_err("request ring is not quiescent (%08x:%08x)!\n",
0444                intf->req_cons, intf->req_prod);
0445 
0446     if (intf->rsp_prod != intf->rsp_cons) {
0447         pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
0448             intf->rsp_cons, intf->rsp_prod);
0449         /* breaks kdump */
0450         if (!reset_devices)
0451             intf->rsp_cons = intf->rsp_prod;
0452     }
0453 
0454     if (xenbus_irq) {
0455         /* Already have an irq; assume we're resuming */
0456         rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
0457     } else {
0458         int err;
0459 
0460         err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
0461                         0, "xenbus", &xb_waitq);
0462         if (err < 0) {
0463             pr_err("request irq failed %i\n", err);
0464             return err;
0465         }
0466 
0467         xenbus_irq = err;
0468 
0469         if (!xenbus_task) {
0470             xenbus_task = kthread_run(xenbus_thread, NULL,
0471                           "xenbus");
0472             if (IS_ERR(xenbus_task))
0473                 return PTR_ERR(xenbus_task);
0474         }
0475     }
0476 
0477     return 0;
0478 }
0479 
0480 void xb_deinit_comms(void)
0481 {
0482     unbind_from_irqhandler(xenbus_irq, &xb_waitq);
0483     xenbus_irq = 0;
0484 }