Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* ldc.c: Logical Domain Channel link-layer protocol driver.
0003  *
0004  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
0005  */
0006 
0007 #include <linux/kernel.h>
0008 #include <linux/export.h>
0009 #include <linux/slab.h>
0010 #include <linux/spinlock.h>
0011 #include <linux/delay.h>
0012 #include <linux/errno.h>
0013 #include <linux/string.h>
0014 #include <linux/scatterlist.h>
0015 #include <linux/interrupt.h>
0016 #include <linux/list.h>
0017 #include <linux/init.h>
0018 #include <linux/bitmap.h>
0019 #include <asm/iommu-common.h>
0020 
0021 #include <asm/hypervisor.h>
0022 #include <asm/iommu.h>
0023 #include <asm/page.h>
0024 #include <asm/ldc.h>
0025 #include <asm/mdesc.h>
0026 
/* Module identity strings, combined into the version banner below. */
#define DRV_MODULE_NAME     "ldc"
#define PFX DRV_MODULE_NAME ": "
#define DRV_MODULE_VERSION  "1.1"
#define DRV_MODULE_RELDATE  "July 22, 2008"

/* An LDC map cookie carries a page-size code in its top four bits;
 * the low bits hold the page offset.  See ldc_cookie_to_index() and
 * ldc_demap() for how the code is decoded.
 */
#define COOKIE_PGSZ_CODE    0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT  60ULL


static char version[] =
    DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
0038 
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * NOTE: this layout is the on-wire/in-queue format shared with the
 * hypervisor and the peer; do not reorder or resize fields.
 */
struct ldc_packet {
    u8          type;       /* frame class */
#define LDC_CTRL        0x01
#define LDC_DATA        0x02
#define LDC_ERR         0x10

    u8          stype;      /* sub-type of the frame */
#define LDC_INFO        0x01
#define LDC_ACK         0x02
#define LDC_NACK        0x04

    u8          ctrl;       /* control opcode (low 4 bits) */
#define LDC_VERS        0x01 /* Link Version        */
#define LDC_RTS         0x02 /* Request To Send     */
#define LDC_RTR         0x03 /* Ready To Receive    */
#define LDC_RDX         0x04 /* Ready for Data eXchange */
#define LDC_CTRL_MSK        0x0f

    u8          env;        /* payload length + fragment flags */
#define LDC_LEN         0x3f
#define LDC_FRAG_MASK       0xc0
#define LDC_START       0x40
#define LDC_STOP        0x80

    u32         seqid;      /* sender's sequence number */

    union {
        u8      u_data[LDC_PACKET_SIZE - 8];    /* unreliable payload */
        struct {
            u32 pad;
            u32 ackid;      /* highest seqid being acknowledged */
            u8  r_data[LDC_PACKET_SIZE - 8 - 8]; /* reliable payload */
        } r;
    } u;
};
0078 
/* A protocol version as negotiated during the handshake. */
struct ldc_version {
    u16 major;
    u16 minor;
};

/* Ordered from largest major to lowest.  find_by_major() depends on
 * this ordering.
 */
static struct ldc_version ver_arr[] = {
    { .major = 1, .minor = 0 },
};
0088 
/* Default MTU and queue sizing, in units of LDC packets. */
#define LDC_DEFAULT_MTU         (4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES     (PAGE_SIZE / LDC_PACKET_SIZE)

struct ldc_channel;

/* Per-mode read/write entry points (RAW, unreliable, stream). */
struct ldc_mode_ops {
    int (*write)(struct ldc_channel *, const void *, unsigned int);
    int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

/* Non-zero when logical domaining is enabled on this system. */
int ldom_domaining_enabled;
0104 
/* Per-channel IOMMU state: the LDC map table shared with the
 * hypervisor plus the allocator that hands out entries in it.
 */
struct ldc_iommu {
    /* Protects ldc_unmap.  */
    spinlock_t          lock;
    struct ldc_mtable_entry     *page_table;
    struct iommu_map_table      iommu_map_table;
};
0111 
/* State for one logical domain channel. */
struct ldc_channel {
    /* Protects all operations that depend upon channel state.  */
    spinlock_t          lock;

    unsigned long           id;     /* hypervisor channel ID */

    /* Stream-mode reassembly buffer. */
    u8              *mssbuf;
    u32             mssbuf_len;
    u32             mssbuf_off;

    /* TX queue: ring of LDC packets, real address registered
     * with the hypervisor.
     */
    struct ldc_packet       *tx_base;
    unsigned long           tx_head;
    unsigned long           tx_tail;
    unsigned long           tx_num_entries;
    unsigned long           tx_ra;

    /* First un-ACKed TX packet (reliable/stream modes). */
    unsigned long           tx_acked;

    /* RX queue, mirroring the TX layout above. */
    struct ldc_packet       *rx_base;
    unsigned long           rx_head;
    unsigned long           rx_tail;
    unsigned long           rx_num_entries;
    unsigned long           rx_ra;

    /* Sequence numbers for reliable delivery. */
    u32             rcv_nxt;
    u32             snd_nxt;

    unsigned long           chan_state; /* LDC_CHANNEL_* from HV */

    struct ldc_channel_config   cfg;
    void                *event_arg;     /* cookie for cfg.event() */

    const struct ldc_mode_ops   *mops;

    struct ldc_iommu        iommu;

    struct ldc_version      ver;    /* negotiated protocol version */

    /* Handshake progression. */
    u8              hs_state;
#define LDC_HS_CLOSED           0x00
#define LDC_HS_OPEN         0x01
#define LDC_HS_GOTVERS          0x02
#define LDC_HS_SENTRTR          0x03
#define LDC_HS_GOTRTR           0x04
#define LDC_HS_COMPLETE         0x10

    u8              flags;
#define LDC_FLAG_ALLOCED_QUEUES     0x01
#define LDC_FLAG_REGISTERED_QUEUES  0x02
#define LDC_FLAG_REGISTERED_IRQS    0x04
#define LDC_FLAG_RESET          0x10

    u8              mss;    /* max payload per packet for this mode */
    u8              state;  /* LDC_STATE_*, see state_to_str() */

#define LDC_IRQ_NAME_MAX        32
    char                rx_irq_name[LDC_IRQ_NAME_MAX];
    char                tx_irq_name[LDC_IRQ_NAME_MAX];

    struct hlist_head       mh_list;    /* mapped-memory handles */

    struct hlist_node       list;   /* link on ldc_channel_list */
};
0175 
/* Debug printout gated on the per-channel cfg.debug mask; expects a
 * variable named 'lp' in the calling scope.
 */
#define ldcdbg(TYPE, f, a...) \
do {    if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
        printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)

/* Abort the channel, tagging the log with the calling function. */
#define LDC_ABORT(lp)   ldc_abort((lp), __func__)
0182 
0183 static const char *state_to_str(u8 state)
0184 {
0185     switch (state) {
0186     case LDC_STATE_INVALID:
0187         return "INVALID";
0188     case LDC_STATE_INIT:
0189         return "INIT";
0190     case LDC_STATE_BOUND:
0191         return "BOUND";
0192     case LDC_STATE_READY:
0193         return "READY";
0194     case LDC_STATE_CONNECTED:
0195         return "CONNECTED";
0196     default:
0197         return "<UNKNOWN>";
0198     }
0199 }
0200 
0201 static unsigned long __advance(unsigned long off, unsigned long num_entries)
0202 {
0203     off += LDC_PACKET_SIZE;
0204     if (off == (num_entries * LDC_PACKET_SIZE))
0205         off = 0;
0206 
0207     return off;
0208 }
0209 
/* Advance an offset one packet within the RX queue, with wrap-around. */
static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
{
    return __advance(off, lp->rx_num_entries);
}
0214 
/* Advance an offset one packet within the TX queue, with wrap-around. */
static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
{
    return __advance(off, lp->tx_num_entries);
}
0219 
0220 static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
0221                           unsigned long *new_tail)
0222 {
0223     struct ldc_packet *p;
0224     unsigned long t;
0225 
0226     t = tx_advance(lp, lp->tx_tail);
0227     if (t == lp->tx_head)
0228         return NULL;
0229 
0230     *new_tail = t;
0231 
0232     p = lp->tx_base;
0233     return p + (lp->tx_tail / LDC_PACKET_SIZE);
0234 }
0235 
0236 /* When we are in reliable or stream mode, have to track the next packet
0237  * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
0238  * to be careful not to stomp over the queue past that point.  During
0239  * the handshake, we don't have TX data packets pending in the queue
0240  * and that's why handshake_get_tx_packet() need not be mindful of
0241  * lp->tx_acked.
0242  */
0243 static unsigned long head_for_data(struct ldc_channel *lp)
0244 {
0245     if (lp->cfg.mode == LDC_MODE_STREAM)
0246         return lp->tx_acked;
0247     return lp->tx_head;
0248 }
0249 
0250 static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
0251 {
0252     unsigned long limit, tail, new_tail, diff;
0253     unsigned int mss;
0254 
0255     limit = head_for_data(lp);
0256     tail = lp->tx_tail;
0257     new_tail = tx_advance(lp, tail);
0258     if (new_tail == limit)
0259         return 0;
0260 
0261     if (limit > new_tail)
0262         diff = limit - new_tail;
0263     else
0264         diff = (limit +
0265             ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
0266     diff /= LDC_PACKET_SIZE;
0267     mss = lp->mss;
0268 
0269     if (diff * mss < size)
0270         return 0;
0271 
0272     return 1;
0273 }
0274 
0275 static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
0276                          unsigned long *new_tail)
0277 {
0278     struct ldc_packet *p;
0279     unsigned long h, t;
0280 
0281     h = head_for_data(lp);
0282     t = tx_advance(lp, lp->tx_tail);
0283     if (t == h)
0284         return NULL;
0285 
0286     *new_tail = t;
0287 
0288     p = lp->tx_base;
0289     return p + (lp->tx_tail / LDC_PACKET_SIZE);
0290 }
0291 
0292 static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
0293 {
0294     unsigned long orig_tail = lp->tx_tail;
0295     int limit = 1000;
0296 
0297     lp->tx_tail = tail;
0298     while (limit-- > 0) {
0299         unsigned long err;
0300 
0301         err = sun4v_ldc_tx_set_qtail(lp->id, tail);
0302         if (!err)
0303             return 0;
0304 
0305         if (err != HV_EWOULDBLOCK) {
0306             lp->tx_tail = orig_tail;
0307             return -EINVAL;
0308         }
0309         udelay(1);
0310     }
0311 
0312     lp->tx_tail = orig_tail;
0313     return -EBUSY;
0314 }
0315 
0316 /* This just updates the head value in the hypervisor using
0317  * a polling loop with a timeout.  The caller takes care of
0318  * upating software state representing the head change, if any.
0319  */
0320 static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
0321 {
0322     int limit = 1000;
0323 
0324     while (limit-- > 0) {
0325         unsigned long err;
0326 
0327         err = sun4v_ldc_rx_set_qhead(lp->id, head);
0328         if (!err)
0329             return 0;
0330 
0331         if (err != HV_EWOULDBLOCK)
0332             return -EINVAL;
0333 
0334         udelay(1);
0335     }
0336 
0337     return -EBUSY;
0338 }
0339 
/* Hand a composed packet to the hypervisor by advancing the TX tail.
 * 'p' must be the slot previously returned by one of the get_tx_packet
 * helpers, i.e. the packet sitting at the current tail.
 */
static int send_tx_packet(struct ldc_channel *lp,
              struct ldc_packet *p,
              unsigned long new_tail)
{
    BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));

    return set_tx_tail(lp, new_tail);
}
0348 
0349 static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
0350                          u8 stype, u8 ctrl,
0351                          void *data, int dlen,
0352                          unsigned long *new_tail)
0353 {
0354     struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
0355 
0356     if (p) {
0357         memset(p, 0, sizeof(*p));
0358         p->type = LDC_CTRL;
0359         p->stype = stype;
0360         p->ctrl = ctrl;
0361         if (data)
0362             memcpy(p->u.u_data, data, dlen);
0363     }
0364     return p;
0365 }
0366 
0367 static int start_handshake(struct ldc_channel *lp)
0368 {
0369     struct ldc_packet *p;
0370     struct ldc_version *ver;
0371     unsigned long new_tail;
0372 
0373     ver = &ver_arr[0];
0374 
0375     ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
0376            ver->major, ver->minor);
0377 
0378     p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
0379                    ver, sizeof(*ver), &new_tail);
0380     if (p) {
0381         int err = send_tx_packet(lp, p, new_tail);
0382         if (!err)
0383             lp->flags &= ~LDC_FLAG_RESET;
0384         return err;
0385     }
0386     return -EBUSY;
0387 }
0388 
0389 static int send_version_nack(struct ldc_channel *lp,
0390                  u16 major, u16 minor)
0391 {
0392     struct ldc_packet *p;
0393     struct ldc_version ver;
0394     unsigned long new_tail;
0395 
0396     ver.major = major;
0397     ver.minor = minor;
0398 
0399     p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
0400                    &ver, sizeof(ver), &new_tail);
0401     if (p) {
0402         ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
0403                ver.major, ver.minor);
0404 
0405         return send_tx_packet(lp, p, new_tail);
0406     }
0407     return -EBUSY;
0408 }
0409 
0410 static int send_version_ack(struct ldc_channel *lp,
0411                 struct ldc_version *vp)
0412 {
0413     struct ldc_packet *p;
0414     unsigned long new_tail;
0415 
0416     p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
0417                    vp, sizeof(*vp), &new_tail);
0418     if (p) {
0419         ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
0420                vp->major, vp->minor);
0421 
0422         return send_tx_packet(lp, p, new_tail);
0423     }
0424     return -EBUSY;
0425 }
0426 
0427 static int send_rts(struct ldc_channel *lp)
0428 {
0429     struct ldc_packet *p;
0430     unsigned long new_tail;
0431 
0432     p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
0433                    &new_tail);
0434     if (p) {
0435         p->env = lp->cfg.mode;
0436         p->seqid = 0;
0437         lp->rcv_nxt = 0;
0438 
0439         ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
0440                p->env, p->seqid);
0441 
0442         return send_tx_packet(lp, p, new_tail);
0443     }
0444     return -EBUSY;
0445 }
0446 
0447 static int send_rtr(struct ldc_channel *lp)
0448 {
0449     struct ldc_packet *p;
0450     unsigned long new_tail;
0451 
0452     p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
0453                    &new_tail);
0454     if (p) {
0455         p->env = lp->cfg.mode;
0456         p->seqid = 0;
0457 
0458         ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
0459                p->env, p->seqid);
0460 
0461         return send_tx_packet(lp, p, new_tail);
0462     }
0463     return -EBUSY;
0464 }
0465 
0466 static int send_rdx(struct ldc_channel *lp)
0467 {
0468     struct ldc_packet *p;
0469     unsigned long new_tail;
0470 
0471     p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
0472                    &new_tail);
0473     if (p) {
0474         p->env = 0;
0475         p->seqid = ++lp->snd_nxt;
0476         p->u.r.ackid = lp->rcv_nxt;
0477 
0478         ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
0479                p->env, p->seqid, p->u.r.ackid);
0480 
0481         return send_tx_packet(lp, p, new_tail);
0482     }
0483     return -EBUSY;
0484 }
0485 
0486 static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
0487 {
0488     struct ldc_packet *p;
0489     unsigned long new_tail;
0490     int err;
0491 
0492     p = data_get_tx_packet(lp, &new_tail);
0493     if (!p)
0494         return -EBUSY;
0495     memset(p, 0, sizeof(*p));
0496     p->type = data_pkt->type;
0497     p->stype = LDC_NACK;
0498     p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
0499     p->seqid = lp->snd_nxt + 1;
0500     p->u.r.ackid = lp->rcv_nxt;
0501 
0502     ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
0503            p->type, p->ctrl, p->seqid, p->u.r.ackid);
0504 
0505     err = send_tx_packet(lp, p, new_tail);
0506     if (!err)
0507         lp->snd_nxt++;
0508 
0509     return err;
0510 }
0511 
/* Reset the channel after a protocol error: re-register both queues
 * with the hypervisor (which flushes them) and re-fetch queue state.
 * Always returns -ECONNRESET so callers can propagate it directly.
 */
static int ldc_abort(struct ldc_channel *lp, const char *msg)
{
    unsigned long hv_err;

    ldcdbg(STATE, "ABORT[%s]\n", msg);
    ldc_print(lp);

    /* We report but do not act upon the hypervisor errors because
     * there really isn't much we can do if they fail at this point.
     */
    hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
    if (hv_err)
        printk(KERN_ERR PFX "ldc_abort: "
               "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
               lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);

    hv_err = sun4v_ldc_tx_get_state(lp->id,
                    &lp->tx_head,
                    &lp->tx_tail,
                    &lp->chan_state);
    if (hv_err)
        printk(KERN_ERR PFX "ldc_abort: "
               "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
               lp->id, hv_err);

    hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
    if (hv_err)
        printk(KERN_ERR PFX "ldc_abort: "
               "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
               lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);

    /* Refetch the RX queue state as well, because we could be invoked
     * here in the queue processing context.
     */
    hv_err = sun4v_ldc_rx_get_state(lp->id,
                    &lp->rx_head,
                    &lp->rx_tail,
                    &lp->chan_state);
    if (hv_err)
        printk(KERN_ERR PFX "ldc_abort: "
               "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
               lp->id, hv_err);

    return -ECONNRESET;
}
0557 
0558 static struct ldc_version *find_by_major(u16 major)
0559 {
0560     struct ldc_version *ret = NULL;
0561     int i;
0562 
0563     for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
0564         struct ldc_version *v = &ver_arr[i];
0565         if (v->major <= major) {
0566             ret = v;
0567             break;
0568         }
0569     }
0570     return ret;
0571 }
0572 
0573 static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
0574 {
0575     struct ldc_version *vap;
0576     int err;
0577 
0578     ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
0579            vp->major, vp->minor);
0580 
0581     if (lp->hs_state == LDC_HS_GOTVERS) {
0582         lp->hs_state = LDC_HS_OPEN;
0583         memset(&lp->ver, 0, sizeof(lp->ver));
0584     }
0585 
0586     vap = find_by_major(vp->major);
0587     if (!vap) {
0588         err = send_version_nack(lp, 0, 0);
0589     } else if (vap->major != vp->major) {
0590         err = send_version_nack(lp, vap->major, vap->minor);
0591     } else {
0592         struct ldc_version ver = *vp;
0593         if (ver.minor > vap->minor)
0594             ver.minor = vap->minor;
0595         err = send_version_ack(lp, &ver);
0596         if (!err) {
0597             lp->ver = ver;
0598             lp->hs_state = LDC_HS_GOTVERS;
0599         }
0600     }
0601     if (err)
0602         return LDC_ABORT(lp);
0603 
0604     return 0;
0605 }
0606 
0607 static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
0608 {
0609     ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
0610            vp->major, vp->minor);
0611 
0612     if (lp->hs_state == LDC_HS_GOTVERS) {
0613         if (lp->ver.major != vp->major ||
0614             lp->ver.minor != vp->minor)
0615             return LDC_ABORT(lp);
0616     } else {
0617         lp->ver = *vp;
0618         lp->hs_state = LDC_HS_GOTVERS;
0619     }
0620     if (send_rts(lp))
0621         return LDC_ABORT(lp);
0622     return 0;
0623 }
0624 
0625 static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
0626 {
0627     struct ldc_version *vap;
0628     struct ldc_packet *p;
0629     unsigned long new_tail;
0630 
0631     if (vp->major == 0 && vp->minor == 0)
0632         return LDC_ABORT(lp);
0633 
0634     vap = find_by_major(vp->major);
0635     if (!vap)
0636         return LDC_ABORT(lp);
0637 
0638     p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
0639                        vap, sizeof(*vap),
0640                        &new_tail);
0641     if (!p)
0642         return LDC_ABORT(lp);
0643 
0644     return send_tx_packet(lp, p, new_tail);
0645 }
0646 
0647 static int process_version(struct ldc_channel *lp,
0648                struct ldc_packet *p)
0649 {
0650     struct ldc_version *vp;
0651 
0652     vp = (struct ldc_version *) p->u.u_data;
0653 
0654     switch (p->stype) {
0655     case LDC_INFO:
0656         return process_ver_info(lp, vp);
0657 
0658     case LDC_ACK:
0659         return process_ver_ack(lp, vp);
0660 
0661     case LDC_NACK:
0662         return process_ver_nack(lp, vp);
0663 
0664     default:
0665         return LDC_ABORT(lp);
0666     }
0667 }
0668 
0669 static int process_rts(struct ldc_channel *lp,
0670                struct ldc_packet *p)
0671 {
0672     ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
0673            p->stype, p->seqid, p->env);
0674 
0675     if (p->stype     != LDC_INFO       ||
0676         lp->hs_state != LDC_HS_GOTVERS ||
0677         p->env       != lp->cfg.mode)
0678         return LDC_ABORT(lp);
0679 
0680     lp->snd_nxt = p->seqid;
0681     lp->rcv_nxt = p->seqid;
0682     lp->hs_state = LDC_HS_SENTRTR;
0683     if (send_rtr(lp))
0684         return LDC_ABORT(lp);
0685 
0686     return 0;
0687 }
0688 
0689 static int process_rtr(struct ldc_channel *lp,
0690                struct ldc_packet *p)
0691 {
0692     ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
0693            p->stype, p->seqid, p->env);
0694 
0695     if (p->stype     != LDC_INFO ||
0696         p->env       != lp->cfg.mode)
0697         return LDC_ABORT(lp);
0698 
0699     lp->snd_nxt = p->seqid;
0700     lp->hs_state = LDC_HS_COMPLETE;
0701     ldc_set_state(lp, LDC_STATE_CONNECTED);
0702     send_rdx(lp);
0703 
0704     return LDC_EVENT_UP;
0705 }
0706 
0707 static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
0708 {
0709     return lp->rcv_nxt + 1 == seqid;
0710 }
0711 
0712 static int process_rdx(struct ldc_channel *lp,
0713                struct ldc_packet *p)
0714 {
0715     ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
0716            p->stype, p->seqid, p->env, p->u.r.ackid);
0717 
0718     if (p->stype != LDC_INFO ||
0719         !(rx_seq_ok(lp, p->seqid)))
0720         return LDC_ABORT(lp);
0721 
0722     lp->rcv_nxt = p->seqid;
0723 
0724     lp->hs_state = LDC_HS_COMPLETE;
0725     ldc_set_state(lp, LDC_STATE_CONNECTED);
0726 
0727     return LDC_EVENT_UP;
0728 }
0729 
0730 static int process_control_frame(struct ldc_channel *lp,
0731                  struct ldc_packet *p)
0732 {
0733     switch (p->ctrl) {
0734     case LDC_VERS:
0735         return process_version(lp, p);
0736 
0737     case LDC_RTS:
0738         return process_rts(lp, p);
0739 
0740     case LDC_RTR:
0741         return process_rtr(lp, p);
0742 
0743     case LDC_RDX:
0744         return process_rdx(lp, p);
0745 
0746     default:
0747         return LDC_ABORT(lp);
0748     }
0749 }
0750 
/* Any error frame from the peer resets the channel unconditionally. */
static int process_error_frame(struct ldc_channel *lp,
                   struct ldc_packet *p)
{
    return LDC_ABORT(lp);
}
0756 
0757 static int process_data_ack(struct ldc_channel *lp,
0758                 struct ldc_packet *ack)
0759 {
0760     unsigned long head = lp->tx_acked;
0761     u32 ackid = ack->u.r.ackid;
0762 
0763     while (1) {
0764         struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
0765 
0766         head = tx_advance(lp, head);
0767 
0768         if (p->seqid == ackid) {
0769             lp->tx_acked = head;
0770             return 0;
0771         }
0772         if (head == lp->tx_tail)
0773             return LDC_ABORT(lp);
0774     }
0775 
0776     return 0;
0777 }
0778 
0779 static void send_events(struct ldc_channel *lp, unsigned int event_mask)
0780 {
0781     if (event_mask & LDC_EVENT_RESET)
0782         lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
0783     if (event_mask & LDC_EVENT_UP)
0784         lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
0785     if (event_mask & LDC_EVENT_DATA_READY)
0786         lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
0787 }
0788 
/* RX interrupt handler.  Refreshes queue state from the hypervisor,
 * drives the handshake state machine on control frames, and collects
 * events to deliver to the client callback after dropping the lock.
 */
static irqreturn_t ldc_rx(int irq, void *dev_id)
{
    struct ldc_channel *lp = dev_id;
    unsigned long orig_state, flags;
    unsigned int event_mask;

    spin_lock_irqsave(&lp->lock, flags);

    orig_state = lp->chan_state;

    /* We should probably check for hypervisor errors here and
     * reset the LDC channel if we get one.
     */
    sun4v_ldc_rx_get_state(lp->id,
                   &lp->rx_head,
                   &lp->rx_tail,
                   &lp->chan_state);

    ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
           orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

    event_mask = 0;

    /* RAW mode has no handshake; the channel coming up is the
     * whole story.
     */
    if (lp->cfg.mode == LDC_MODE_RAW &&
        lp->chan_state == LDC_CHANNEL_UP) {
        lp->hs_state = LDC_HS_COMPLETE;
        ldc_set_state(lp, LDC_STATE_CONNECTED);

        /*
         * Generate an LDC_EVENT_UP event if the channel
         * was not already up.
         */
        if (orig_state != LDC_CHANNEL_UP) {
            event_mask |= LDC_EVENT_UP;
            orig_state = lp->chan_state;
        }
    }

    /* If we are in reset state, flush the RX queue and ignore
     * everything.
     */
    if (lp->flags & LDC_FLAG_RESET) {
        (void) ldc_rx_reset(lp);
        goto out;
    }

    /* Once we finish the handshake, we let the ldc_read()
     * paths do all of the control frame and state management.
     * Just trigger the callback.
     */
    if (lp->hs_state == LDC_HS_COMPLETE) {
handshake_complete:
        if (lp->chan_state != orig_state) {
            unsigned int event = LDC_EVENT_RESET;

            if (lp->chan_state == LDC_CHANNEL_UP)
                event = LDC_EVENT_UP;

            event_mask |= event;
        }
        if (lp->rx_head != lp->rx_tail)
            event_mask |= LDC_EVENT_DATA_READY;

        goto out;
    }

    /* Channel state changed mid-handshake: bail and let the next
     * interrupt sort it out.
     */
    if (lp->chan_state != orig_state)
        goto out;

    /* Consume handshake-era frames one at a time, advancing the RX
     * head in the hypervisor as each is processed.
     */
    while (lp->rx_head != lp->rx_tail) {
        struct ldc_packet *p;
        unsigned long new;
        int err;

        p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

        switch (p->type) {
        case LDC_CTRL:
            /* Positive return values are LDC_EVENT_* bits. */
            err = process_control_frame(lp, p);
            if (err > 0)
                event_mask |= err;
            break;

        case LDC_DATA:
            event_mask |= LDC_EVENT_DATA_READY;
            err = 0;
            break;

        case LDC_ERR:
            err = process_error_frame(lp, p);
            break;

        default:
            err = LDC_ABORT(lp);
            break;
        }

        if (err < 0)
            break;

        new = lp->rx_head;
        new += LDC_PACKET_SIZE;
        if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
            new = 0;
        lp->rx_head = new;

        err = __set_rx_head(lp, new);
        if (err < 0) {
            (void) LDC_ABORT(lp);
            break;
        }
        /* A control frame may have just completed the handshake;
         * switch to the post-handshake event logic above.
         */
        if (lp->hs_state == LDC_HS_COMPLETE)
            goto handshake_complete;
    }

out:
    spin_unlock_irqrestore(&lp->lock, flags);

    /* Callbacks run without the channel lock held. */
    send_events(lp, event_mask);

    return IRQ_HANDLED;
}
0911 
0912 static irqreturn_t ldc_tx(int irq, void *dev_id)
0913 {
0914     struct ldc_channel *lp = dev_id;
0915     unsigned long flags, orig_state;
0916     unsigned int event_mask = 0;
0917 
0918     spin_lock_irqsave(&lp->lock, flags);
0919 
0920     orig_state = lp->chan_state;
0921 
0922     /* We should probably check for hypervisor errors here and
0923      * reset the LDC channel if we get one.
0924      */
0925     sun4v_ldc_tx_get_state(lp->id,
0926                    &lp->tx_head,
0927                    &lp->tx_tail,
0928                    &lp->chan_state);
0929 
0930     ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
0931            orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
0932 
0933     if (lp->cfg.mode == LDC_MODE_RAW &&
0934         lp->chan_state == LDC_CHANNEL_UP) {
0935         lp->hs_state = LDC_HS_COMPLETE;
0936         ldc_set_state(lp, LDC_STATE_CONNECTED);
0937 
0938         /*
0939          * Generate an LDC_EVENT_UP event if the channel
0940          * was not already up.
0941          */
0942         if (orig_state != LDC_CHANNEL_UP) {
0943             event_mask |= LDC_EVENT_UP;
0944             orig_state = lp->chan_state;
0945         }
0946     }
0947 
0948     spin_unlock_irqrestore(&lp->lock, flags);
0949 
0950     send_events(lp, event_mask);
0951 
0952     return IRQ_HANDLED;
0953 }
0954 
/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
/* All channels allocated on this system, linked via lp->list. */
static HLIST_HEAD(ldc_channel_list);
0963 
/* Return 1 if a channel with this hypervisor ID is already allocated.
 * NOTE(review): walks ldc_channel_list without any lock — see the XXX
 * comment above the list declaration.
 */
static int __ldc_channel_exists(unsigned long id)
{
    struct ldc_channel *lp;

    hlist_for_each_entry(lp, &ldc_channel_list, list) {
        if (lp->id == id)
            return 1;
    }
    return 0;
}
0974 
0975 static int alloc_queue(const char *name, unsigned long num_entries,
0976                struct ldc_packet **base, unsigned long *ra)
0977 {
0978     unsigned long size, order;
0979     void *q;
0980 
0981     size = num_entries * LDC_PACKET_SIZE;
0982     order = get_order(size);
0983 
0984     q = (void *) __get_free_pages(GFP_KERNEL, order);
0985     if (!q) {
0986         printk(KERN_ERR PFX "Alloc of %s queue failed with "
0987                "size=%lu order=%lu\n", name, size, order);
0988         return -ENOMEM;
0989     }
0990 
0991     memset(q, 0, PAGE_SIZE << order);
0992 
0993     *base = q;
0994     *ra = __pa(q);
0995 
0996     return 0;
0997 }
0998 
0999 static void free_queue(unsigned long num_entries, struct ldc_packet *q)
1000 {
1001     unsigned long size, order;
1002 
1003     if (!q)
1004         return;
1005 
1006     size = num_entries * LDC_PACKET_SIZE;
1007     order = get_order(size);
1008 
1009     free_pages((unsigned long)q, order);
1010 }
1011 
/* Convert a map cookie back to its map-table index: strip the
 * page-size code from the top bits, then shift by the page size it
 * encodes (13 + 3 * szcode bits per page).
 */
static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
{
    u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
    /* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */

    cookie &= ~COOKIE_PGSZ_CODE;

    return (cookie >> (13ULL + (szcode * 3ULL)));
}
1021 
1022 static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
1023               unsigned long entry, unsigned long npages)
1024 {
1025     struct ldc_mtable_entry *base;
1026     unsigned long i, shift;
1027 
1028     shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
1029     base = iommu->page_table + entry;
1030     for (i = 0; i < npages; i++) {
1031         if (base->cookie)
1032             sun4v_ldc_revoke(id, cookie + (i << shift),
1033                      base->cookie);
1034         base->mte = 0;
1035     }
1036 }
1037 
/* Number of map-table entries per channel.  XXX Make this configurable... XXX */
#define LDC_IOTABLE_SIZE    (8 * 1024)
1040 
/* Set up the per-channel IOMMU: allocate the allocator bitmap and the
 * map table, then register the table with the hypervisor.  On failure
 * everything allocated so far is unwound via the goto chain.
 * Returns 0, -ENOMEM, or -EINVAL (hypervisor rejected the table).
 */
static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
{
    unsigned long sz, num_tsb_entries, tsbsize, order;
    struct ldc_iommu *ldc_iommu = &lp->iommu;
    struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
    struct ldc_mtable_entry *table;
    unsigned long hv_err;
    int err;

    num_tsb_entries = LDC_IOTABLE_SIZE;
    tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
    spin_lock_init(&ldc_iommu->lock);

    /* One bit per entry, rounded up to a multiple of 8 bytes. */
    sz = num_tsb_entries / 8;
    sz = (sz + 7UL) & ~7UL;
    iommu->map = kzalloc(sz, GFP_KERNEL);
    if (!iommu->map) {
        printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
        return -ENOMEM;
    }
    iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
                NULL, false /* no large pool */,
                1 /* npools */,
                true /* skip span boundary check */);

    order = get_order(tsbsize);

    table = (struct ldc_mtable_entry *)
        __get_free_pages(GFP_KERNEL, order);
    err = -ENOMEM;
    if (!table) {
        printk(KERN_ERR PFX "Alloc of MTE table failed, "
               "size=%lu order=%lu\n", tsbsize, order);
        goto out_free_map;
    }

    memset(table, 0, PAGE_SIZE << order);

    ldc_iommu->page_table = table;

    /* Hand the (physical) table to the hypervisor for this channel. */
    hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
                     num_tsb_entries);
    err = -EINVAL;
    if (hv_err)
        goto out_free_table;

    return 0;

out_free_table:
    free_pages((unsigned long) table, order);
    ldc_iommu->page_table = NULL;

out_free_map:
    kfree(iommu->map);
    iommu->map = NULL;

    return err;
}
1099 
1100 static void ldc_iommu_release(struct ldc_channel *lp)
1101 {
1102     struct ldc_iommu *ldc_iommu = &lp->iommu;
1103     struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
1104     unsigned long num_tsb_entries, tsbsize, order;
1105 
1106     (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1107 
1108     num_tsb_entries = iommu->poolsize * iommu->nr_pools;
1109     tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1110     order = get_order(tsbsize);
1111 
1112     free_pages((unsigned long) ldc_iommu->page_table, order);
1113     ldc_iommu->page_table = NULL;
1114 
1115     kfree(iommu->map);
1116     iommu->map = NULL;
1117 }
1118 
1119 struct ldc_channel *ldc_alloc(unsigned long id,
1120                   const struct ldc_channel_config *cfgp,
1121                   void *event_arg,
1122                   const char *name)
1123 {
1124     struct ldc_channel *lp;
1125     const struct ldc_mode_ops *mops;
1126     unsigned long dummy1, dummy2, hv_err;
1127     u8 mss, *mssbuf;
1128     int err;
1129 
1130     err = -ENODEV;
1131     if (!ldom_domaining_enabled)
1132         goto out_err;
1133 
1134     err = -EINVAL;
1135     if (!cfgp)
1136         goto out_err;
1137     if (!name)
1138         goto out_err;
1139 
1140     switch (cfgp->mode) {
1141     case LDC_MODE_RAW:
1142         mops = &raw_ops;
1143         mss = LDC_PACKET_SIZE;
1144         break;
1145 
1146     case LDC_MODE_UNRELIABLE:
1147         mops = &nonraw_ops;
1148         mss = LDC_PACKET_SIZE - 8;
1149         break;
1150 
1151     case LDC_MODE_STREAM:
1152         mops = &stream_ops;
1153         mss = LDC_PACKET_SIZE - 8 - 8;
1154         break;
1155 
1156     default:
1157         goto out_err;
1158     }
1159 
1160     if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1161         goto out_err;
1162 
1163     hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1164     err = -ENODEV;
1165     if (hv_err == HV_ECHANNEL)
1166         goto out_err;
1167 
1168     err = -EEXIST;
1169     if (__ldc_channel_exists(id))
1170         goto out_err;
1171 
1172     mssbuf = NULL;
1173 
1174     lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1175     err = -ENOMEM;
1176     if (!lp)
1177         goto out_err;
1178 
1179     spin_lock_init(&lp->lock);
1180 
1181     lp->id = id;
1182 
1183     err = ldc_iommu_init(name, lp);
1184     if (err)
1185         goto out_free_ldc;
1186 
1187     lp->mops = mops;
1188     lp->mss = mss;
1189 
1190     lp->cfg = *cfgp;
1191     if (!lp->cfg.mtu)
1192         lp->cfg.mtu = LDC_DEFAULT_MTU;
1193 
1194     if (lp->cfg.mode == LDC_MODE_STREAM) {
1195         mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1196         if (!mssbuf) {
1197             err = -ENOMEM;
1198             goto out_free_iommu;
1199         }
1200         lp->mssbuf = mssbuf;
1201     }
1202 
1203     lp->event_arg = event_arg;
1204 
1205     /* XXX allow setting via ldc_channel_config to override defaults
1206      * XXX or use some formula based upon mtu
1207      */
1208     lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1209     lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1210 
1211     err = alloc_queue("TX", lp->tx_num_entries,
1212               &lp->tx_base, &lp->tx_ra);
1213     if (err)
1214         goto out_free_mssbuf;
1215 
1216     err = alloc_queue("RX", lp->rx_num_entries,
1217               &lp->rx_base, &lp->rx_ra);
1218     if (err)
1219         goto out_free_txq;
1220 
1221     lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1222 
1223     lp->hs_state = LDC_HS_CLOSED;
1224     ldc_set_state(lp, LDC_STATE_INIT);
1225 
1226     INIT_HLIST_NODE(&lp->list);
1227     hlist_add_head(&lp->list, &ldc_channel_list);
1228 
1229     INIT_HLIST_HEAD(&lp->mh_list);
1230 
1231     snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232     snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233 
1234     err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
1235               lp->rx_irq_name, lp);
1236     if (err)
1237         goto out_free_txq;
1238 
1239     err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
1240               lp->tx_irq_name, lp);
1241     if (err) {
1242         free_irq(lp->cfg.rx_irq, lp);
1243         goto out_free_txq;
1244     }
1245 
1246     return lp;
1247 
1248 out_free_txq:
1249     free_queue(lp->tx_num_entries, lp->tx_base);
1250 
1251 out_free_mssbuf:
1252     kfree(mssbuf);
1253 
1254 out_free_iommu:
1255     ldc_iommu_release(lp);
1256 
1257 out_free_ldc:
1258     kfree(lp);
1259 
1260 out_err:
1261     return ERR_PTR(err);
1262 }
1263 EXPORT_SYMBOL(ldc_alloc);
1264 
/* Undo ldc_bind(): release the irqs, unregister the queues with the
 * hypervisor, free the queue memory, and drop back to LDC_STATE_INIT.
 * Each teardown step is guarded by its flag, so this is safe for a
 * channel that was never (fully) bound.
 */
void ldc_unbind(struct ldc_channel *lp)
{
    if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
        free_irq(lp->cfg.rx_irq, lp);
        free_irq(lp->cfg.tx_irq, lp);
        lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
    }

    if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
        /* A zero-sized qconf unregisters the queue. */
        sun4v_ldc_tx_qconf(lp->id, 0, 0);
        sun4v_ldc_rx_qconf(lp->id, 0, 0);
        lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
    }
    if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
        free_queue(lp->tx_num_entries, lp->tx_base);
        free_queue(lp->rx_num_entries, lp->rx_base);
        lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
    }

    ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);
1287 
/* Fully destroy a channel created by ldc_alloc(): unbind it, unlink
 * it from the global channel list, and release the stream buffer,
 * IOMMU state and the channel structure itself.
 */
void ldc_free(struct ldc_channel *lp)
{
    ldc_unbind(lp);
    hlist_del(&lp->list);
    kfree(lp->mssbuf);
    ldc_iommu_release(lp);

    kfree(lp);
}
EXPORT_SYMBOL(ldc_free);
1298 
/* Bind the channel.  This registers the LDC queues with
 * the hypervisor and puts the channel into a pseudo-listening
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 */
int ldc_bind(struct ldc_channel *lp)
{
    unsigned long hv_err, flags;
    int err = -EINVAL;

    /* Only a freshly allocated (or fully unbound) channel may bind. */
    if (lp->state != LDC_STATE_INIT)
        return -EINVAL;

    spin_lock_irqsave(&lp->lock, flags);

    enable_irq(lp->cfg.rx_irq);
    enable_irq(lp->cfg.tx_irq);

    lp->flags |= LDC_FLAG_REGISTERED_IRQS;

    err = -ENODEV;
    /* Unregister any stale TX queue before registering ours. */
    hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
    if (hv_err)
        goto out_free_irqs;

    hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
    if (hv_err)
        goto out_free_irqs;

    /* Same dance for the RX queue. */
    hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
    if (hv_err)
        goto out_unmap_tx;

    hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
    if (hv_err)
        goto out_unmap_tx;

    lp->flags |= LDC_FLAG_REGISTERED_QUEUES;

    /* Seed our head/tail snapshot from the hypervisor's view. */
    hv_err = sun4v_ldc_tx_get_state(lp->id,
                    &lp->tx_head,
                    &lp->tx_tail,
                    &lp->chan_state);
    err = -EBUSY;
    if (hv_err)
        goto out_unmap_rx;

    lp->tx_acked = lp->tx_head;

    lp->hs_state = LDC_HS_OPEN;
    ldc_set_state(lp, LDC_STATE_BOUND);

    if (lp->cfg.mode == LDC_MODE_RAW) {
        /*
         * There is no handshake in RAW mode, so handshake
         * is completed.
         */
        lp->hs_state = LDC_HS_COMPLETE;
    }

    spin_unlock_irqrestore(&lp->lock, flags);

    return 0;

out_unmap_rx:
    lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
    sun4v_ldc_rx_qconf(lp->id, 0, 0);

out_unmap_tx:
    sun4v_ldc_tx_qconf(lp->id, 0, 0);

out_free_irqs:
    lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
    free_irq(lp->cfg.tx_irq, lp);
    free_irq(lp->cfg.rx_irq, lp);

    spin_unlock_irqrestore(&lp->lock, flags);

    return err;
}
EXPORT_SYMBOL(ldc_bind);
1380 
1381 int ldc_connect(struct ldc_channel *lp)
1382 {
1383     unsigned long flags;
1384     int err;
1385 
1386     if (lp->cfg.mode == LDC_MODE_RAW)
1387         return -EINVAL;
1388 
1389     spin_lock_irqsave(&lp->lock, flags);
1390 
1391     if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1392         !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1393         lp->hs_state != LDC_HS_OPEN)
1394         err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
1395     else
1396         err = start_handshake(lp);
1397 
1398     spin_unlock_irqrestore(&lp->lock, flags);
1399 
1400     return err;
1401 }
1402 EXPORT_SYMBOL(ldc_connect);
1403 
/* Force the channel out of the connected state: reset both queues
 * with the hypervisor, then return the channel to the bound/open
 * state with LDC_FLAG_RESET set so users can notice.  Only valid for
 * non-RAW channels whose queues are allocated and registered.
 */
int ldc_disconnect(struct ldc_channel *lp)
{
    unsigned long hv_err, flags;
    int err;

    if (lp->cfg.mode == LDC_MODE_RAW)
        return -EINVAL;

    if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
        !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
        return -EINVAL;

    spin_lock_irqsave(&lp->lock, flags);

    err = -ENODEV;
    /* Unconfigure, then reconfigure each queue to reset it. */
    hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
    if (hv_err)
        goto out_err;

    hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
    if (hv_err)
        goto out_err;

    hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
    if (hv_err)
        goto out_err;

    hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
    if (hv_err)
        goto out_err;

    ldc_set_state(lp, LDC_STATE_BOUND);
    lp->hs_state = LDC_HS_OPEN;
    lp->flags |= LDC_FLAG_RESET;

    spin_unlock_irqrestore(&lp->lock, flags);

    return 0;

out_err:
    /* Hypervisor rejected the reconfiguration: tear the channel all
     * the way back down to the unbound state.
     */
    sun4v_ldc_tx_qconf(lp->id, 0, 0);
    sun4v_ldc_rx_qconf(lp->id, 0, 0);
    free_irq(lp->cfg.tx_irq, lp);
    free_irq(lp->cfg.rx_irq, lp);
    lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
               LDC_FLAG_REGISTERED_QUEUES);
    ldc_set_state(lp, LDC_STATE_INIT);

    spin_unlock_irqrestore(&lp->lock, flags);

    return err;
}
EXPORT_SYMBOL(ldc_disconnect);
1457 
/* Report the channel's current LDC_STATE_* value. */
int ldc_state(struct ldc_channel *lp)
{
    return lp->state;
}
EXPORT_SYMBOL(ldc_state);
1463 
/* Record a link-state transition, logging old and new states. */
void ldc_set_state(struct ldc_channel *lp, u8 state)
{
    ldcdbg(STATE, "STATE (%s) --> (%s)\n",
           state_to_str(lp->state),
           state_to_str(state));

    lp->state = state;
}
EXPORT_SYMBOL(ldc_set_state);
1473 
/* Report the LDC_MODE_* the channel was configured with. */
int ldc_mode(struct ldc_channel *lp)
{
    return lp->cfg.mode;
}
EXPORT_SYMBOL(ldc_mode);
1479 
/* Flush the RX queue by advancing the head to the current tail. */
int ldc_rx_reset(struct ldc_channel *lp)
{
    return __set_rx_head(lp, lp->rx_tail);
}
EXPORT_SYMBOL(ldc_rx_reset);
1485 
/* Dump the channel's bookkeeping (flags, states, queue head/tail
 * pointers and sequence numbers) to the kernel log; @caller tags the
 * output with the name of the function requesting the dump.
 */
void __ldc_print(struct ldc_channel *lp, const char *caller)
{
    pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
        "\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
        "\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
        "\trcv_nxt=%u snd_nxt=%u\n",
        caller, lp->id, lp->flags, state_to_str(lp->state),
        lp->chan_state, lp->hs_state,
        lp->rx_head, lp->rx_tail, lp->rx_num_entries,
        lp->tx_head, lp->tx_tail, lp->tx_num_entries,
        lp->rcv_nxt, lp->snd_nxt);
}
EXPORT_SYMBOL(__ldc_print);
1499 
1500 static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1501 {
1502     struct ldc_packet *p;
1503     unsigned long new_tail, hv_err;
1504     int err;
1505 
1506     hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1507                     &lp->chan_state);
1508     if (unlikely(hv_err))
1509         return -EBUSY;
1510 
1511     if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1512         return LDC_ABORT(lp);
1513 
1514     if (size > LDC_PACKET_SIZE)
1515         return -EMSGSIZE;
1516 
1517     p = data_get_tx_packet(lp, &new_tail);
1518     if (!p)
1519         return -EAGAIN;
1520 
1521     memcpy(p, buf, size);
1522 
1523     err = send_tx_packet(lp, p, new_tail);
1524     if (!err)
1525         err = size;
1526 
1527     return err;
1528 }
1529 
1530 static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1531 {
1532     struct ldc_packet *p;
1533     unsigned long hv_err, new;
1534     int err;
1535 
1536     if (size < LDC_PACKET_SIZE)
1537         return -EINVAL;
1538 
1539     hv_err = sun4v_ldc_rx_get_state(lp->id,
1540                     &lp->rx_head,
1541                     &lp->rx_tail,
1542                     &lp->chan_state);
1543     if (hv_err)
1544         return LDC_ABORT(lp);
1545 
1546     if (lp->chan_state == LDC_CHANNEL_DOWN ||
1547         lp->chan_state == LDC_CHANNEL_RESETTING)
1548         return -ECONNRESET;
1549 
1550     if (lp->rx_head == lp->rx_tail)
1551         return 0;
1552 
1553     p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1554     memcpy(buf, p, LDC_PACKET_SIZE);
1555 
1556     new = rx_advance(lp, lp->rx_head);
1557     lp->rx_head = new;
1558 
1559     err = __set_rx_head(lp, new);
1560     if (err < 0)
1561         err = -ECONNRESET;
1562     else
1563         err = LDC_PACKET_SIZE;
1564 
1565     return err;
1566 }
1567 
/* RAW mode: fixed 64-byte frames, no headers, no handshake. */
static const struct ldc_mode_ops raw_ops = {
    .write      =   write_raw,
    .read       =   read_raw,
};
1572 
/* Transmit @size bytes in UNRELIABLE/STREAM mode, fragmenting the
 * buffer into mss-sized LDC_DATA packets with START/STOP framing and
 * ascending sequence ids.  Returns @size on success or a negative
 * errno (-EAGAIN when the TX queue lacks room for the whole buffer).
 */
static int write_nonraw(struct ldc_channel *lp, const void *buf,
            unsigned int size)
{
    unsigned long hv_err, tail;
    unsigned int copied;
    u32 seq;
    int err;

    hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
                    &lp->chan_state);
    if (unlikely(hv_err))
        return -EBUSY;

    if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
        return LDC_ABORT(lp);

    /* All-or-nothing: don't start writing unless it all fits. */
    if (!tx_has_space_for(lp, size))
        return -EAGAIN;

    seq = lp->snd_nxt;
    copied = 0;
    tail = lp->tx_tail;
    while (copied < size) {
        struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
        u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
                p->u.u_data :
                p->u.r.r_data);
        int data_len;

        p->type = LDC_DATA;
        p->stype = LDC_INFO;
        p->ctrl = 0;

        data_len = size - copied;
        if (data_len > lp->mss)
            data_len = lp->mss;

        BUG_ON(data_len > LDC_LEN);

        /* env carries the fragment length, plus START on the first
         * fragment and STOP on the last.
         */
        p->env = (data_len |
              (copied == 0 ? LDC_START : 0) |
              (data_len == size - copied ? LDC_STOP : 0));

        p->seqid = ++seq;

        ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
               p->type,
               p->stype,
               p->ctrl,
               p->env,
               p->seqid);

        memcpy(data, buf, data_len);
        buf += data_len;
        copied += data_len;

        tail = tx_advance(lp, tail);
    }

    /* Publishing the new tail makes the packets visible to the peer. */
    err = set_tx_tail(lp, tail);
    if (!err) {
        lp->snd_nxt = seq;
        err = size;
    }

    return err;
}
1640 
1641 static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1642               struct ldc_packet *first_frag)
1643 {
1644     int err;
1645 
1646     if (first_frag)
1647         lp->rcv_nxt = first_frag->seqid - 1;
1648 
1649     err = send_data_nack(lp, p);
1650     if (err)
1651         return err;
1652 
1653     err = ldc_rx_reset(lp);
1654     if (err < 0)
1655         return LDC_ABORT(lp);
1656 
1657     return 0;
1658 }
1659 
1660 static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1661 {
1662     if (p->stype & LDC_ACK) {
1663         int err = process_data_ack(lp, p);
1664         if (err)
1665             return err;
1666     }
1667     if (p->stype & LDC_NACK)
1668         return LDC_ABORT(lp);
1669 
1670     return 0;
1671 }
1672 
1673 static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1674 {
1675     unsigned long dummy;
1676     int limit = 1000;
1677 
1678     ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1679            cur_head, lp->rx_head, lp->rx_tail);
1680     while (limit-- > 0) {
1681         unsigned long hv_err;
1682 
1683         hv_err = sun4v_ldc_rx_get_state(lp->id,
1684                         &dummy,
1685                         &lp->rx_tail,
1686                         &lp->chan_state);
1687         if (hv_err)
1688             return LDC_ABORT(lp);
1689 
1690         if (lp->chan_state == LDC_CHANNEL_DOWN ||
1691             lp->chan_state == LDC_CHANNEL_RESETTING)
1692             return -ECONNRESET;
1693 
1694         if (cur_head != lp->rx_tail) {
1695             ldcdbg(DATA, "DATA WAIT DONE "
1696                    "head[%lx] tail[%lx] chan_state[%lx]\n",
1697                    dummy, lp->rx_tail, lp->chan_state);
1698             return 0;
1699         }
1700 
1701         udelay(1);
1702     }
1703     return -EAGAIN;
1704 }
1705 
1706 static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1707 {
1708     int err = __set_rx_head(lp, head);
1709 
1710     if (err < 0)
1711         return LDC_ABORT(lp);
1712 
1713     lp->rx_head = head;
1714     return 0;
1715 }
1716 
1717 static void send_data_ack(struct ldc_channel *lp)
1718 {
1719     unsigned long new_tail;
1720     struct ldc_packet *p;
1721 
1722     p = data_get_tx_packet(lp, &new_tail);
1723     if (likely(p)) {
1724         int err;
1725 
1726         memset(p, 0, sizeof(*p));
1727         p->type = LDC_DATA;
1728         p->stype = LDC_ACK;
1729         p->ctrl = 0;
1730         p->seqid = lp->snd_nxt + 1;
1731         p->u.r.ackid = lp->rcv_nxt;
1732 
1733         err = send_tx_packet(lp, p, new_tail);
1734         if (!err)
1735             lp->snd_nxt++;
1736     }
1737 }
1738 
/* Receive in UNRELIABLE/STREAM mode: walk the RX queue, handling
 * control frames and ACK/NACKs inline, and reassemble START..STOP
 * data fragments into @buf.  Returns the number of bytes copied,
 * 0 when the queue is empty, or a negative errno; in reliable mode a
 * data ACK is sent after a successful delivery.
 */
static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
    struct ldc_packet *first_frag;
    unsigned long hv_err, new;
    int err, copied;

    hv_err = sun4v_ldc_rx_get_state(lp->id,
                    &lp->rx_head,
                    &lp->rx_tail,
                    &lp->chan_state);
    if (hv_err)
        return LDC_ABORT(lp);

    if (lp->chan_state == LDC_CHANNEL_DOWN ||
        lp->chan_state == LDC_CHANNEL_RESETTING)
        return -ECONNRESET;

    if (lp->rx_head == lp->rx_tail)
        return 0;

    /* first_frag remembers the START fragment of the packet being
     * reassembled; NULL means we're between packets.
     */
    first_frag = NULL;
    copied = err = 0;
    new = lp->rx_head;
    while (1) {
        struct ldc_packet *p;
        int pkt_len;

        BUG_ON(new == lp->rx_tail);
        p = lp->rx_base + (new / LDC_PACKET_SIZE);

        ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
               "rcv_nxt[%08x]\n",
               p->type,
               p->stype,
               p->ctrl,
               p->env,
               p->seqid,
               p->u.r.ackid,
               lp->rcv_nxt);

        if (unlikely(!rx_seq_ok(lp, p->seqid))) {
            err = rx_bad_seq(lp, p, first_frag);
            copied = 0;
            break;
        }

        if (p->type & LDC_CTRL) {
            err = process_control_frame(lp, p);
            if (err < 0)
                break;
            err = 0;
        }

        lp->rcv_nxt = p->seqid;

        /*
         * If this is a control-only packet, there is nothing
         * else to do but advance the rx queue since the packet
         * was already processed above.
         */
        if (!(p->type & LDC_DATA)) {
            new = rx_advance(lp, new);
            break;
        }
        if (p->stype & (LDC_ACK | LDC_NACK)) {
            err = data_ack_nack(lp, p);
            if (err)
                break;
        }
        /* No INFO bit means no payload to consume: advance past it. */
        if (!(p->stype & LDC_INFO)) {
            new = rx_advance(lp, new);
            err = rx_set_head(lp, new);
            if (err)
                break;
            goto no_data;
        }

        pkt_len = p->env & LDC_LEN;

        /* Every initial packet starts with the START bit set.
         *
         * Singleton packets will have both START+STOP set.
         *
         * Fragments will have START set in the first frame, STOP
         * set in the last frame, and neither bit set in middle
         * frames of the packet.
         *
         * Therefore if we are at the beginning of a packet and
         * we don't see START, or we are in the middle of a fragmented
         * packet and do see START, we are unsynchronized and should
         * flush the RX queue.
         */
        if ((first_frag == NULL && !(p->env & LDC_START)) ||
            (first_frag != NULL &&  (p->env & LDC_START))) {
            if (!first_frag)
                new = rx_advance(lp, new);

            err = rx_set_head(lp, new);
            if (err)
                break;

            if (!first_frag)
                goto no_data;
        }
        if (!first_frag)
            first_frag = p;

        if (pkt_len > size - copied) {
            /* User didn't give us a big enough buffer,
             * what to do?  This is a pretty serious error.
             *
             * Since we haven't updated the RX ring head to
             * consume any of the packets, signal the error
             * to the user and just leave the RX ring alone.
             *
             * This seems the best behavior because this allows
             * a user of the LDC layer to start with a small
             * RX buffer for ldc_read() calls and use -EMSGSIZE
             * as a cue to enlarge it's read buffer.
             */
            err = -EMSGSIZE;
            break;
        }

        /* Ok, we are gonna eat this one.  */
        new = rx_advance(lp, new);

        memcpy(buf,
               (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
            p->u.u_data : p->u.r.r_data), pkt_len);
        buf += pkt_len;
        copied += pkt_len;

        if (p->env & LDC_STOP)
            break;

no_data:
        /* Mid-packet with an empty queue: wait for the peer. */
        if (new == lp->rx_tail) {
            err = rx_data_wait(lp, new);
            if (err)
                break;
        }
    }

    if (!err)
        err = rx_set_head(lp, new);

    /* On error, rewind rcv_nxt so the packet can be resent. */
    if (err && first_frag)
        lp->rcv_nxt = first_frag->seqid - 1;

    if (!err) {
        err = copied;
        if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
            send_data_ack(lp);
    }

    return err;
}
1897 
/* UNRELIABLE mode: headered packets with sequence ids, no stream
 * reassembly buffer.
 */
static const struct ldc_mode_ops nonraw_ops = {
    .write      =   write_nonraw,
    .read       =   read_nonraw,
};
1902 
1903 static int write_stream(struct ldc_channel *lp, const void *buf,
1904             unsigned int size)
1905 {
1906     if (size > lp->cfg.mtu)
1907         size = lp->cfg.mtu;
1908     return write_nonraw(lp, buf, size);
1909 }
1910 
1911 static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1912 {
1913     if (!lp->mssbuf_len) {
1914         int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1915         if (err < 0)
1916             return err;
1917 
1918         lp->mssbuf_len = err;
1919         lp->mssbuf_off = 0;
1920     }
1921 
1922     if (size > lp->mssbuf_len)
1923         size = lp->mssbuf_len;
1924     memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1925 
1926     lp->mssbuf_off += size;
1927     lp->mssbuf_len -= size;
1928 
1929     return size;
1930 }
1931 
/* STREAM mode: reliable byte stream, reassembled via lp->mssbuf. */
static const struct ldc_mode_ops stream_ops = {
    .write      =   write_stream,
    .read       =   read_stream,
};
1936 
1937 int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1938 {
1939     unsigned long flags;
1940     int err;
1941 
1942     if (!buf)
1943         return -EINVAL;
1944 
1945     if (!size)
1946         return 0;
1947 
1948     spin_lock_irqsave(&lp->lock, flags);
1949 
1950     if (lp->hs_state != LDC_HS_COMPLETE)
1951         err = -ENOTCONN;
1952     else
1953         err = lp->mops->write(lp, buf, size);
1954 
1955     spin_unlock_irqrestore(&lp->lock, flags);
1956 
1957     return err;
1958 }
1959 EXPORT_SYMBOL(ldc_write);
1960 
1961 int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1962 {
1963     unsigned long flags;
1964     int err;
1965 
1966     ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
1967 
1968     if (!buf)
1969         return -EINVAL;
1970 
1971     if (!size)
1972         return 0;
1973 
1974     spin_lock_irqsave(&lp->lock, flags);
1975 
1976     if (lp->hs_state != LDC_HS_COMPLETE)
1977         err = -ENOTCONN;
1978     else
1979         err = lp->mops->read(lp, buf, size);
1980 
1981     spin_unlock_irqrestore(&lp->lock, flags);
1982 
1983     ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
1984            lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
1985 
1986     return err;
1987 }
1988 EXPORT_SYMBOL(ldc_read);
1989 
1990 static u64 pagesize_code(void)
1991 {
1992     switch (PAGE_SIZE) {
1993     default:
1994     case (8ULL * 1024ULL):
1995         return 0;
1996     case (64ULL * 1024ULL):
1997         return 1;
1998     case (512ULL * 1024ULL):
1999         return 2;
2000     case (4ULL * 1024ULL * 1024ULL):
2001         return 3;
2002     case (32ULL * 1024ULL * 1024ULL):
2003         return 4;
2004     case (256ULL * 1024ULL * 1024ULL):
2005         return 5;
2006     }
2007 }
2008 
2009 static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
2010 {
2011     return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
2012         (index << PAGE_SHIFT) |
2013         page_offset);
2014 }
2015 
2016 
2017 static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
2018                          unsigned long npages)
2019 {
2020     long entry;
2021 
2022     entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
2023                       npages, NULL, (unsigned long)-1, 0);
2024     if (unlikely(entry == IOMMU_ERROR_CODE))
2025         return NULL;
2026 
2027     return iommu->page_table + entry;
2028 }
2029 
2030 static u64 perm_to_mte(unsigned int map_perm)
2031 {
2032     u64 mte_base;
2033 
2034     mte_base = pagesize_code();
2035 
2036     if (map_perm & LDC_MAP_SHADOW) {
2037         if (map_perm & LDC_MAP_R)
2038             mte_base |= LDC_MTE_COPY_R;
2039         if (map_perm & LDC_MAP_W)
2040             mte_base |= LDC_MTE_COPY_W;
2041     }
2042     if (map_perm & LDC_MAP_DIRECT) {
2043         if (map_perm & LDC_MAP_R)
2044             mte_base |= LDC_MTE_READ;
2045         if (map_perm & LDC_MAP_W)
2046             mte_base |= LDC_MTE_WRITE;
2047         if (map_perm & LDC_MAP_X)
2048             mte_base |= LDC_MTE_EXEC;
2049     }
2050     if (map_perm & LDC_MAP_IO) {
2051         if (map_perm & LDC_MAP_R)
2052             mte_base |= LDC_MTE_IOMMU_R;
2053         if (map_perm & LDC_MAP_W)
2054             mte_base |= LDC_MTE_IOMMU_W;
2055     }
2056 
2057     return mte_base;
2058 }
2059 
2060 static int pages_in_region(unsigned long base, long len)
2061 {
2062     int count = 0;
2063 
2064     do {
2065         unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2066 
2067         len -= (new - base);
2068         base = new;
2069         count++;
2070     } while (len > 0);
2071 
2072     return count;
2073 }
2074 
/* Scratch state threaded through fill_cookies() while building the
 * cookie list for an ldc_map_sg() / ldc_map_single() request.
 */
struct cookie_state {
    struct ldc_mtable_entry     *page_table; /* base of channel's MTE table */
    struct ldc_trans_cookie     *cookies; /* caller-supplied output array */
    u64             mte_base; /* permission + pagesize bits for each MTE */
    u64             prev_cookie; /* end of previous cookie, for merging */
    u32             pte_idx; /* next MTE slot to fill */
    u32             nc; /* number of cookies emitted so far */
};
2083 
/* Populate map-table entries and output cookies for the physical
 * region [pa, pa+len), starting @off bytes into the first page.
 * Pages whose cookie continues exactly where the previous one ended
 * are merged into it rather than emitting a new cookie.
 */
static void fill_cookies(struct cookie_state *sp, unsigned long pa,
             unsigned long off, unsigned long len)
{
    do {
        unsigned long tlen, new = pa + PAGE_SIZE;
        u64 this_cookie;

        /* Export this page with the precomputed permission bits. */
        sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;

        tlen = PAGE_SIZE;
        if (off)
            tlen = PAGE_SIZE - off;
        if (tlen > len)
            tlen = len;

        this_cookie = make_cookie(sp->pte_idx,
                      pagesize_code(), off);

        /* Only the first page can start mid-page. */
        off = 0;

        if (this_cookie == sp->prev_cookie) {
            /* Contiguous with the previous cookie: extend it. */
            sp->cookies[sp->nc - 1].cookie_size += tlen;
        } else {
            sp->cookies[sp->nc].cookie_addr = this_cookie;
            sp->cookies[sp->nc].cookie_size = tlen;
            sp->nc++;
        }
        sp->prev_cookie = this_cookie + tlen;

        sp->pte_idx++;

        len -= tlen;
        pa = new;
    } while (len > 0);
}
2119 
2120 static int sg_count_one(struct scatterlist *sg)
2121 {
2122     unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2123     long len = sg->length;
2124 
2125     if ((sg->offset | len) & (8UL - 1))
2126         return -EFAULT;
2127 
2128     return pages_in_region(base + sg->offset, len);
2129 }
2130 
2131 static int sg_count_pages(struct scatterlist *sg, int num_sg)
2132 {
2133     int count;
2134     int i;
2135 
2136     count = 0;
2137     for (i = 0; i < num_sg; i++) {
2138         int err = sg_count_one(sg + i);
2139         if (err < 0)
2140             return err;
2141         count += err;
2142     }
2143 
2144     return count;
2145 }
2146 
/* Map a scatterlist for export over the channel, filling in up to
 * @ncookies transfer cookies for the peer.  Returns the number of
 * cookies actually produced, or a negative errno.
 *
 * NOTE(review): the ncookies check compares against the page count,
 * which is an upper bound on the cookies emitted (adjacent pages can
 * merge), so the check is safely conservative.
 */
int ldc_map_sg(struct ldc_channel *lp,
           struct scatterlist *sg, int num_sg,
           struct ldc_trans_cookie *cookies, int ncookies,
           unsigned int map_perm)
{
    unsigned long i, npages;
    struct ldc_mtable_entry *base;
    struct cookie_state state;
    struct ldc_iommu *iommu;
    int err;
    struct scatterlist *s;

    if (map_perm & ~LDC_MAP_ALL)
        return -EINVAL;

    /* Also validates 8-byte alignment of every sg entry. */
    err = sg_count_pages(sg, num_sg);
    if (err < 0)
        return err;

    npages = err;
    if (err > ncookies)
        return -EMSGSIZE;

    iommu = &lp->iommu;

    base = alloc_npages(iommu, npages);

    if (!base)
        return -ENOMEM;

    state.page_table = iommu->page_table;
    state.cookies = cookies;
    state.mte_base = perm_to_mte(map_perm);
    state.prev_cookie = ~(u64)0;
    state.pte_idx = (base - iommu->page_table);
    state.nc = 0;

    for_each_sg(sg, s, num_sg, i) {
        fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
                 s->offset, s->length);
    }

    return state.nc;
}
EXPORT_SYMBOL(ldc_map_sg);
2192 
2193 int ldc_map_single(struct ldc_channel *lp,
2194            void *buf, unsigned int len,
2195            struct ldc_trans_cookie *cookies, int ncookies,
2196            unsigned int map_perm)
2197 {
2198     unsigned long npages, pa;
2199     struct ldc_mtable_entry *base;
2200     struct cookie_state state;
2201     struct ldc_iommu *iommu;
2202 
2203     if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2204         return -EINVAL;
2205 
2206     pa = __pa(buf);
2207     if ((pa | len) & (8UL - 1))
2208         return -EFAULT;
2209 
2210     npages = pages_in_region(pa, len);
2211 
2212     iommu = &lp->iommu;
2213 
2214     base = alloc_npages(iommu, npages);
2215 
2216     if (!base)
2217         return -ENOMEM;
2218 
2219     state.page_table = iommu->page_table;
2220     state.cookies = cookies;
2221     state.mte_base = perm_to_mte(map_perm);
2222     state.prev_cookie = ~(u64)0;
2223     state.pte_idx = (base - iommu->page_table);
2224     state.nc = 0;
2225     fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2226     BUG_ON(state.nc > ncookies);
2227 
2228     return state.nc;
2229 }
2230 EXPORT_SYMBOL(ldc_map_single);
2231 
2232 
2233 static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2234             u64 cookie, u64 size)
2235 {
2236     unsigned long npages, entry;
2237 
2238     npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2239 
2240     entry = ldc_cookie_to_index(cookie, iommu);
2241     ldc_demap(iommu, id, cookie, entry, npages);
2242     iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
2243 }
2244 
2245 void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2246            int ncookies)
2247 {
2248     struct ldc_iommu *iommu = &lp->iommu;
2249     int i;
2250     unsigned long flags;
2251 
2252     spin_lock_irqsave(&iommu->lock, flags);
2253     for (i = 0; i < ncookies; i++) {
2254         u64 addr = cookies[i].cookie_addr;
2255         u64 size = cookies[i].cookie_size;
2256 
2257         free_npages(lp->id, iommu, addr, size);
2258     }
2259     spin_unlock_irqrestore(&iommu->lock, flags);
2260 }
2261 EXPORT_SYMBOL(ldc_unmap);
2262 
/* Copy data between a local buffer and the peer's exported memory,
 * walking the peer's cookie list via the sun4v LDC copy hypercall.
 *
 * @copy_dir: LDC_COPY_IN (peer -> buf) or LDC_COPY_OUT (buf -> peer).
 * @buf:      local buffer; its physical address, @len, and @offset must
 *            all be 8-byte aligned.
 * @offset:   byte offset into the cookie-described region to start at.
 *
 * Returns the number of bytes actually copied (callers decide how to
 * treat short copies), or a negative errno on bad arguments or a
 * downed link.
 */
int ldc_copy(struct ldc_channel *lp, int copy_dir,
	     void *buf, unsigned int len, unsigned long offset,
	     struct ldc_trans_cookie *cookies, int ncookies)
{
	unsigned int orig_len;
	unsigned long ra;
	int i;

	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
		       lp->id, copy_dir);
		return -EINVAL;
	}

	ra = __pa(buf);
	if ((ra | len | offset) & (8UL - 1)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
		       "ra[%lx] len[%x] offset[%lx]\n",
		       lp->id, ra, len, offset);
		return -EFAULT;
	}

	/* Copies are only legal once the handshake has completed and the
	 * link has not been reset underneath us.
	 */
	if (lp->hs_state != LDC_HS_COMPLETE ||
	    (lp->flags & LDC_FLAG_RESET)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
		return -ECONNRESET;
	}

	orig_len = len;
	for (i = 0; i < ncookies; i++) {
		unsigned long cookie_raddr = cookies[i].cookie_addr;
		unsigned long this_len = cookies[i].cookie_size;
		unsigned long actual_len;

		/* Consume the caller's starting offset by skipping whole
		 * or partial cookies until it reaches zero.
		 */
		if (unlikely(offset)) {
			unsigned long this_off = offset;

			if (this_off > this_len)
				this_off = this_len;

			offset -= this_off;
			this_len -= this_off;
			if (!this_len)
				continue;
			cookie_raddr += this_off;
		}

		if (this_len > len)
			this_len = len;

		/* The hypervisor may copy fewer bytes than requested;
		 * loop until this cookie's portion is fully transferred.
		 */
		while (1) {
			unsigned long hv_err;

			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
						cookie_raddr, ra,
						this_len, &actual_len);
			if (unlikely(hv_err)) {
				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
				       "HV error %lu\n",
				       lp->id, hv_err);
				/* Distinguish a dropped link from a bad
				 * mapping so callers can react properly.
				 */
				if (lp->hs_state != LDC_HS_COMPLETE ||
				    (lp->flags & LDC_FLAG_RESET))
					return -ECONNRESET;
				else
					return -EFAULT;
			}

			cookie_raddr += actual_len;
			ra += actual_len;
			len -= actual_len;
			if (actual_len == this_len)
				break;

			this_len -= actual_len;
		}

		if (!len)
			break;
	}

	/* It is caller policy what to do about short copies.
	 * For example, a networking driver can declare the
	 * packet a runt and drop it.
	 */

	return orig_len - len;
}
EXPORT_SYMBOL(ldc_copy);
2352 
2353 void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2354               struct ldc_trans_cookie *cookies, int *ncookies,
2355               unsigned int map_perm)
2356 {
2357     void *buf;
2358     int err;
2359 
2360     if (len & (8UL - 1))
2361         return ERR_PTR(-EINVAL);
2362 
2363     buf = kzalloc(len, GFP_ATOMIC);
2364     if (!buf)
2365         return ERR_PTR(-ENOMEM);
2366 
2367     err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2368     if (err < 0) {
2369         kfree(buf);
2370         return ERR_PTR(err);
2371     }
2372     *ncookies = err;
2373 
2374     return buf;
2375 }
2376 EXPORT_SYMBOL(ldc_alloc_exp_dring);
2377 
/* Release a ring obtained from ldc_alloc_exp_dring(): unmap it from the
 * peer first, then free the backing memory.  Unmapping must precede the
 * kfree() so the peer can no longer reach freed memory.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2385 
2386 static int __init ldc_init(void)
2387 {
2388     unsigned long major, minor;
2389     struct mdesc_handle *hp;
2390     const u64 *v;
2391     int err;
2392     u64 mp;
2393 
2394     hp = mdesc_grab();
2395     if (!hp)
2396         return -ENODEV;
2397 
2398     mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2399     err = -ENODEV;
2400     if (mp == MDESC_NODE_NULL)
2401         goto out;
2402 
2403     v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2404     if (!v)
2405         goto out;
2406 
2407     major = 1;
2408     minor = 0;
2409     if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2410         printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2411         goto out;
2412     }
2413 
2414     printk(KERN_INFO "%s", version);
2415 
2416     if (!*v) {
2417         printk(KERN_INFO PFX "Domaining disabled.\n");
2418         goto out;
2419     }
2420     ldom_domaining_enabled = 1;
2421     err = 0;
2422 
2423 out:
2424     mdesc_release(hp);
2425     return err;
2426 }
2427 
2428 core_initcall(ldc_init);