0001 /*
0002  * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 #include <linux/mm.h>
0033 #include <linux/types.h>
0034 #include <linux/device.h>
0035 #include <linux/dmapool.h>
0036 #include <linux/slab.h>
0037 #include <linux/list.h>
0038 #include <linux/highmem.h>
0039 #include <linux/io.h>
0040 #include <linux/uio.h>
0041 #include <linux/rbtree.h>
0042 #include <linux/spinlock.h>
0043 #include <linux/delay.h>
0044 
0045 #include "qib.h"
0046 #include "qib_user_sdma.h"
0047 
0048 /* minimum size of header */
0049 #define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
0050 /* expected size of headers (for dma_pool) */
0051 #define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
0052 /* attempt to drain the queue for 5 secs (250 iterations of 20 ms) */
0053 #define QIB_USER_SDMA_DRAIN_TIMEOUT 250
0054 
0055 /*
0056  * track how many times each process has opened this driver.
0057  */
0058 static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
0059 
0060 struct qib_user_sdma_rb_node {
0061     struct rb_node node;
0062     int refcount;
0063     pid_t pid;
0064 };
0065 
0066 struct qib_user_sdma_pkt {
0067     struct list_head list;  /* list element */
0068 
0069     u8  tiddma;     /* if this is NEW tid-sdma */
0070     u8  largepkt;       /* this is large pkt from kmalloc */
0071     u16 frag_size;      /* frag size used by PSM */
0072     u16 index;              /* last header index or push index */
0073     u16 naddr;              /* dimension of addr (1..3) ... */
0074     u16 addrlimit;      /* addr array size */
0075     u16 tidsmidx;       /* current tidsm index */
0076     u16 tidsmcount;     /* tidsm array item count */
0077     u16 payload_size;   /* payload size so far for header */
0078     u32 bytes_togo;     /* bytes for processing */
0079     u32 counter;            /* sdma pkts queued counter for this entry */
0080     struct qib_tid_session_member *tidsm;   /* tid session member array */
0081     struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */
0082     u64 added;              /* global descq number of entries */
0083 
0084     struct {
0085         u16 offset;                     /* offset for kvaddr, addr */
0086         u16 length;                     /* length in page */
0087         u16 first_desc;         /* first desc */
0088         u16 last_desc;          /* last desc */
0089         u16 put_page;                   /* should we put_page? */
0090         u16 dma_mapped;                 /* is page dma_mapped? */
0091         u16 dma_length;         /* for dma_unmap_page() */
0092         u16 padding;
0093         struct page *page;              /* may be NULL (coherent mem) */
0094         void *kvaddr;                   /* FIXME: only for pio hack */
0095         dma_addr_t addr;
0096     } addr[4];   /* max pages, any more and we coalesce */
0097 };
0098 
0099 struct qib_user_sdma_queue {
0100     /*
0101      * pkts sent to the dma engine are queued on this
0102      * list head.  the type of the elements of this
0103      * list is struct qib_user_sdma_pkt...
0104      */
0105     struct list_head sent;
0106 
0107     /*
0108      * Because above list will be accessed by both process and
0109      * interrupt handler, we need a spinlock for it.
0110      */
0111     spinlock_t sent_lock ____cacheline_aligned_in_smp;
0112 
0113     /* headers with expected length are allocated from here... */
0114     char header_cache_name[64];
0115     struct dma_pool *header_cache;
0116 
0117     /* packets are allocated from the slab cache... */
0118     char pkt_slab_name[64];
0119     struct kmem_cache *pkt_slab;
0120 
0121     /* as packets are queued, they are counted... */
0122     u32 counter;
0123     u32 sent_counter;
0124     /* pending packets, not yet handed to the sdma engine */
0125     u32 num_pending;
0126     /* packets handed to the sdma engine, not yet complete */
0127     u32 num_sending;
0128     /* global descq entry number of the last packet sent */
0129     u64 added;
0130 
0131     /* dma page table */
0132     struct rb_root dma_pages_root;
0133 
0134     struct qib_user_sdma_rb_node *sdma_rb_node;
0135 
0136     /* protect everything above... */
0137     struct mutex lock;
0138 };
0139 
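/*
 * Look up the per-pid refcount node in the global rb tree; returns
 * NULL if there is no node for this pid yet.
 */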
0140 static struct qib_user_sdma_rb_node *
0141 qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
0142 {
0143     struct qib_user_sdma_rb_node *sdma_rb_node;
0144     struct rb_node *node = root->rb_node;
0145 
0146     while (node) {
0147         sdma_rb_node = rb_entry(node, struct qib_user_sdma_rb_node,
0148                     node);
0149         if (pid < sdma_rb_node->pid)
0150             node = node->rb_left;
0151         else if (pid > sdma_rb_node->pid)
0152             node = node->rb_right;
0153         else
0154             return sdma_rb_node;
0155     }
0156     return NULL;
0157 }
0158 
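/* Insert a new per-pid node; returns 1 on success, 0 if the pid is already present. */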
0159 static int
0160 qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
0161 {
0162     struct rb_node **node = &(root->rb_node);
0163     struct rb_node *parent = NULL;
0164     struct qib_user_sdma_rb_node *got;
0165 
0166     while (*node) {
0167         got = rb_entry(*node, struct qib_user_sdma_rb_node, node);
0168         parent = *node;
0169         if (new->pid < got->pid)
0170             node = &((*node)->rb_left);
0171         else if (new->pid > got->pid)
0172             node = &((*node)->rb_right);
0173         else
0174             return 0;
0175     }
0176 
0177     rb_link_node(&new->node, parent, node);
0178     rb_insert_color(&new->node, root);
0179     return 1;
0180 }
0181 
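/*
 * Create a per-context user sdma queue: its packet slab cache, its
 * header dma pool, and the per-pid refcount node shared with any
 * other queues this process creates.
 */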
0182 struct qib_user_sdma_queue *
0183 qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
0184 {
0185     struct qib_user_sdma_queue *pq =
0186         kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
0187     struct qib_user_sdma_rb_node *sdma_rb_node;
0188 
0189     if (!pq)
0190         goto done;
0191 
0192     pq->counter = 0;
0193     pq->sent_counter = 0;
0194     pq->num_pending = 0;
0195     pq->num_sending = 0;
0196     pq->added = 0;
0197     pq->sdma_rb_node = NULL;
0198 
0199     INIT_LIST_HEAD(&pq->sent);
0200     spin_lock_init(&pq->sent_lock);
0201     mutex_init(&pq->lock);
0202 
0203     snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
0204          "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
0205     pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
0206                      sizeof(struct qib_user_sdma_pkt),
0207                      0, 0, NULL);
0208 
0209     if (!pq->pkt_slab)
0210         goto err_kfree;
0211 
0212     snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
0213          "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
0214     pq->header_cache = dma_pool_create(pq->header_cache_name,
0215                        dev,
0216                        QIB_USER_SDMA_EXP_HEADER_LENGTH,
0217                        4, 0);
0218     if (!pq->header_cache)
0219         goto err_slab;
0220 
0221     pq->dma_pages_root = RB_ROOT;
0222 
0223     sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
0224                     current->pid);
0225     if (sdma_rb_node) {
0226         sdma_rb_node->refcount++;
0227     } else {
0228         sdma_rb_node = kmalloc(sizeof(
0229             struct qib_user_sdma_rb_node), GFP_KERNEL);
0230         if (!sdma_rb_node)
0231             goto err_rb;
0232 
0233         sdma_rb_node->refcount = 1;
0234         sdma_rb_node->pid = current->pid;
0235 
0236         qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node);
0237     }
0238     pq->sdma_rb_node = sdma_rb_node;
0239 
0240     goto done;
0241 
0242 err_rb:
0243     dma_pool_destroy(pq->header_cache);
0244 err_slab:
0245     kmem_cache_destroy(pq->pkt_slab);
0246 err_kfree:
0247     kfree(pq);
0248     pq = NULL;
0249 
0250 done:
0251     return pq;
0252 }
0253 
0254 static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
0255                     int i, u16 offset, u16 len,
0256                     u16 first_desc, u16 last_desc,
0257                     u16 put_page, u16 dma_mapped,
0258                     struct page *page, void *kvaddr,
0259                     dma_addr_t dma_addr, u16 dma_length)
0260 {
0261     pkt->addr[i].offset = offset;
0262     pkt->addr[i].length = len;
0263     pkt->addr[i].first_desc = first_desc;
0264     pkt->addr[i].last_desc = last_desc;
0265     pkt->addr[i].put_page = put_page;
0266     pkt->addr[i].dma_mapped = dma_mapped;
0267     pkt->addr[i].page = page;
0268     pkt->addr[i].kvaddr = kvaddr;
0269     pkt->addr[i].addr = dma_addr;
0270     pkt->addr[i].dma_length = dma_length;
0271 }
0272 
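/*
 * Allocate an sdma header buffer: use the dma_pool for the common
 * expected-length case, and fall back to kmalloc otherwise (or if the
 * pool allocation fails), zeroing *dma_addr so callers know the buffer
 * still needs to be dma mapped before use.
 */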
0273 static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
0274                 size_t len, dma_addr_t *dma_addr)
0275 {
0276     void *hdr;
0277 
0278     if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
0279         hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
0280                          dma_addr);
0281     else
0282         hdr = NULL;
0283 
0284     if (!hdr) {
0285         hdr = kmalloc(len, GFP_KERNEL);
0286         if (!hdr)
0287             return NULL;
0288 
0289         *dma_addr = 0;
0290     }
0291 
0292     return hdr;
0293 }
0294 
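/*
 * Map one page of user data and carve it into sdma fragments, honoring
 * the frag size and (for tid-sdma) the receiver's tid page lengths.
 * When a fragment completes a packet but more bytes remain, a fresh
 * sdma header is allocated and patched so the rest goes out as a new
 * packet.
 */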
0295 static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
0296                        struct qib_user_sdma_queue *pq,
0297                        struct qib_user_sdma_pkt *pkt,
0298                        struct page *page, u16 put,
0299                        u16 offset, u16 len, void *kvaddr)
0300 {
0301     __le16 *pbc16;
0302     void *pbcvaddr;
0303     struct qib_message_header *hdr;
0304     u16 newlen, pbclen, lastdesc, dma_mapped;
0305     u32 vcto;
0306     union qib_seqnum seqnum;
0307     dma_addr_t pbcdaddr;
0308     dma_addr_t dma_addr =
0309         dma_map_page(&dd->pcidev->dev,
0310             page, offset, len, DMA_TO_DEVICE);
0311     int ret = 0;
0312 
0313     if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
0314         /*
0315          * dma mapping error; the pkt has not taken ownership of
0316          * this page yet, so release the page here and let the
0317          * caller ignore it.
0318          */
0319         if (put) {
0320             unpin_user_page(page);
0321         } else {
0322             /* coalesce case */
0323             kunmap(page);
0324             __free_page(page);
0325         }
0326         ret = -ENOMEM;
0327         goto done;
0328     }
0329     offset = 0;
0330     dma_mapped = 1;
0331 
0332 
0333 next_fragment:
0334 
0335     /*
0336      * In tid-sdma, the transfer length is restricted by
0337      * receiver side current tid page length.
0338      */
0339     if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
0340         newlen = pkt->tidsm[pkt->tidsmidx].length;
0341     else
0342         newlen = len;
0343 
0344     /*
0345      * Then the transfer length is restricted by MTU.
0346      * The last descriptor flag is determined by:
0347      * 1. the current packet has reached frag size length.
0348      * 2. the current tid page is done, if tid-sdma.
0349      * 3. there are no more bytes to go, if plain sdma.
0350      */
0351     lastdesc = 0;
0352     if ((pkt->payload_size + newlen) >= pkt->frag_size) {
0353         newlen = pkt->frag_size - pkt->payload_size;
0354         lastdesc = 1;
0355     } else if (pkt->tiddma) {
0356         if (newlen == pkt->tidsm[pkt->tidsmidx].length)
0357             lastdesc = 1;
0358     } else {
0359         if (newlen == pkt->bytes_togo)
0360             lastdesc = 1;
0361     }
0362 
0363     /* fill the next fragment in this page */
0364     qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
0365         offset, newlen,     /* offset, len */
0366         0, lastdesc,        /* first last desc */
0367         put, dma_mapped,    /* put page, dma mapped */
0368         page, kvaddr,       /* struct page, virt addr */
0369         dma_addr, len);     /* dma addr, dma length */
0370     pkt->bytes_togo -= newlen;
0371     pkt->payload_size += newlen;
0372     pkt->naddr++;
0373     if (pkt->naddr == pkt->addrlimit) {
0374         ret = -EFAULT;
0375         goto done;
0376     }
0377 
0378     /* If there are no more bytes to go (lastdesc == 1). */
0379     if (pkt->bytes_togo == 0) {
0380         /* The packet is done, but the header is not dma mapped yet;
0381          * it must have come from kmalloc. */
0382         if (!pkt->addr[pkt->index].addr) {
0383             pkt->addr[pkt->index].addr =
0384                 dma_map_single(&dd->pcidev->dev,
0385                     pkt->addr[pkt->index].kvaddr,
0386                     pkt->addr[pkt->index].dma_length,
0387                     DMA_TO_DEVICE);
0388             if (dma_mapping_error(&dd->pcidev->dev,
0389                     pkt->addr[pkt->index].addr)) {
0390                 ret = -ENOMEM;
0391                 goto done;
0392             }
0393             pkt->addr[pkt->index].dma_mapped = 1;
0394         }
0395 
0396         goto done;
0397     }
0398 
0399     /* If tid-sdma, advance tid info. */
0400     if (pkt->tiddma) {
0401         pkt->tidsm[pkt->tidsmidx].length -= newlen;
0402         if (pkt->tidsm[pkt->tidsmidx].length) {
0403             pkt->tidsm[pkt->tidsmidx].offset += newlen;
0404         } else {
0405             pkt->tidsmidx++;
0406             if (pkt->tidsmidx == pkt->tidsmcount) {
0407                 ret = -EFAULT;
0408                 goto done;
0409             }
0410         }
0411     }
0412 
0413     /*
0414      * If this is NOT the last descriptor (newlen == len),
0415      * the current packet is not done yet, but the current
0416      * send side page is done.
0417      */
0418     if (lastdesc == 0)
0419         goto done;
0420 
0421     /*
0422      * When running this driver under PSM with a message size
0423      * that fits into one transfer unit, it is not possible to
0424      * get past this line; if we do, that is a bug.
0425      */
0426 
0427     /*
0428      * Since the current packet is done and there are more
0429      * bytes to go, we need to create a new sdma header, copying
0430      * it from the previous sdma header and then modifying both.
0431      */
0432     pbclen = pkt->addr[pkt->index].length;
0433     pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
0434     if (!pbcvaddr) {
0435         ret = -ENOMEM;
0436         goto done;
0437     }
0438     /* Copy the previous sdma header to new sdma header */
0439     pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
0440     memcpy(pbcvaddr, pbc16, pbclen);
0441 
0442     /* Modify the previous sdma header */
0443     hdr = (struct qib_message_header *)&pbc16[4];
0444 
0445     /* New pbc length */
0446     pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));
0447 
0448     /* New packet length */
0449     hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
0450 
0451     if (pkt->tiddma) {
0452         /* turn on the header suppression */
0453         hdr->iph.pkt_flags =
0454             cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
0455         /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */
0456         hdr->flags &= ~(0x04|0x20);
0457     } else {
0458         /* turn off extra bytes: 20-21 bits */
0459         hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
0460         /* turn off ACK_REQ: 0x04 */
0461         hdr->flags &= ~(0x04);
0462     }
0463 
0464     /* New kdeth checksum */
0465     vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
0466     hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
0467         be16_to_cpu(hdr->lrh[2]) -
0468         ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
0469         le16_to_cpu(hdr->iph.pkt_flags));
0470 
0471     /* The packet is done, but the header is not dma mapped yet;
0472      * it must have come from kmalloc. */
0473     if (!pkt->addr[pkt->index].addr) {
0474         pkt->addr[pkt->index].addr =
0475             dma_map_single(&dd->pcidev->dev,
0476                 pkt->addr[pkt->index].kvaddr,
0477                 pkt->addr[pkt->index].dma_length,
0478                 DMA_TO_DEVICE);
0479         if (dma_mapping_error(&dd->pcidev->dev,
0480                 pkt->addr[pkt->index].addr)) {
0481             ret = -ENOMEM;
0482             goto done;
0483         }
0484         pkt->addr[pkt->index].dma_mapped = 1;
0485     }
0486 
0487     /* Modify the new sdma header */
0488     pbc16 = (__le16 *)pbcvaddr;
0489     hdr = (struct qib_message_header *)&pbc16[4];
0490 
0491     /* New pbc length */
0492     pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));
0493 
0494     /* New packet length */
0495     hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));
0496 
0497     if (pkt->tiddma) {
0498         /* Set new tid and offset for new sdma header */
0499         hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
0500             (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
0501             (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
0502             (pkt->tidsm[pkt->tidsmidx].offset>>2));
0503     } else {
0504         /* Middle protocol new packet offset */
0505         hdr->uwords[2] += pkt->payload_size;
0506     }
0507 
0508     /* New kdeth checksum */
0509     vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
0510     hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
0511         be16_to_cpu(hdr->lrh[2]) -
0512         ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
0513         le16_to_cpu(hdr->iph.pkt_flags));
0514 
0515     /* Next sequence number in new sdma header */
0516     seqnum.val = be32_to_cpu(hdr->bth[2]);
0517     if (pkt->tiddma)
0518         seqnum.seq++;
0519     else
0520         seqnum.pkt++;
0521     hdr->bth[2] = cpu_to_be32(seqnum.val);
0522 
0523     /* Init new sdma header. */
0524     qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */
0525         0, pbclen,      /* offset, len */
0526         1, 0,           /* first last desc */
0527         0, 0,           /* put page, dma mapped */
0528         NULL, pbcvaddr,     /* struct page, virt addr */
0529         pbcdaddr, pbclen);  /* dma addr, dma length */
0530     pkt->index = pkt->naddr;
0531     pkt->payload_size = 0;
0532     pkt->naddr++;
0533     if (pkt->naddr == pkt->addrlimit) {
0534         ret = -EFAULT;
0535         goto done;
0536     }
0537 
0538     /* Prepare for next fragment in this page */
0539     if (newlen != len) {
0540         if (dma_mapped) {
0541             put = 0;
0542             dma_mapped = 0;
0543             page = NULL;
0544             kvaddr = NULL;
0545         }
0546         len -= newlen;
0547         offset += newlen;
0548 
0549         goto next_fragment;
0550     }
0551 
0552 done:
0553     return ret;
0554 }
0555 
0556 /* we have too many pages in the iovec; coalesce them into a single page */
0557 static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
0558                   struct qib_user_sdma_queue *pq,
0559                   struct qib_user_sdma_pkt *pkt,
0560                   const struct iovec *iov,
0561                   unsigned long niov)
0562 {
0563     int ret = 0;
0564     struct page *page = alloc_page(GFP_KERNEL);
0565     void *mpage_save;
0566     char *mpage;
0567     int i;
0568     int len = 0;
0569 
0570     if (!page) {
0571         ret = -ENOMEM;
0572         goto done;
0573     }
0574 
0575     mpage = kmap(page);
0576     mpage_save = mpage;
0577     for (i = 0; i < niov; i++) {
0578         int cfur;
0579 
0580         cfur = copy_from_user(mpage,
0581                       iov[i].iov_base, iov[i].iov_len);
0582         if (cfur) {
0583             ret = -EFAULT;
0584             goto free_unmap;
0585         }
0586 
0587         mpage += iov[i].iov_len;
0588         len += iov[i].iov_len;
0589     }
0590 
0591     ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
0592             page, 0, 0, len, mpage_save);
0593     goto done;
0594 
0595 free_unmap:
0596     kunmap(page);
0597     __free_page(page);
0598 done:
0599     return ret;
0600 }
0601 
0602 /*
0603  * How many pages in this iovec element?
0604  */
0605 static size_t qib_user_sdma_num_pages(const struct iovec *iov)
0606 {
0607     const unsigned long addr  = (unsigned long) iov->iov_base;
0608     const unsigned long  len  = iov->iov_len;
0609     const unsigned long spage = addr & PAGE_MASK;
0610     const unsigned long epage = (addr + len - 1) & PAGE_MASK;
0611 
0612     return 1 + ((epage - spage) >> PAGE_SHIFT);
0613 }
0614 
0615 static void qib_user_sdma_free_pkt_frag(struct device *dev,
0616                     struct qib_user_sdma_queue *pq,
0617                     struct qib_user_sdma_pkt *pkt,
0618                     int frag)
0619 {
0620     const int i = frag;
0621 
0622     if (pkt->addr[i].page) {
0623         /* only user data has page */
0624         if (pkt->addr[i].dma_mapped)
0625             dma_unmap_page(dev,
0626                        pkt->addr[i].addr,
0627                        pkt->addr[i].dma_length,
0628                        DMA_TO_DEVICE);
0629 
0630         if (pkt->addr[i].kvaddr)
0631             kunmap(pkt->addr[i].page);
0632 
0633         if (pkt->addr[i].put_page)
0634             unpin_user_page(pkt->addr[i].page);
0635         else
0636             __free_page(pkt->addr[i].page);
0637     } else if (pkt->addr[i].kvaddr) {
0638         /* for headers */
0639         if (pkt->addr[i].dma_mapped) {
0640             /* from kmalloc & dma mapped */
0641             dma_unmap_single(dev,
0642                        pkt->addr[i].addr,
0643                        pkt->addr[i].dma_length,
0644                        DMA_TO_DEVICE);
0645             kfree(pkt->addr[i].kvaddr);
0646         } else if (pkt->addr[i].addr) {
0647             /* free coherent mem from cache... */
0648             dma_pool_free(pq->header_cache,
0649                   pkt->addr[i].kvaddr, pkt->addr[i].addr);
0650         } else {
0651             /* from kmalloc but not dma mapped */
0652             kfree(pkt->addr[i].kvaddr);
0653         }
0654     }
0655 }
0656 
0657 /* return number of pages pinned... */
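/*
 * On error, fragments already handed to the pkt are left for the
 * caller's cleanup path, while pages pinned here but not yet handed
 * over are unpinned before returning.
 */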
0658 static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
0659                    struct qib_user_sdma_queue *pq,
0660                    struct qib_user_sdma_pkt *pkt,
0661                    unsigned long addr, int tlen, size_t npages)
0662 {
0663     struct page *pages[8];
0664     int i, j;
0665     int ret = 0;
0666 
0667     while (npages) {
0668         if (npages > 8)
0669             j = 8;
0670         else
0671             j = npages;
0672 
0673         ret = pin_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
0674         if (ret != j) {
0675             i = 0;
0676             j = ret;
0677             ret = -ENOMEM;
0678             goto free_pages;
0679         }
0680 
0681         for (i = 0; i < j; i++) {
0682             /* map the pages... */
0683             unsigned long fofs = addr & ~PAGE_MASK;
0684             int flen = ((fofs + tlen) > PAGE_SIZE) ?
0685                 (PAGE_SIZE - fofs) : tlen;
0686 
0687             ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
0688                 pages[i], 1, fofs, flen, NULL);
0689             if (ret < 0) {
0690                 /* the current page has already been
0691                  * taken care of inside the above call.
0692                  */
0693                 i++;
0694                 goto free_pages;
0695             }
0696 
0697             addr += flen;
0698             tlen -= flen;
0699         }
0700 
0701         npages -= j;
0702     }
0703 
0704     goto done;
0705 
0706     /* if error, return all pages not managed by pkt */
0707 free_pages:
0708     while (i < j)
0709         unpin_user_page(pages[i++]);
0710 
0711 done:
0712     return ret;
0713 }
0714 
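/*
 * Pin and map the payload iovec entries for one packet.  On failure,
 * frags already added to the packet are released here, leaving only
 * the header frag (addr[0]) for the caller to free.
 */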
0715 static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
0716                  struct qib_user_sdma_queue *pq,
0717                  struct qib_user_sdma_pkt *pkt,
0718                  const struct iovec *iov,
0719                  unsigned long niov)
0720 {
0721     int ret = 0;
0722     unsigned long idx;
0723 
0724     for (idx = 0; idx < niov; idx++) {
0725         const size_t npages = qib_user_sdma_num_pages(iov + idx);
0726         const unsigned long addr = (unsigned long) iov[idx].iov_base;
0727 
0728         ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
0729                           iov[idx].iov_len, npages);
0730         if (ret < 0)
0731             goto free_pkt;
0732     }
0733 
0734     goto done;
0735 
0736 free_pkt:
0737     /* we need to skip the first entry (the header) here */
0738     for (idx = 1; idx < pkt->naddr; idx++)
0739         qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
0740 
0741     /* We need to dma unmap the first entry to restore it to its
0742      * original state, so that the caller can free the memory on the
0743      * error path; the caller does not know whether it was dma mapped. */
0744     if (pkt->addr[0].dma_mapped) {
0745         dma_unmap_single(&dd->pcidev->dev,
0746                pkt->addr[0].addr,
0747                pkt->addr[0].dma_length,
0748                DMA_TO_DEVICE);
0749         pkt->addr[0].addr = 0;
0750         pkt->addr[0].dma_mapped = 0;
0751     }
0752 
0753 done:
0754     return ret;
0755 }
0756 
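/*
 * If the whole payload fits in a single frag but spans more pages than
 * the addr[] array can hold, coalesce it into one freshly allocated
 * page; otherwise pin the user pages directly.
 */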
0757 static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
0758                       struct qib_user_sdma_queue *pq,
0759                       struct qib_user_sdma_pkt *pkt,
0760                       const struct iovec *iov,
0761                       unsigned long niov, int npages)
0762 {
0763     int ret = 0;
0764 
0765     if (pkt->frag_size == pkt->bytes_togo &&
0766             npages >= ARRAY_SIZE(pkt->addr))
0767         ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
0768     else
0769         ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
0770 
0771     return ret;
0772 }
0773 
0774 /* free all packets on a list, along with their fragments */
0775 static void qib_user_sdma_free_pkt_list(struct device *dev,
0776                     struct qib_user_sdma_queue *pq,
0777                     struct list_head *list)
0778 {
0779     struct qib_user_sdma_pkt *pkt, *pkt_next;
0780 
0781     list_for_each_entry_safe(pkt, pkt_next, list, list) {
0782         int i;
0783 
0784         for (i = 0; i < pkt->naddr; i++)
0785             qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
0786 
0787         if (pkt->largepkt)
0788             kfree(pkt);
0789         else
0790             kmem_cache_free(pq->pkt_slab, pkt);
0791     }
0792     INIT_LIST_HEAD(list);
0793 }
0794 
0795 /*
0796  * copy headers, coalesce etc -- pq->lock must be held
0797  *
0798  * we queue all the packets onto list, returning the number
0799  * of iovec entries consumed.  list must be empty initially,
0800  * as, if there is an error, we clean it...
0801  */
0802 static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
0803                     struct qib_pportdata *ppd,
0804                     struct qib_user_sdma_queue *pq,
0805                     const struct iovec *iov,
0806                     unsigned long niov,
0807                     struct list_head *list,
0808                     int *maxpkts, int *ndesc)
0809 {
0810     unsigned long idx = 0;
0811     int ret = 0;
0812     int npkts = 0;
0813     __le32 *pbc;
0814     dma_addr_t dma_addr;
0815     struct qib_user_sdma_pkt *pkt = NULL;
0816     size_t len;
0817     size_t nw;
0818     u32 counter = pq->counter;
0819     u16 frag_size;
0820 
0821     while (idx < niov && npkts < *maxpkts) {
0822         const unsigned long addr = (unsigned long) iov[idx].iov_base;
0823         const unsigned long idx_save = idx;
0824         unsigned pktnw;
0825         unsigned pktnwc;
0826         int nfrags = 0;
0827         size_t npages = 0;
0828         size_t bytes_togo = 0;
0829         int tiddma = 0;
0830         int cfur;
0831 
0832         len = iov[idx].iov_len;
0833         nw = len >> 2;
0834 
0835         if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
0836             len > PAGE_SIZE || len & 3 || addr & 3) {
0837             ret = -EINVAL;
0838             goto free_list;
0839         }
0840 
0841         pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
0842         if (!pbc) {
0843             ret = -ENOMEM;
0844             goto free_list;
0845         }
0846 
0847         cfur = copy_from_user(pbc, iov[idx].iov_base, len);
0848         if (cfur) {
0849             ret = -EFAULT;
0850             goto free_pbc;
0851         }
0852 
0853         /*
0854          * This assignment is a bit strange.  It's because the
0855          * pbc counts the number of 32 bit words in the full
0856          * packet _except_ the first word of the pbc itself...
0857          */
0858         pktnwc = nw - 1;
0859 
0860         /*
0861          * The pktnw computation yields the number of 32 bit words
0862          * that the caller has indicated in the PBC.  Note that
0863          * this is one less than the total number of words that
0864          * go to the send DMA engine, as the first 32 bit word
0865          * of the PBC itself is not counted.  Armed with this count,
0866          * we can verify that the packet is consistent with the
0867          * iovec lengths.
0868          */
0869         pktnw = le32_to_cpu(*pbc) & 0xFFFF;
0870         if (pktnw < pktnwc) {
0871             ret = -EINVAL;
0872             goto free_pbc;
0873         }
0874 
0875         idx++;
0876         while (pktnwc < pktnw && idx < niov) {
0877             const size_t slen = iov[idx].iov_len;
0878             const unsigned long faddr =
0879                 (unsigned long) iov[idx].iov_base;
0880 
0881             if (slen & 3 || faddr & 3 || !slen) {
0882                 ret = -EINVAL;
0883                 goto free_pbc;
0884             }
0885 
0886             npages += qib_user_sdma_num_pages(&iov[idx]);
0887 
0888             if (check_add_overflow(bytes_togo, slen, &bytes_togo) ||
0889                 bytes_togo > type_max(typeof(pkt->bytes_togo))) {
0890                 ret = -EINVAL;
0891                 goto free_pbc;
0892             }
0893             pktnwc += slen >> 2;
0894             idx++;
0895             nfrags++;
0896         }
0897 
0898         if (pktnwc != pktnw) {
0899             ret = -EINVAL;
0900             goto free_pbc;
0901         }
0902 
0903         frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
0904         if (((frag_size ? frag_size : bytes_togo) + len) >
0905                         ppd->ibmaxlen) {
0906             ret = -EINVAL;
0907             goto free_pbc;
0908         }
0909 
0910         if (frag_size) {
0911             size_t tidsmsize, n, pktsize, sz, addrlimit;
0912 
0913             n = npages*((2*PAGE_SIZE/frag_size)+1);
0914             pktsize = struct_size(pkt, addr, n);
0915 
0916             /*
0917              * Determine if this is tid-sdma or just sdma.
0918              */
0919             tiddma = (((le32_to_cpu(pbc[7])>>
0920                 QLOGIC_IB_I_TID_SHIFT)&
0921                 QLOGIC_IB_I_TID_MASK) !=
0922                 QLOGIC_IB_I_TID_MASK);
0923 
0924             if (tiddma)
0925                 tidsmsize = iov[idx].iov_len;
0926             else
0927                 tidsmsize = 0;
0928 
0929             if (check_add_overflow(pktsize, tidsmsize, &sz)) {
0930                 ret = -EINVAL;
0931                 goto free_pbc;
0932             }
0933             pkt = kmalloc(sz, GFP_KERNEL);
0934             if (!pkt) {
0935                 ret = -ENOMEM;
0936                 goto free_pbc;
0937             }
0938             pkt->largepkt = 1;
0939             pkt->frag_size = frag_size;
0940             if (check_add_overflow(n, ARRAY_SIZE(pkt->addr),
0941                            &addrlimit) ||
0942                 addrlimit > type_max(typeof(pkt->addrlimit))) {
0943                 ret = -EINVAL;
0944                 goto free_pkt;
0945             }
0946             pkt->addrlimit = addrlimit;
0947 
0948             if (tiddma) {
0949                 char *tidsm = (char *)pkt + pktsize;
0950 
0951                 cfur = copy_from_user(tidsm,
0952                     iov[idx].iov_base, tidsmsize);
0953                 if (cfur) {
0954                     ret = -EFAULT;
0955                     goto free_pkt;
0956                 }
0957                 pkt->tidsm =
0958                     (struct qib_tid_session_member *)tidsm;
0959                 pkt->tidsmcount = tidsmsize/
0960                     sizeof(struct qib_tid_session_member);
0961                 pkt->tidsmidx = 0;
0962                 idx++;
0963             }
0964 
0965             /*
0966              * The pbc 'fill1' field is borrowed to pass the frag
0967              * size; we need to clear it after picking up the frag
0968              * size, since the hardware requires this field to be zero.
0969              */
0970             *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
0971         } else {
0972             pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
0973             if (!pkt) {
0974                 ret = -ENOMEM;
0975                 goto free_pbc;
0976             }
0977             pkt->largepkt = 0;
0978             pkt->frag_size = bytes_togo;
0979             pkt->addrlimit = ARRAY_SIZE(pkt->addr);
0980         }
0981         pkt->bytes_togo = bytes_togo;
0982         pkt->payload_size = 0;
0983         pkt->counter = counter;
0984         pkt->tiddma = tiddma;
0985 
0986         /* setup the first header */
0987         qib_user_sdma_init_frag(pkt, 0, /* index */
0988             0, len,     /* offset, len */
0989             1, 0,       /* first last desc */
0990             0, 0,       /* put page, dma mapped */
0991             NULL, pbc,  /* struct page, virt addr */
0992             dma_addr, len); /* dma addr, dma length */
0993         pkt->index = 0;
0994         pkt->naddr = 1;
0995 
0996         if (nfrags) {
0997             ret = qib_user_sdma_init_payload(dd, pq, pkt,
0998                              iov + idx_save + 1,
0999                              nfrags, npages);
1000             if (ret < 0)
1001                 goto free_pkt;
1002         } else {
1003             /* since there is no payload, mark the
1004              * header as the last desc. */
1005             pkt->addr[0].last_desc = 1;
1006 
1007             if (dma_addr == 0) {
1008                 /*
1009                  * the header is not dma mapped yet.
1010                  * it should be from kmalloc.
1011                  */
1012                 dma_addr = dma_map_single(&dd->pcidev->dev,
1013                     pbc, len, DMA_TO_DEVICE);
1014                 if (dma_mapping_error(&dd->pcidev->dev,
1015                                 dma_addr)) {
1016                     ret = -ENOMEM;
1017                     goto free_pkt;
1018                 }
1019                 pkt->addr[0].addr = dma_addr;
1020                 pkt->addr[0].dma_mapped = 1;
1021             }
1022         }
1023 
1024         counter++;
1025         npkts++;
1026         pkt->pq = pq;
1027         pkt->index = 0; /* reset index for push on hw */
1028         *ndesc += pkt->naddr;
1029 
1030         list_add_tail(&pkt->list, list);
1031     }
1032 
1033     *maxpkts = npkts;
1034     ret = idx;
1035     goto done;
1036 
1037 free_pkt:
1038     if (pkt->largepkt)
1039         kfree(pkt);
1040     else
1041         kmem_cache_free(pq->pkt_slab, pkt);
1042 free_pbc:
1043     if (dma_addr)
1044         dma_pool_free(pq->header_cache, pbc, dma_addr);
1045     else
1046         kfree(pbc);
1047 free_list:
1048     qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
1049 done:
1050     return ret;
1051 }
1052 
1053 static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
1054                            u32 c)
1055 {
1056     pq->sent_counter = c;
1057 }
1058 
1059 /* try to clean out queue -- needs pq->lock */
1060 static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
1061                      struct qib_user_sdma_queue *pq)
1062 {
1063     struct qib_devdata *dd = ppd->dd;
1064     struct list_head free_list;
1065     struct qib_user_sdma_pkt *pkt;
1066     struct qib_user_sdma_pkt *pkt_prev;
1067     unsigned long flags;
1068     int ret = 0;
1069 
1070     if (!pq->num_sending)
1071         return 0;
1072 
1073     INIT_LIST_HEAD(&free_list);
1074 
1075     /*
1076      * We need this spin lock here because the interrupt handler
1077      * might modify this list in qib_user_sdma_send_desc(); we also
1078      * must not be interrupted while holding it, otherwise we deadlock.
1079      */
1080     spin_lock_irqsave(&pq->sent_lock, flags);
1081     list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
1082         s64 descd = ppd->sdma_descq_removed - pkt->added;
1083 
1084         if (descd < 0)
1085             break;
1086 
1087         list_move_tail(&pkt->list, &free_list);
1088 
1089         /* one more packet cleaned */
1090         ret++;
1091         pq->num_sending--;
1092     }
1093     spin_unlock_irqrestore(&pq->sent_lock, flags);
1094 
1095     if (!list_empty(&free_list)) {
1096         u32 counter;
1097 
1098         pkt = list_entry(free_list.prev,
1099                  struct qib_user_sdma_pkt, list);
1100         counter = pkt->counter;
1101 
1102         qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
1103         qib_user_sdma_set_complete_counter(pq, counter);
1104     }
1105 
1106     return ret;
1107 }
1108 
1109 void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
1110 {
1111     if (!pq)
1112         return;
1113 
1114     pq->sdma_rb_node->refcount--;
1115     if (pq->sdma_rb_node->refcount == 0) {
1116         rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
1117         kfree(pq->sdma_rb_node);
1118     }
1119     dma_pool_destroy(pq->header_cache);
1120     kmem_cache_destroy(pq->pkt_slab);
1121     kfree(pq);
1122 }
1123 
1124 /* clean descriptor queue, returns > 0 if some elements cleaned */
1125 static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
1126 {
1127     int ret;
1128     unsigned long flags;
1129 
1130     spin_lock_irqsave(&ppd->sdma_lock, flags);
1131     ret = qib_sdma_make_progress(ppd);
1132     spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1133 
1134     return ret;
1135 }
1136 
1137 /* we're in close; drain packets so that we can clean up successfully... */
1138 void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
1139                    struct qib_user_sdma_queue *pq)
1140 {
1141     struct qib_devdata *dd = ppd->dd;
1142     unsigned long flags;
1143     int i;
1144 
1145     if (!pq)
1146         return;
1147 
1148     for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
1149         mutex_lock(&pq->lock);
1150         if (!pq->num_pending && !pq->num_sending) {
1151             mutex_unlock(&pq->lock);
1152             break;
1153         }
1154         qib_user_sdma_hwqueue_clean(ppd);
1155         qib_user_sdma_queue_clean(ppd, pq);
1156         mutex_unlock(&pq->lock);
1157         msleep(20);
1158     }
1159 
1160     if (pq->num_pending || pq->num_sending) {
1161         struct qib_user_sdma_pkt *pkt;
1162         struct qib_user_sdma_pkt *pkt_prev;
1163         struct list_head free_list;
1164 
1165         mutex_lock(&pq->lock);
1166         spin_lock_irqsave(&ppd->sdma_lock, flags);
1167         /*
1168          * Since we hold sdma_lock, it is safe without sent_lock.
1169          */
1170         if (pq->num_pending) {
1171             list_for_each_entry_safe(pkt, pkt_prev,
1172                     &ppd->sdma_userpending, list) {
1173                 if (pkt->pq == pq) {
1174                     list_move_tail(&pkt->list, &pq->sent);
1175                     pq->num_pending--;
1176                     pq->num_sending++;
1177                 }
1178             }
1179         }
1180         spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1181 
1182         qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
1183         INIT_LIST_HEAD(&free_list);
1184         list_splice_init(&pq->sent, &free_list);
1185         pq->num_sending = 0;
1186         qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
1187         mutex_unlock(&pq->lock);
1188     }
1189 }
1190 
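/*
 * The helpers below build the two 64-bit words of a hardware send DMA
 * descriptor: qw[0] packs the low 32 address bits, the generation,
 * dword count and buffer offset plus the first/last/head/interrupt
 * flag bits, while qw[1] carries the upper address bits.
 */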
1191 static inline __le64 qib_sdma_make_desc0(u8 gen,
1192                      u64 addr, u64 dwlen, u64 dwoffset)
1193 {
1194     return cpu_to_le64(/* SDmaPhyAddr[31:0] */
1195                ((addr & 0xfffffffcULL) << 32) |
1196                /* SDmaGeneration[1:0] */
1197                ((gen & 3ULL) << 30) |
1198                /* SDmaDwordCount[10:0] */
1199                ((dwlen & 0x7ffULL) << 16) |
1200                /* SDmaBufOffset[12:2] */
1201                (dwoffset & 0x7ffULL));
1202 }
1203 
1204 static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
1205 {
1206     return descq | cpu_to_le64(1ULL << 12);
1207 }
1208 
1209 static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
1210 {
1211                           /* last */  /* dma head */
1212     return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
1213 }
1214 
1215 static inline __le64 qib_sdma_make_desc1(u64 addr)
1216 {
1217     /* SDmaPhyAddr[47:32] */
1218     return cpu_to_le64(addr >> 32);
1219 }
1220 
1221 static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
1222                     struct qib_user_sdma_pkt *pkt, int idx,
1223                     unsigned ofs, u16 tail, u8 gen)
1224 {
1225     const u64 addr = (u64) pkt->addr[idx].addr +
1226         (u64) pkt->addr[idx].offset;
1227     const u64 dwlen = (u64) pkt->addr[idx].length / 4;
1228     __le64 *descqp;
1229     __le64 descq0;
1230 
1231     descqp = &ppd->sdma_descq[tail].qw[0];
1232 
1233     descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
1234     if (pkt->addr[idx].first_desc)
1235         descq0 = qib_sdma_make_first_desc0(descq0);
1236     if (pkt->addr[idx].last_desc) {
1237         descq0 = qib_sdma_make_last_desc0(descq0);
1238         if (ppd->sdma_intrequest) {
1239             descq0 |= cpu_to_le64(1ULL << 15);
1240             ppd->sdma_intrequest = 0;
1241         }
1242     }
1243 
1244     descqp[0] = descq0;
1245     descqp[1] = qib_sdma_make_desc1(addr);
1246 }
1247 
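/*
 * Post as many queued packets as fit into the hardware descriptor
 * ring, then advance the chip's tail pointer once for the whole batch.
 * Packets whose descriptors have all been posted are moved to their
 * queue's sent list; partially posted packets stay on pktlist with
 * pkt->index recording where to resume.
 */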
1248 void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
1249                 struct list_head *pktlist)
1250 {
1251     struct qib_devdata *dd = ppd->dd;
1252     u16 nfree, nsent;
1253     u16 tail, tail_c;
1254     u8 gen, gen_c;
1255 
1256     nfree = qib_sdma_descq_freecnt(ppd);
1257     if (!nfree)
1258         return;
1259 
1260 retry:
1261     nsent = 0;
1262     tail_c = tail = ppd->sdma_descq_tail;
1263     gen_c = gen = ppd->sdma_generation;
1264     while (!list_empty(pktlist)) {
1265         struct qib_user_sdma_pkt *pkt =
1266             list_entry(pktlist->next, struct qib_user_sdma_pkt,
1267                    list);
1268         int i, j, c = 0;
1269         unsigned ofs = 0;
1270         u16 dtail = tail;
1271 
1272         for (i = pkt->index; i < pkt->naddr && nfree; i++) {
1273             qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
1274             ofs += pkt->addr[i].length >> 2;
1275 
1276             if (++tail == ppd->sdma_descq_cnt) {
1277                 tail = 0;
1278                 ++gen;
1279                 ppd->sdma_intrequest = 1;
1280             } else if (tail == (ppd->sdma_descq_cnt>>1)) {
1281                 ppd->sdma_intrequest = 1;
1282             }
1283             nfree--;
1284             if (pkt->addr[i].last_desc == 0)
1285                 continue;
1286 
1287             /*
1288              * If the packet is >= 2KB mtu equivalent, we
1289              * have to use the large buffers, and have to
1290              * mark each descriptor as part of a large
1291              * buffer packet.
1292              */
1293             if (ofs > dd->piosize2kmax_dwords) {
1294                 for (j = pkt->index; j <= i; j++) {
1295                     ppd->sdma_descq[dtail].qw[0] |=
1296                         cpu_to_le64(1ULL << 14);
1297                     if (++dtail == ppd->sdma_descq_cnt)
1298                         dtail = 0;
1299                 }
1300             }
1301             c += i + 1 - pkt->index;
1302             pkt->index = i + 1; /* index for next first */
1303             tail_c = dtail = tail;
1304             gen_c = gen;
1305             ofs = 0;  /* reset for next packet */
1306         }
1307 
1308         ppd->sdma_descq_added += c;
1309         nsent += c;
1310         if (pkt->index == pkt->naddr) {
1311             pkt->added = ppd->sdma_descq_added;
1312             pkt->pq->added = pkt->added;
1313             pkt->pq->num_pending--;
1314             spin_lock(&pkt->pq->sent_lock);
1315             pkt->pq->num_sending++;
1316             list_move_tail(&pkt->list, &pkt->pq->sent);
1317             spin_unlock(&pkt->pq->sent_lock);
1318         }
1319         if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
1320             break;
1321     }
1322 
1323     /* advance the tail on the chip if necessary */
1324     if (ppd->sdma_descq_tail != tail_c) {
1325         ppd->sdma_generation = gen_c;
1326         dd->f_sdma_update_tail(ppd, tail_c);
1327     }
1328 
1329     if (nfree && !list_empty(pktlist))
1330         goto retry;
1331 }
1332 
1333 /* pq->lock must be held, get packets on the wire... */
1334 static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
1335                  struct qib_user_sdma_queue *pq,
1336                  struct list_head *pktlist, int count)
1337 {
1338     unsigned long flags;
1339 
1340     if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
1341         return -ECOMM;
1342 
1343     /* non-blocking mode */
1344     if (pq->sdma_rb_node->refcount > 1) {
1345         spin_lock_irqsave(&ppd->sdma_lock, flags);
1346         if (unlikely(!__qib_sdma_running(ppd))) {
1347             spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1348             return -ECOMM;
1349         }
1350         pq->num_pending += count;
1351         list_splice_tail_init(pktlist, &ppd->sdma_userpending);
1352         qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
1353         spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1354         return 0;
1355     }
1356 
1357     /* In this case, descriptors from this process are not
1358      * linked to the ppd pending queue and the interrupt handler
1359      * won't update this process, so it is OK to modify directly
1360      * without the sdma lock.
1361      */
1362 
1363 
1364     pq->num_pending += count;
1365     /*
1366      * Blocking mode for a single rail process: we must
1367      * release/regain the sdma_lock to give other processes
1368      * a chance to make progress. This is important for
1369      * performance.
1370      */
1371     do {
1372         spin_lock_irqsave(&ppd->sdma_lock, flags);
1373         if (unlikely(!__qib_sdma_running(ppd))) {
1374             spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1375             return -ECOMM;
1376         }
1377         qib_user_sdma_send_desc(ppd, pktlist);
1378         if (!list_empty(pktlist))
1379             qib_sdma_make_progress(ppd);
1380         spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1381     } while (!list_empty(pktlist));
1382 
1383     return 0;
1384 }
1385 
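/*
 * Entry point for user sdma writes: parse the iovec into packets,
 * lazily reclaim completed hardware descriptors, and push the new
 * packets onto the sdma engine.  Returns the number of packets
 * queued, or a negative errno.
 */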
1386 int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
1387              struct qib_user_sdma_queue *pq,
1388              const struct iovec *iov,
1389              unsigned long dim)
1390 {
1391     struct qib_devdata *dd = rcd->dd;
1392     struct qib_pportdata *ppd = rcd->ppd;
1393     int ret = 0;
1394     struct list_head list;
1395     int npkts = 0;
1396 
1397     INIT_LIST_HEAD(&list);
1398 
1399     mutex_lock(&pq->lock);
1400 
1401     /* why not -ECOMM like qib_user_sdma_push_pkts() below? */
1402     if (!qib_sdma_running(ppd))
1403         goto done_unlock;
1404 
1405     /* if I have packets not complete yet */
1406     if (pq->added > ppd->sdma_descq_removed)
1407         qib_user_sdma_hwqueue_clean(ppd);
1408     /* if I have complete packets to be freed */
1409     if (pq->num_sending)
1410         qib_user_sdma_queue_clean(ppd, pq);
1411 
1412     while (dim) {
1413         int mxp = 1;
1414         int ndesc = 0;
1415 
1416         ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
1417                 iov, dim, &list, &mxp, &ndesc);
1418         if (ret < 0)
1419             goto done_unlock;
1420         else {
1421             dim -= ret;
1422             iov += ret;
1423         }
1424 
1425         /* force packets onto the sdma hw queue... */
1426         if (!list_empty(&list)) {
1427             /*
1428              * Lazily clean hw queue.
1429              */
1430             if (qib_sdma_descq_freecnt(ppd) < ndesc) {
1431                 qib_user_sdma_hwqueue_clean(ppd);
1432                 if (pq->num_sending)
1433                     qib_user_sdma_queue_clean(ppd, pq);
1434             }
1435 
1436             ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
1437             if (ret < 0)
1438                 goto done_unlock;
1439             else {
1440                 npkts += mxp;
1441                 pq->counter += mxp;
1442             }
1443         }
1444     }
1445 
1446 done_unlock:
1447     if (!list_empty(&list))
1448         qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
1449     mutex_unlock(&pq->lock);
1450 
1451     return (ret < 0) ? ret : npkts;
1452 }
1453 
1454 int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
1455                 struct qib_user_sdma_queue *pq)
1456 {
1457     int ret = 0;
1458 
1459     mutex_lock(&pq->lock);
1460     qib_user_sdma_hwqueue_clean(ppd);
1461     ret = qib_user_sdma_queue_clean(ppd, pq);
1462     mutex_unlock(&pq->lock);
1463 
1464     return ret;
1465 }
1466 
1467 u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
1468 {
1469     return pq ? pq->sent_counter : 0;
1470 }
1471 
1472 u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
1473 {
1474     return pq ? pq->counter : 0;
1475 }