#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>

#include "qib.h"
#include "qib_user_sdma.h"

/* minimum size of header */
#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64

/* expected size of headers (for dma_pool) */
#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64

/* attempt to drain the queue for up to 5 seconds (250 * 20ms) */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 250

/*
 * One node per user process; the refcount counts how many queues that
 * process has created (see qib_user_sdma_queue_create() and
 * qib_user_sdma_push_pkts()).
 */
static struct rb_root qib_user_sdma_rb_root = RB_ROOT;

struct qib_user_sdma_rb_node {
	struct rb_node node;
	int refcount;
	pid_t pid;
};

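/*
 * One user SDMA packet.  addr[0] holds the initial header (PBC plus the
 * InfiniBand/PSM headers); payload fragments built from user pages
 * follow, and for "large" multi-fragment sends duplicated headers are
 * inserted as the payload is split (pkt->index tracks the header of the
 * fragment being built).  Small packets come from the per-queue slab and
 * use at most the four fixed addr[] entries; large packets are
 * kmalloc'ed with room for addrlimit entries and have largepkt set.
 */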
struct qib_user_sdma_pkt {
	struct list_head list;

	u8 tiddma;
	u8 largepkt;
	u16 frag_size;
	u16 index;
	u16 naddr;
	u16 addrlimit;
	u16 tidsmidx;
	u16 tidsmcount;
	u16 payload_size;
	u32 bytes_togo;
	u32 counter;
	struct qib_tid_session_member *tidsm;
	struct qib_user_sdma_queue *pq;
	u64 added;

	struct {
		u16 offset;
		u16 length;
		u16 first_desc;
		u16 last_desc;
		u16 put_page;
		u16 dma_mapped;
		u16 dma_length;
		u16 padding;
		struct page *page;
		void *kvaddr;
		dma_addr_t addr;
	} addr[4];
};

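/*
 * Per-context send queue state.  Packets built by qib_user_sdma_writev()
 * are pushed to the port's SDMA engine and then parked on the "sent"
 * list until the hardware has consumed their descriptors.
 */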
struct qib_user_sdma_queue {
	/*
	 * Packets whose descriptors have been handed to the SDMA engine.
	 * Entries are struct qib_user_sdma_pkt, linked through pkt->list.
	 */
	struct list_head sent;

	/*
	 * The sent list is touched both from process context and from the
	 * SDMA completion path, so it gets its own spinlock.
	 */
	spinlock_t sent_lock ____cacheline_aligned_in_smp;

	/* headers of the expected length come from this dma_pool */
	char header_cache_name[64];
	struct dma_pool *header_cache;

	/* small packets are allocated from this slab */
	char pkt_slab_name[64];
	struct kmem_cache *pkt_slab;

	/* packets queued so far / packets completed so far */
	u32 counter;
	u32 sent_counter;
	/* packets queued to the engine, descriptors not yet all posted */
	u32 num_pending;
	/* packets fully posted, waiting for hardware completion */
	u32 num_sending;
	/* descq add count after our most recently posted descriptor */
	u64 added;

	/* dma page bookkeeping */
	struct rb_root dma_pages_root;

	struct qib_user_sdma_rb_node *sdma_rb_node;

	/* protect everything above */
	struct mutex lock;
};

static struct qib_user_sdma_rb_node *
qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
{
	struct qib_user_sdma_rb_node *sdma_rb_node;
	struct rb_node *node = root->rb_node;

	while (node) {
		sdma_rb_node = rb_entry(node, struct qib_user_sdma_rb_node,
					node);
		if (pid < sdma_rb_node->pid)
			node = node->rb_left;
		else if (pid > sdma_rb_node->pid)
			node = node->rb_right;
		else
			return sdma_rb_node;
	}
	return NULL;
}

static int
qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
{
	struct rb_node **node = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct qib_user_sdma_rb_node *got;

	while (*node) {
		got = rb_entry(*node, struct qib_user_sdma_rb_node, node);
		parent = *node;
		if (new->pid < got->pid)
			node = &((*node)->rb_left);
		else if (new->pid > got->pid)
			node = &((*node)->rb_right);
		else
			return 0;
	}

	rb_link_node(&new->node, parent, node);
	rb_insert_color(&new->node, root);
	return 1;
}

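/*
 * Create the per-context SDMA queue: a slab for packet structs, a
 * dma_pool for the fixed-size headers, and a reference on (or a new)
 * per-pid rb-tree node shared by all queues of the calling process.
 */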
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
	struct qib_user_sdma_queue *pq =
		kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
	struct qib_user_sdma_rb_node *sdma_rb_node;

	if (!pq)
		goto done;

	pq->counter = 0;
	pq->sent_counter = 0;
	pq->num_pending = 0;
	pq->num_sending = 0;
	pq->added = 0;
	pq->sdma_rb_node = NULL;

	INIT_LIST_HEAD(&pq->sent);
	spin_lock_init(&pq->sent_lock);
	mutex_init(&pq->lock);

	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
		 "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
					 sizeof(struct qib_user_sdma_pkt),
					 0, 0, NULL);

	if (!pq->pkt_slab)
		goto err_kfree;

	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
		 "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
	pq->header_cache = dma_pool_create(pq->header_cache_name,
					   dev,
					   QIB_USER_SDMA_EXP_HEADER_LENGTH,
					   4, 0);
	if (!pq->header_cache)
		goto err_slab;

	pq->dma_pages_root = RB_ROOT;

	sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
					       current->pid);
	if (sdma_rb_node) {
		sdma_rb_node->refcount++;
	} else {
		sdma_rb_node = kmalloc(sizeof(
			struct qib_user_sdma_rb_node), GFP_KERNEL);
		if (!sdma_rb_node)
			goto err_rb;

		sdma_rb_node->refcount = 1;
		sdma_rb_node->pid = current->pid;

		qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node);
	}
	pq->sdma_rb_node = sdma_rb_node;

	goto done;

err_rb:
	dma_pool_destroy(pq->header_cache);
err_slab:
	kmem_cache_destroy(pq->pkt_slab);
err_kfree:
	kfree(pq);
	pq = NULL;

done:
	return pq;
}

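/*
 * Fill in one entry of pkt->addr[].  first_desc/last_desc mark packet
 * boundaries for the hardware, put_page says the page was pinned from
 * user space (rather than allocated by the driver), and dma_mapped
 * records whether we own the DMA mapping and must unmap it on free.
 */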
static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
				    int i, u16 offset, u16 len,
				    u16 first_desc, u16 last_desc,
				    u16 put_page, u16 dma_mapped,
				    struct page *page, void *kvaddr,
				    dma_addr_t dma_addr, u16 dma_length)
{
	pkt->addr[i].offset = offset;
	pkt->addr[i].length = len;
	pkt->addr[i].first_desc = first_desc;
	pkt->addr[i].last_desc = last_desc;
	pkt->addr[i].put_page = put_page;
	pkt->addr[i].dma_mapped = dma_mapped;
	pkt->addr[i].page = page;
	pkt->addr[i].kvaddr = kvaddr;
	pkt->addr[i].addr = dma_addr;
	pkt->addr[i].dma_length = dma_length;
}

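/*
 * Headers of the expected length are taken from the dma_pool (already
 * DMA mapped); other sizes, or a pool allocation failure, fall back to
 * kmalloc() with *dma_addr set to 0 so the caller knows to map the
 * buffer with dma_map_single() later.
 */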
static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq,
					size_t len, dma_addr_t *dma_addr)
{
	void *hdr;

	if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
		hdr = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
				     dma_addr);
	else
		hdr = NULL;

	if (!hdr) {
		hdr = kmalloc(len, GFP_KERNEL);
		if (!hdr)
			return NULL;

		*dma_addr = 0;
	}

	return hdr;
}

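/*
 * Chop one (already DMA-mapped) page into hardware fragments and append
 * them to pkt->addr[].  A fragment is closed when it fills frag_size,
 * exhausts the current tid session entry (tid-sdma), or finishes the
 * packet.  When a fragment other than the last one closes, the header in
 * pkt->addr[pkt->index] is fixed up for the bytes it actually carries,
 * and a fresh copy of that header is queued to lead the next fragment,
 * with its PBC length, tid/offset word and sequence number advanced.
 * On DMA mapping failure the page handed in by the caller is released
 * here, so the caller must not release it again.
 */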
static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd,
				       struct qib_user_sdma_queue *pq,
				       struct qib_user_sdma_pkt *pkt,
				       struct page *page, u16 put,
				       u16 offset, u16 len, void *kvaddr)
{
	__le16 *pbc16;
	void *pbcvaddr;
	struct qib_message_header *hdr;
	u16 newlen, pbclen, lastdesc, dma_mapped;
	u32 vcto;
	union qib_seqnum seqnum;
	dma_addr_t pbcdaddr;
	dma_addr_t dma_addr =
		dma_map_page(&dd->pcidev->dev,
			     page, offset, len, DMA_TO_DEVICE);
	int ret = 0;

	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
		/*
		 * DMA mapping error; the packet does not own this page
		 * yet, so release it here instead of leaving it to the
		 * caller's cleanup.
		 */
		if (put) {
			unpin_user_page(page);
		} else {
			/* coalesce case: page was allocated and kmapped */
			kunmap(page);
			__free_page(page);
		}
		ret = -ENOMEM;
		goto done;
	}
	offset = 0;
	dma_mapped = 1;

next_fragment:
	/*
	 * In tid-sdma, the transfer length is limited by the remaining
	 * length in the current tid session entry.
	 */
	if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length)
		newlen = pkt->tidsm[pkt->tidsmidx].length;
	else
		newlen = len;

	/*
	 * Close the fragment if it fills frag_size, finishes the current
	 * tid entry, or finishes the whole packet; otherwise more pages
	 * may be added to it later.
	 */
	lastdesc = 0;
	if ((pkt->payload_size + newlen) >= pkt->frag_size) {
		newlen = pkt->frag_size - pkt->payload_size;
		lastdesc = 1;
	} else if (pkt->tiddma) {
		if (newlen == pkt->tidsm[pkt->tidsmidx].length)
			lastdesc = 1;
	} else {
		if (newlen == pkt->bytes_togo)
			lastdesc = 1;
	}

	/* fill the next fragment entry from this page */
	qib_user_sdma_init_frag(pkt, pkt->naddr,
				offset, newlen,
				0, lastdesc,
				put, dma_mapped,
				page, kvaddr,
				dma_addr, len);
	pkt->bytes_togo -= newlen;
	pkt->payload_size += newlen;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* the whole packet is built */
	if (pkt->bytes_togo == 0) {
		/* header not mapped yet: it came from kmalloc() */
		if (!pkt->addr[pkt->index].addr) {
			pkt->addr[pkt->index].addr =
				dma_map_single(&dd->pcidev->dev,
					       pkt->addr[pkt->index].kvaddr,
					       pkt->addr[pkt->index].dma_length,
					       DMA_TO_DEVICE);
			if (dma_mapping_error(&dd->pcidev->dev,
					      pkt->addr[pkt->index].addr)) {
				ret = -ENOMEM;
				goto done;
			}
			pkt->addr[pkt->index].dma_mapped = 1;
		}

		goto done;
	}

	/* tid-sdma: advance within the tid session entry */
	if (pkt->tiddma) {
		pkt->tidsm[pkt->tidsmidx].length -= newlen;
		if (pkt->tidsm[pkt->tidsmidx].length) {
			pkt->tidsm[pkt->tidsmidx].offset += newlen;
		} else {
			pkt->tidsmidx++;
			if (pkt->tidsmidx == pkt->tidsmcount) {
				ret = -EFAULT;
				goto done;
			}
		}
	}

	/*
	 * Packet not finished and fragment not closed either: keep
	 * adding pages to the open fragment.
	 */
	if (lastdesc == 0)
		goto done;

	/*
	 * A fragment just closed but the packet continues.  Duplicate the
	 * header for the next fragment, then fix up the current header
	 * for the bytes this fragment actually carries.
	 */
	pbclen = pkt->addr[pkt->index].length;
	pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr);
	if (!pbcvaddr) {
		ret = -ENOMEM;
		goto done;
	}
	/* copy the header before it is modified below */
	pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr;
	memcpy(pbcvaddr, pbc16, pbclen);

	/* fix up the header of the fragment just closed */
	hdr = (struct qib_message_header *)&pbc16[4];

	/* shrink the PBC length to cover only this fragment */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2));

	/* update the LRH packet length to match */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* set flag 0x2 (header suppression) in the KDETH pkt_flags */
		hdr->iph.pkt_flags =
			cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2);
		/* clear flag bits 0x04 and 0x20 in the PSM header flags */
		hdr->flags &= ~(0x04|0x20);
	} else {
		/* clear the pad count (BTH bits 20-21) */
		hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF);
		/* clear flag bit 0x04 in the PSM header flags */
		hdr->flags &= ~(0x04);
	}

	/* recompute the KDETH checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
				      be16_to_cpu(hdr->lrh[2]) -
				      ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
				      le16_to_cpu(hdr->iph.pkt_flags));

	/* this fragment's header is final now: map it if it came from kmalloc */
	if (!pkt->addr[pkt->index].addr) {
		pkt->addr[pkt->index].addr =
			dma_map_single(&dd->pcidev->dev,
				       pkt->addr[pkt->index].kvaddr,
				       pkt->addr[pkt->index].dma_length,
				       DMA_TO_DEVICE);
		if (dma_mapping_error(&dd->pcidev->dev,
				      pkt->addr[pkt->index].addr)) {
			ret = -ENOMEM;
			goto done;
		}
		pkt->addr[pkt->index].dma_mapped = 1;
	}

	/* now adjust the copied header for the next fragment */
	pbc16 = (__le16 *)pbcvaddr;
	hdr = (struct qib_message_header *)&pbc16[4];

	/* drop the dwords already carried by earlier fragments */
	pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2));

	/* update the LRH packet length to match */
	hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0]));

	if (pkt->tiddma) {
		/* set the new tid and offset for the next fragment */
		hdr->iph.ver_ctxt_tid_offset = cpu_to_le32(
			(le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) +
			(pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) +
			(pkt->tidsm[pkt->tidsmidx].offset>>2));
	} else {
		/* advance the buffer offset carried in uwords[2] */
		hdr->uwords[2] += pkt->payload_size;
	}

	/* recompute the KDETH checksum */
	vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset);
	hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH +
				      be16_to_cpu(hdr->lrh[2]) -
				      ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) -
				      le16_to_cpu(hdr->iph.pkt_flags));

	/* advance the packet or tid sequence number */
	seqnum.val = be32_to_cpu(hdr->bth[2]);
	if (pkt->tiddma)
		seqnum.seq++;
	else
		seqnum.pkt++;
	hdr->bth[2] = cpu_to_be32(seqnum.val);

	/* queue the copied header as the start of the next fragment */
	qib_user_sdma_init_frag(pkt, pkt->naddr,
				0, pbclen,
				1, 0,
				0, 0,
				NULL, pbcvaddr,
				pbcdaddr, pbclen);
	pkt->index = pkt->naddr;
	pkt->payload_size = 0;
	pkt->naddr++;
	if (pkt->naddr == pkt->addrlimit) {
		ret = -EFAULT;
		goto done;
	}

	/* data left in this page: go carve the next fragment */
	if (newlen != len) {
		if (dma_mapped) {
			put = 0;
			dma_mapped = 0;
			page = NULL;
			kvaddr = NULL;
		}
		len -= newlen;
		offset += newlen;

		goto next_fragment;
	}

done:
	return ret;
}

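/*
 * Coalesce all payload iovecs of a small packet into one freshly
 * allocated kernel page, then hand that page to
 * qib_user_sdma_page_to_frags() as a single fragment.
 */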
static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
				  struct qib_user_sdma_queue *pq,
				  struct qib_user_sdma_pkt *pkt,
				  const struct iovec *iov,
				  unsigned long niov)
{
	int ret = 0;
	struct page *page = alloc_page(GFP_KERNEL);
	void *mpage_save;
	char *mpage;
	int i;
	int len = 0;

	if (!page) {
		ret = -ENOMEM;
		goto done;
	}

	mpage = kmap(page);
	mpage_save = mpage;
	for (i = 0; i < niov; i++) {
		int cfur;

		cfur = copy_from_user(mpage,
				      iov[i].iov_base, iov[i].iov_len);
		if (cfur) {
			ret = -EFAULT;
			goto free_unmap;
		}

		mpage += iov[i].iov_len;
		len += iov[i].iov_len;
	}

	ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
					  page, 0, 0, len, mpage_save);
	goto done;

free_unmap:
	kunmap(page);
	__free_page(page);
done:
	return ret;
}

/*
 * How many pages does this iovec element span?
 */
static size_t qib_user_sdma_num_pages(const struct iovec *iov)
{
	const unsigned long addr = (unsigned long) iov->iov_base;
	const unsigned long len = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}

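/*
 * Release one pkt->addr[] entry: undo the DMA mapping, drop the page
 * pin or free the driver-allocated page, and return header buffers to
 * the dma_pool or kfree() them, depending on how they were obtained.
 */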
static void qib_user_sdma_free_pkt_frag(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct qib_user_sdma_pkt *pkt,
					int frag)
{
	const int i = frag;

	if (pkt->addr[i].page) {
		/* this frag carries (part of) a page of payload */
		if (pkt->addr[i].dma_mapped)
			dma_unmap_page(dev,
				       pkt->addr[i].addr,
				       pkt->addr[i].dma_length,
				       DMA_TO_DEVICE);

		if (pkt->addr[i].kvaddr)
			kunmap(pkt->addr[i].page);

		if (pkt->addr[i].put_page)
			unpin_user_page(pkt->addr[i].page);
		else
			__free_page(pkt->addr[i].page);
	} else if (pkt->addr[i].kvaddr) {
		/* this frag is a header buffer */
		if (pkt->addr[i].dma_mapped) {
			/* from kmalloc(), mapped by us */
			dma_unmap_single(dev,
					 pkt->addr[i].addr,
					 pkt->addr[i].dma_length,
					 DMA_TO_DEVICE);
			kfree(pkt->addr[i].kvaddr);
		} else if (pkt->addr[i].addr) {
			/* from the dma_pool (already mapped) */
			dma_pool_free(pq->header_cache,
				      pkt->addr[i].kvaddr, pkt->addr[i].addr);
		} else {
			/* from kmalloc() but never mapped */
			kfree(pkt->addr[i].kvaddr);
		}
	}
}

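/*
 * Pin user pages for one iovec element (up to 8 at a time) and carve
 * them into fragments.  If qib_user_sdma_page_to_frags() fails it has
 * already released the page it was handed, so the error path only
 * unpins the pages that were never passed on.
 */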
static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
				   struct qib_user_sdma_queue *pq,
				   struct qib_user_sdma_pkt *pkt,
				   unsigned long addr, int tlen, size_t npages)
{
	struct page *pages[8];
	int i, j;
	int ret = 0;

	while (npages) {
		if (npages > 8)
			j = 8;
		else
			j = npages;

		ret = pin_user_pages_fast(addr, j, FOLL_LONGTERM, pages);
		if (ret != j) {
			i = 0;
			j = ret;
			ret = -ENOMEM;
			goto free_pages;
		}

		for (i = 0; i < j; i++) {
			unsigned long fofs = addr & ~PAGE_MASK;
			int flen = ((fofs + tlen) > PAGE_SIZE) ?
				(PAGE_SIZE - fofs) : tlen;

			ret = qib_user_sdma_page_to_frags(dd, pq, pkt,
					pages[i], 1, fofs, flen, NULL);
			if (ret < 0) {
				/*
				 * The current page has already been
				 * released inside the call above, so
				 * skip it in the unpin loop below.
				 */
				i++;
				goto free_pages;
			}

			addr += flen;
			tlen -= flen;
		}

		npages -= j;
	}

	goto done;

	/* if error, return all pages not managed by pkt */
free_pages:
	while (i < j)
		unpin_user_page(pages[i++]);

done:
	return ret;
}

static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
				 struct qib_user_sdma_queue *pq,
				 struct qib_user_sdma_pkt *pkt,
				 const struct iovec *iov,
				 unsigned long niov)
{
	int ret = 0;
	unsigned long idx;

	for (idx = 0; idx < niov; idx++) {
		const size_t npages = qib_user_sdma_num_pages(iov + idx);
		const unsigned long addr = (unsigned long) iov[idx].iov_base;

		ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr,
					      iov[idx].iov_len, npages);
		if (ret < 0)
			goto free_pkt;
	}

	goto done;

free_pkt:
	/* frag 0 is the header and is owned by the caller; free the rest */
	for (idx = 1; idx < pkt->naddr; idx++)
		qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);

	/*
	 * The header may have been dma-mapped while building fragments;
	 * undo that here so the caller's cleanup of the header buffer
	 * does not leak the mapping.
	 */
	if (pkt->addr[0].dma_mapped) {
		dma_unmap_single(&dd->pcidev->dev,
				 pkt->addr[0].addr,
				 pkt->addr[0].dma_length,
				 DMA_TO_DEVICE);
		pkt->addr[0].addr = 0;
		pkt->addr[0].dma_mapped = 0;
	}

done:
	return ret;
}

static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
				      struct qib_user_sdma_queue *pq,
				      struct qib_user_sdma_pkt *pkt,
				      const struct iovec *iov,
				      unsigned long niov, int npages)
{
	int ret = 0;

	/*
	 * If the whole payload fits in a single fragment but spans more
	 * pages than addr[] can hold, copy it into one page; otherwise
	 * pin the user pages in place.
	 */
	if (pkt->frag_size == pkt->bytes_togo &&
	    npages >= ARRAY_SIZE(pkt->addr))
		ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov);
	else
		ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);

	return ret;
}

/* free all packets on the list and reinitialize it */
static void qib_user_sdma_free_pkt_list(struct device *dev,
					struct qib_user_sdma_queue *pq,
					struct list_head *list)
{
	struct qib_user_sdma_pkt *pkt, *pkt_next;

	list_for_each_entry_safe(pkt, pkt_next, list, list) {
		int i;

		for (i = 0; i < pkt->naddr; i++)
			qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);

		if (pkt->largepkt)
			kfree(pkt);
		else
			kmem_cache_free(pq->pkt_slab, pkt);
	}
	INIT_LIST_HEAD(list);
}

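/*
 * Parse the user's iovec array into packets and append them to "list".
 * iov[idx] holds the PBC plus the InfiniBand/PSM headers of a packet;
 * PBC word 0 carries the packet's dword count in its low 16 bits and,
 * for "large" multi-fragment packets, the fragment size in its upper 16
 * bits.  The following iovec elements supply the payload.  At most
 * *maxpkts packets are built; on return *maxpkts is the number actually
 * built, *ndesc the number of descriptors they need, and the return
 * value the number of iovec elements consumed (or a negative errno).
 */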
static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
				    struct qib_pportdata *ppd,
				    struct qib_user_sdma_queue *pq,
				    const struct iovec *iov,
				    unsigned long niov,
				    struct list_head *list,
				    int *maxpkts, int *ndesc)
{
	unsigned long idx = 0;
	int ret = 0;
	int npkts = 0;
	__le32 *pbc;
	dma_addr_t dma_addr;
	struct qib_user_sdma_pkt *pkt = NULL;
	size_t len;
	size_t nw;
	u32 counter = pq->counter;
	u16 frag_size;

	while (idx < niov && npkts < *maxpkts) {
		const unsigned long addr = (unsigned long) iov[idx].iov_base;
		const unsigned long idx_save = idx;
		unsigned pktnw;
		unsigned pktnwc;
		int nfrags = 0;
		size_t npages = 0;
		size_t bytes_togo = 0;
		int tiddma = 0;
		int cfur;

		len = iov[idx].iov_len;
		nw = len >> 2;

		if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
		    len > PAGE_SIZE || len & 3 || addr & 3) {
			ret = -EINVAL;
			goto free_list;
		}

		pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr);
		if (!pbc) {
			ret = -ENOMEM;
			goto free_list;
		}

		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
		if (cfur) {
			ret = -EFAULT;
			goto free_pbc;
		}

		/*
		 * The PBC dword count does not include the first PBC word
		 * itself, so the header contributes nw - 1 dwords towards
		 * it.
		 */
		pktnwc = nw - 1;

		/* dword count the caller claimed in PBC word 0 */
		pktnw = le32_to_cpu(*pbc) & 0xFFFF;
		if (pktnw < pktnwc) {
			ret = -EINVAL;
			goto free_pbc;
		}

		idx++;
		while (pktnwc < pktnw && idx < niov) {
			const size_t slen = iov[idx].iov_len;
			const unsigned long faddr =
				(unsigned long) iov[idx].iov_base;

			if (slen & 3 || faddr & 3 || !slen) {
				ret = -EINVAL;
				goto free_pbc;
			}

			npages += qib_user_sdma_num_pages(&iov[idx]);

			if (check_add_overflow(bytes_togo, slen, &bytes_togo) ||
			    bytes_togo > type_max(typeof(pkt->bytes_togo))) {
				ret = -EINVAL;
				goto free_pbc;
			}
			pktnwc += slen >> 2;
			idx++;
			nfrags++;
		}

		if (pktnwc != pktnw) {
			ret = -EINVAL;
			goto free_pbc;
		}

		frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF;
		if (((frag_size ? frag_size : bytes_togo) + len) >
						ppd->ibmaxlen) {
			ret = -EINVAL;
			goto free_pbc;
		}

		if (frag_size) {
			/* multi-fragment ("large") packet */
			size_t tidsmsize, n, pktsize, sz, addrlimit;

			n = npages*((2*PAGE_SIZE/frag_size)+1);
			pktsize = struct_size(pkt, addr, n);

			/*
			 * tid-sdma if the header's TID field is a real
			 * TID and not the all-ones "no TID" value.
			 */
			tiddma = (((le32_to_cpu(pbc[7])>>
				QLOGIC_IB_I_TID_SHIFT)&
				QLOGIC_IB_I_TID_MASK) !=
				QLOGIC_IB_I_TID_MASK);

			/* the tid session array follows the packet struct */
			if (tiddma)
				tidsmsize = iov[idx].iov_len;
			else
				tidsmsize = 0;

			if (check_add_overflow(pktsize, tidsmsize, &sz)) {
				ret = -EINVAL;
				goto free_pbc;
			}
			pkt = kmalloc(sz, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 1;
			pkt->frag_size = frag_size;
			if (check_add_overflow(n, ARRAY_SIZE(pkt->addr),
					       &addrlimit) ||
			    addrlimit > type_max(typeof(pkt->addrlimit))) {
				ret = -EINVAL;
				goto free_pkt;
			}
			pkt->addrlimit = addrlimit;

			if (tiddma) {
				char *tidsm = (char *)pkt + pktsize;

				cfur = copy_from_user(tidsm,
					iov[idx].iov_base, tidsmsize);
				if (cfur) {
					ret = -EFAULT;
					goto free_pkt;
				}
				pkt->tidsm =
					(struct qib_tid_session_member *)tidsm;
				pkt->tidsmcount = tidsmsize/
					sizeof(struct qib_tid_session_member);
				pkt->tidsmidx = 0;
				idx++;
			}

			/*
			 * The upper half of PBC word 0 was borrowed by
			 * user space to pass the fragment size; clear it
			 * now that frag_size has been extracted, before
			 * the header goes to the hardware.
			 */
			*pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF);
		} else {
			pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
			if (!pkt) {
				ret = -ENOMEM;
				goto free_pbc;
			}
			pkt->largepkt = 0;
			pkt->frag_size = bytes_togo;
			pkt->addrlimit = ARRAY_SIZE(pkt->addr);
		}
		pkt->bytes_togo = bytes_togo;
		pkt->payload_size = 0;
		pkt->counter = counter;
		pkt->tiddma = tiddma;

		/* set up the first header as frag 0 */
		qib_user_sdma_init_frag(pkt, 0,
					0, len,
					1, 0,
					0, 0,
					NULL, pbc,
					dma_addr, len);
		pkt->index = 0;
		pkt->naddr = 1;

		if (nfrags) {
			ret = qib_user_sdma_init_payload(dd, pq, pkt,
							 iov + idx_save + 1,
							 nfrags, npages);
			if (ret < 0)
				goto free_pkt;
		} else {
			/* no payload: the header is also the last desc */
			pkt->addr[0].last_desc = 1;

			if (dma_addr == 0) {
				/*
				 * The header is not dma mapped yet; it
				 * came from kmalloc().
				 */
				dma_addr = dma_map_single(&dd->pcidev->dev,
						pbc, len, DMA_TO_DEVICE);
				if (dma_mapping_error(&dd->pcidev->dev,
						      dma_addr)) {
					ret = -ENOMEM;
					goto free_pkt;
				}
				pkt->addr[0].addr = dma_addr;
				pkt->addr[0].dma_mapped = 1;
			}
		}

		counter++;
		npkts++;
		pkt->pq = pq;
		pkt->index = 0;
		*ndesc += pkt->naddr;

		list_add_tail(&pkt->list, list);
	}

	*maxpkts = npkts;
	ret = idx;
	goto done;

free_pkt:
	if (pkt->largepkt)
		kfree(pkt);
	else
		kmem_cache_free(pq->pkt_slab, pkt);
free_pbc:
	if (dma_addr)
		dma_pool_free(pq->header_cache, pbc, dma_addr);
	else
		kfree(pbc);
free_list:
	qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
done:
	return ret;
}

static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
					       u32 c)
{
	pq->sent_counter = c;
}

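/*
 * Move packets whose descriptors the hardware has consumed from the
 * "sent" list to a local list, free them, and record the counter of the
 * last completed packet.  Returns the number of packets cleaned.
 */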
static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
				     struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	struct list_head free_list;
	struct qib_user_sdma_pkt *pkt;
	struct qib_user_sdma_pkt *pkt_prev;
	unsigned long flags;
	int ret = 0;

	if (!pq->num_sending)
		return 0;

	INIT_LIST_HEAD(&free_list);

	/*
	 * Packets on the sent list are in submission order, so stop at
	 * the first one whose descriptors the engine has not yet retired
	 * (pkt->added still ahead of sdma_descq_removed).
	 */
	spin_lock_irqsave(&pq->sent_lock, flags);
	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
		s64 descd = ppd->sdma_descq_removed - pkt->added;

		if (descd < 0)
			break;

		list_move_tail(&pkt->list, &free_list);

		/* one more packet cleaned */
		ret++;
		pq->num_sending--;
	}
	spin_unlock_irqrestore(&pq->sent_lock, flags);

	if (!list_empty(&free_list)) {
		u32 counter;

		pkt = list_entry(free_list.prev,
				 struct qib_user_sdma_pkt, list);
		counter = pkt->counter;

		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		qib_user_sdma_set_complete_counter(pq, counter);
	}

	return ret;
}

void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
{
	if (!pq)
		return;

	pq->sdma_rb_node->refcount--;
	if (pq->sdma_rb_node->refcount == 0) {
		rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
		kfree(pq->sdma_rb_node);
	}
	dma_pool_destroy(pq->header_cache);
	kmem_cache_destroy(pq->pkt_slab);
	kfree(pq);
}

/* let the hardware queue make progress, under the port's sdma lock */
static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&ppd->sdma_lock, flags);
	ret = qib_sdma_make_progress(ppd);
	spin_unlock_irqrestore(&ppd->sdma_lock, flags);

	return ret;
}

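/*
 * Called at context teardown: give the engine up to
 * QIB_USER_SDMA_DRAIN_TIMEOUT * 20ms to finish what is queued, then
 * forcibly reclaim anything still pending for this queue.
 */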
void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
			       struct qib_user_sdma_queue *pq)
{
	struct qib_devdata *dd = ppd->dd;
	unsigned long flags;
	int i;

	if (!pq)
		return;

	for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
		mutex_lock(&pq->lock);
		if (!pq->num_pending && !pq->num_sending) {
			mutex_unlock(&pq->lock);
			break;
		}
		qib_user_sdma_hwqueue_clean(ppd);
		qib_user_sdma_queue_clean(ppd, pq);
		mutex_unlock(&pq->lock);
		msleep(20);
	}

	if (pq->num_pending || pq->num_sending) {
		struct qib_user_sdma_pkt *pkt;
		struct qib_user_sdma_pkt *pkt_prev;
		struct list_head free_list;

		mutex_lock(&pq->lock);
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		/*
		 * Any of our packets still waiting on the port's
		 * userpending list are moved straight to our sent list
		 * so they get freed below.
		 */
		if (pq->num_pending) {
			list_for_each_entry_safe(pkt, pkt_prev,
					&ppd->sdma_userpending, list) {
				if (pkt->pq == pq) {
					list_move_tail(&pkt->list, &pq->sent);
					pq->num_pending--;
					pq->num_sending++;
				}
			}
		}
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);

		qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
		INIT_LIST_HEAD(&free_list);
		list_splice_init(&pq->sent, &free_list);
		pq->num_sending = 0;
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
		mutex_unlock(&pq->lock);
	}
}

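/*
 * Helpers that build the two 64-bit words of an SDMA descriptor.
 * Word 0 packs buffer-address bits 31:2 into its upper half, plus a
 * 2-bit generation, an 11-bit dword count and an 11-bit dword offset;
 * the bit-11/12/13/14/15 flags OR'ed in below mark last/first
 * descriptors, the large-buffer case and interrupt requests (names as
 * inferred from their use in this file; the hardware documentation is
 * not quoted here).  Word 1 carries the upper address bits (addr >> 32).
 */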
static inline __le64 qib_sdma_make_desc0(u8 gen,
					 u64 addr, u64 dwlen, u64 dwoffset)
{
	return cpu_to_le64(
		/* buffer address bits [31:2] in the upper word */
		((addr & 0xfffffffcULL) << 32) |
		/* generation [1:0] */
		((gen & 3ULL) << 30) |
		/* dword count [10:0] */
		((dwlen & 0x7ffULL) << 16) |
		/* dword offset [10:0] */
		(dwoffset & 0x7ffULL));
}

static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
{
	return descq | cpu_to_le64(1ULL << 12);
}

static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
{
	/* last descriptor of the packet */
	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
}

static inline __le64 qib_sdma_make_desc1(u64 addr)
{
	/* upper address bits */
	return cpu_to_le64(addr >> 32);
}

/* write one descriptor for fragment "idx" of pkt at descq slot "tail" */
static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
				    struct qib_user_sdma_pkt *pkt, int idx,
				    unsigned ofs, u16 tail, u8 gen)
{
	const u64 addr = (u64) pkt->addr[idx].addr +
		(u64) pkt->addr[idx].offset;
	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
	__le64 *descqp;
	__le64 descq0;

	descqp = &ppd->sdma_descq[tail].qw[0];

	descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs);
	if (pkt->addr[idx].first_desc)
		descq0 = qib_sdma_make_first_desc0(descq0);
	if (pkt->addr[idx].last_desc) {
		descq0 = qib_sdma_make_last_desc0(descq0);
		if (ppd->sdma_intrequest) {
			/* request a completion interrupt on this descriptor */
			descq0 |= cpu_to_le64(1ULL << 15);
			ppd->sdma_intrequest = 0;
		}
	}

	descqp[0] = descq0;
	descqp[1] = qib_sdma_make_desc1(addr);
}

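/*
 * Post descriptors for as many packets from pktlist as the descriptor
 * ring has room for.  Called with the port's sdma_lock held, either
 * directly from qib_user_sdma_push_pkts() or when the port's SDMA code
 * services ppd->sdma_userpending.  Fully posted packets move to their
 * queue's sent list; partially posted packets keep their progress in
 * pkt->index and stay on pktlist for the next call.
 */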
void qib_user_sdma_send_desc(struct qib_pportdata *ppd,
			     struct list_head *pktlist)
{
	struct qib_devdata *dd = ppd->dd;
	u16 nfree, nsent;
	u16 tail, tail_c;
	u8 gen, gen_c;

	nfree = qib_sdma_descq_freecnt(ppd);
	if (!nfree)
		return;

retry:
	nsent = 0;
	tail_c = tail = ppd->sdma_descq_tail;
	gen_c = gen = ppd->sdma_generation;
	while (!list_empty(pktlist)) {
		struct qib_user_sdma_pkt *pkt =
			list_entry(pktlist->next, struct qib_user_sdma_pkt,
				   list);
		int i, j, c = 0;
		unsigned ofs = 0;
		u16 dtail = tail;

		for (i = pkt->index; i < pkt->naddr && nfree; i++) {
			qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen);
			ofs += pkt->addr[i].length >> 2;

			if (++tail == ppd->sdma_descq_cnt) {
				tail = 0;
				++gen;
				ppd->sdma_intrequest = 1;
			} else if (tail == (ppd->sdma_descq_cnt>>1)) {
				ppd->sdma_intrequest = 1;
			}
			nfree--;
			if (pkt->addr[i].last_desc == 0)
				continue;

			/*
			 * If the packet needs more dwords than a 2K send
			 * buffer holds, set bit 14 (large buffer) in every
			 * descriptor of this packet.
			 */
			if (ofs > dd->piosize2kmax_dwords) {
				for (j = pkt->index; j <= i; j++) {
					ppd->sdma_descq[dtail].qw[0] |=
						cpu_to_le64(1ULL << 14);
					if (++dtail == ppd->sdma_descq_cnt)
						dtail = 0;
				}
			}
			c += i + 1 - pkt->index;
			pkt->index = i + 1; /* index of the next first desc */
			tail_c = dtail = tail;
			gen_c = gen;
			ofs = 0;  /* reset for the next packet */
		}

		ppd->sdma_descq_added += c;
		nsent += c;
		if (pkt->index == pkt->naddr) {
			pkt->added = ppd->sdma_descq_added;
			pkt->pq->added = pkt->added;
			pkt->pq->num_pending--;
			spin_lock(&pkt->pq->sent_lock);
			pkt->pq->num_sending++;
			list_move_tail(&pkt->list, &pkt->pq->sent);
			spin_unlock(&pkt->pq->sent_lock);
		}
		if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt)
			break;
	}

	/* advance the tail on the chip if necessary */
	if (ppd->sdma_descq_tail != tail_c) {
		ppd->sdma_generation = gen_c;
		dd->f_sdma_update_tail(ppd, tail_c);
	}

	if (nfree && !list_empty(pktlist))
		goto retry;
}

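/*
 * Hand a list of freshly built packets to the SDMA engine.  If other
 * queues of the same process share the engine (rb-node refcount > 1),
 * the packets are appended to ppd->sdma_userpending and sent as space
 * permits; otherwise this queue owns the engine and we loop here until
 * the whole list has been posted.
 */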
static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
				   struct qib_user_sdma_queue *pq,
				   struct list_head *pktlist, int count)
{
	unsigned long flags;

	if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
		return -ECOMM;

	/* the engine is shared with other queues of this process */
	if (pq->sdma_rb_node->refcount > 1) {
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		if (unlikely(!__qib_sdma_running(ppd))) {
			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
			return -ECOMM;
		}
		pq->num_pending += count;
		list_splice_tail_init(pktlist, &ppd->sdma_userpending);
		qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
		return 0;
	}

	/*
	 * This queue is the only user of the engine, so it is safe to
	 * account the packets as pending before taking the lock and to
	 * keep pushing from here until the whole list is on the ring.
	 */
	pq->num_pending += count;

	/*
	 * Blocking mode: keep posting descriptors (and nudging the
	 * engine) until every packet on the list has been accepted.
	 */
	do {
		spin_lock_irqsave(&ppd->sdma_lock, flags);
		if (unlikely(!__qib_sdma_running(ppd))) {
			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
			return -ECOMM;
		}
		qib_user_sdma_send_desc(ppd, pktlist);
		if (!list_empty(pktlist))
			qib_sdma_make_progress(ppd);
		spin_unlock_irqrestore(&ppd->sdma_lock, flags);
	} while (!list_empty(pktlist));

	return 0;
}

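/*
 * write()/writev() entry point for user SDMA: parse the iovec array
 * into packets, opportunistically reclaim completed ones, and push the
 * new packets to the engine.  Returns the number of packets queued or a
 * negative errno.
 */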
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
			 struct qib_user_sdma_queue *pq,
			 const struct iovec *iov,
			 unsigned long dim)
{
	struct qib_devdata *dd = rcd->dd;
	struct qib_pportdata *ppd = rcd->ppd;
	int ret = 0;
	struct list_head list;
	int npkts = 0;

	INIT_LIST_HEAD(&list);

	mutex_lock(&pq->lock);

	/* if the engine is not running, nothing can be queued */
	if (!qib_sdma_running(ppd))
		goto done_unlock;

	/* if we have packets the hardware may not have finished yet */
	if (pq->added > ppd->sdma_descq_removed)
		qib_user_sdma_hwqueue_clean(ppd);
	/* if we have completed packets waiting to be freed */
	if (pq->num_sending)
		qib_user_sdma_queue_clean(ppd, pq);

	while (dim) {
		int mxp = 1;
		int ndesc = 0;

		ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
					       iov, dim, &list, &mxp, &ndesc);
		if (ret < 0)
			goto done_unlock;
		else {
			dim -= ret;
			iov += ret;
		}

		/* force the packets onto the sdma hw queue */
		if (!list_empty(&list)) {
			/*
			 * Not enough descriptors free: reclaim completed
			 * work before pushing the new packets.
			 */
			if (qib_sdma_descq_freecnt(ppd) < ndesc) {
				qib_user_sdma_hwqueue_clean(ppd);
				if (pq->num_sending)
					qib_user_sdma_queue_clean(ppd, pq);
			}

			ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp);
			if (ret < 0)
				goto done_unlock;
			else {
				npkts += mxp;
				pq->counter += mxp;
			}
		}
	}

done_unlock:
	if (!list_empty(&list))
		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
	mutex_unlock(&pq->lock);

	return (ret < 0) ? ret : npkts;
}

int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
				struct qib_user_sdma_queue *pq)
{
	int ret = 0;

	mutex_lock(&pq->lock);
	qib_user_sdma_hwqueue_clean(ppd);
	ret = qib_user_sdma_queue_clean(ppd, pq);
	mutex_unlock(&pq->lock);

	return ret;
}

u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
{
	return pq ? pq->sent_counter : 0;
}

u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
{
	return pq ? pq->counter : 0;
}