0001 /*
0002  * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
0003  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
0004  *
0005  * This software is available to you under a choice of one of two
0006  * licenses.  You may choose to be licensed under the terms of the GNU
0007  * General Public License (GPL) Version 2, available from the file
0008  * COPYING in the main directory of this source tree, or the
0009  * OpenIB.org BSD license below:
0010  *
0011  *     Redistribution and use in source and binary forms, with or
0012  *     without modification, are permitted provided that the following
0013  *     conditions are met:
0014  *
0015  *      - Redistributions of source code must retain the above
0016  *        copyright notice, this list of conditions and the following
0017  *        disclaimer.
0018  *
0019  *      - Redistributions in binary form must reproduce the above
0020  *        copyright notice, this list of conditions and the following
0021  *        disclaimer in the documentation and/or other materials
0022  *        provided with the distribution.
0023  *
0024  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0025  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0026  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0027  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0028  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0029  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0030  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0031  * SOFTWARE.
0032  */
0033 
0034 #include <linux/slab.h>
0035 #include <rdma/ib_user_verbs.h>
0036 
0037 #include "mlx4_ib.h"
0038 
0039 static u32 convert_access(int acc)
0040 {
0041     return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
0042            (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
0043            (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
0044            (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
0045            (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
0046            MLX4_PERM_LOCAL_READ;
0047 }
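
/*
 * Illustrative mapping (derived from the flag names above, not from a
 * particular header revision): convert_access(IB_ACCESS_LOCAL_WRITE |
 * IB_ACCESS_REMOTE_READ) yields MLX4_PERM_LOCAL_WRITE |
 * MLX4_PERM_REMOTE_READ | MLX4_PERM_LOCAL_READ; local read permission is
 * always granted regardless of the requested access flags.
 */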
0048 
0049 static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
0050 {
0051     switch (type) {
0052     case IB_MW_TYPE_1:  return MLX4_MW_TYPE_1;
0053     case IB_MW_TYPE_2:  return MLX4_MW_TYPE_2;
0054     default:        return -1;
0055     }
0056 }
0057 
0058 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
0059 {
0060     struct mlx4_ib_mr *mr;
0061     int err;
0062 
0063     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0064     if (!mr)
0065         return ERR_PTR(-ENOMEM);
0066 
0067     err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
0068                 ~0ull, convert_access(acc), 0, 0, &mr->mmr);
0069     if (err)
0070         goto err_free;
0071 
0072     err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
0073     if (err)
0074         goto err_mr;
0075 
0076     mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
0077     mr->umem = NULL;
0078 
0079     return &mr->ibmr;
0080 
0081 err_mr:
0082     (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
0083 
0084 err_free:
0085     kfree(mr);
0086 
0087     return ERR_PTR(err);
0088 }
0089 
0090 enum {
0091     MLX4_MAX_MTT_SHIFT = 31
0092 };
0093 
0094 static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
0095                     struct mlx4_mtt *mtt,
0096                     u64 mtt_size, u64 mtt_shift, u64 len,
0097                     u64 cur_start_addr, u64 *pages,
0098                     int *start_index, int *npages)
0099 {
0100     u64 cur_end_addr = cur_start_addr + len;
0101     u64 cur_end_addr_aligned = 0;
0102     u64 mtt_entries;
0103     int err = 0;
0104     int k;
0105 
0106     len += (cur_start_addr & (mtt_size - 1ULL));
0107     cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
0108     len += (cur_end_addr_aligned - cur_end_addr);
0109     if (len & (mtt_size - 1ULL)) {
0110         pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
0111             len, mtt_size);
0112         return -EINVAL;
0113     }
0114 
0115     mtt_entries = (len >> mtt_shift);
0116 
0117     /*
0118      * Align the MTT start address to the mtt_size.
0119      * This is required to handle cases where the MR starts in the middle
0120      * of an MTT record. It was not needed in the old code because the
0121      * physical addresses provided by the DMA subsystem were page aligned,
0122      * and the page size was also the MTT size.
0123      */
0124     cur_start_addr = round_down(cur_start_addr, mtt_size);
0125     /* A new block is started ... */
0126     for (k = 0; k < mtt_entries; ++k) {
0127         pages[*npages] = cur_start_addr + (mtt_size * k);
0128         (*npages)++;
0129         /*
0130          * Be friendly to mlx4_write_mtt() and pass it chunks of
0131          * appropriate size.
0132          */
0133         if (*npages == PAGE_SIZE / sizeof(u64)) {
0134             err = mlx4_write_mtt(dev->dev, mtt, *start_index,
0135                          *npages, pages);
0136             if (err)
0137                 return err;
0138 
0139             (*start_index) += *npages;
0140             *npages = 0;
0141         }
0142     }
0143 
0144     return 0;
0145 }
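
/*
 * Worked example (illustrative numbers): with mtt_size = 0x1000 and a chunk
 * at cur_start_addr = 0x10001800 with len = 0x1800, the start misalignment
 * adds 0x800 and the end address (0x10003000) is already aligned, so len
 * becomes 0x2000 and mtt_entries = 2. The entries written are 0x10001000
 * and 0x10002000, i.e. the MTT records that cover the chunk.
 */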
0146 
0147 static inline u64 alignment_of(u64 ptr)
0148 {
0149     return ilog2(ptr & (~(ptr - 1)));
0150 }
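
/*
 * alignment_of() returns the number of trailing zero bits in ptr:
 * ptr & ~(ptr - 1) isolates the lowest set bit and ilog2() converts it to a
 * shift. For example, alignment_of(0x6000) = 13, because the lowest set bit
 * of 0x6000 is 0x2000.
 */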
0151 
0152 static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
0153                        u64 current_block_end,
0154                        u64 block_shift)
0155 {
0156     /* Check whether the new block is as well aligned as the previous
0157      * block: the block address must have zeros in its low block_shift
0158      * bits.
0159      */
0160     if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
0161         /*
0162          * It is not as well aligned as the previous block - reduce the
0163          * mtt size accordingly. Here we take the lowest bit that is
0164          * set to 1.
0165          */
0166         block_shift = alignment_of(next_block_start);
0167 
0168     /*
0169      * Check whether the end of the previous block is as well aligned
0170      * as the start of the block.
0171      */
0172     if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
0173         /*
0174          * It is not as well aligned as the start of the block -
0175          * reduce the mtt size accordingly.
0176          */
0177         block_shift = alignment_of(current_block_end);
0178 
0179     return block_shift;
0180 }
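
/*
 * For instance (illustrative numbers), if block_shift is currently 16 (64KB
 * blocks) and the next block starts at 0x10003000, the low 16 bits are not
 * zero, so block_shift is reduced to alignment_of(0x10003000) = 12, i.e.
 * 4KB blocks.
 */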
0181 
0182 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
0183                struct ib_umem *umem)
0184 {
0185     u64 *pages;
0186     u64 len = 0;
0187     int err = 0;
0188     u64 mtt_size;
0189     u64 cur_start_addr = 0;
0190     u64 mtt_shift;
0191     int start_index = 0;
0192     int npages = 0;
0193     struct scatterlist *sg;
0194     int i;
0195 
0196     pages = (u64 *) __get_free_page(GFP_KERNEL);
0197     if (!pages)
0198         return -ENOMEM;
0199 
0200     mtt_shift = mtt->page_shift;
0201     mtt_size = 1ULL << mtt_shift;
0202 
0203     for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
0204         if (cur_start_addr + len == sg_dma_address(sg)) {
0205             /* still the same block */
0206             len += sg_dma_len(sg);
0207             continue;
0208         }
0209         /*
0210          * A new block is started ...
0211          * If len is misaligned, write an extra mtt entry to cover the
0212          * misaligned area (round up the division).
0213          */
0214         err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
0215                            mtt_shift, len,
0216                            cur_start_addr,
0217                            pages, &start_index,
0218                            &npages);
0219         if (err)
0220             goto out;
0221 
0222         cur_start_addr = sg_dma_address(sg);
0223         len = sg_dma_len(sg);
0224     }
0225 
0226     /* Handle the last block */
0227     if (len > 0) {
0228         /*
0229          * If len is misaligned, write an extra mtt entry to cover
0230          * the misaligned area (round up the division).
0231          */
0232         err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
0233                            mtt_shift, len,
0234                            cur_start_addr, pages,
0235                            &start_index, &npages);
0236         if (err)
0237             goto out;
0238     }
0239 
0240     if (npages)
0241         err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
0242 
0243 out:
0244     free_page((unsigned long) pages);
0245     return err;
0246 }
0247 
0248 /*
0249  * Calculate the optimal mtt size based on contiguous pages.
0250  * The function also accounts for the pages that are not aligned to the
0251  * calculated mtt_size and adds them to the total number of pages. For that,
0252  * the first and last chunk lengths are checked, and if either is not aligned
0253  * to mtt_size the number of non-aligned pages is incremented. All chunks in
0254  * the middle are already handled as part of the mtt shift calculation, for
0255  * both their start and end addresses.
0256  */
0257 int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
0258                        int *num_of_mtts)
0259 {
0260     u64 block_shift = MLX4_MAX_MTT_SHIFT;
0261     u64 min_shift = PAGE_SHIFT;
0262     u64 last_block_aligned_end = 0;
0263     u64 current_block_start = 0;
0264     u64 first_block_start = 0;
0265     u64 current_block_len = 0;
0266     u64 last_block_end = 0;
0267     struct scatterlist *sg;
0268     u64 current_block_end;
0269     u64 misalignment_bits;
0270     u64 next_block_start;
0271     u64 total_len = 0;
0272     int i;
0273 
0274     *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
0275 
0276     for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
0277         /*
0278          * Initialization - save the start of the first chunk as
0279          * current_block_start; a block is a run of contiguous pages.
0280          */
0281         if (current_block_len == 0 && current_block_start == 0) {
0282             current_block_start = sg_dma_address(sg);
0283             first_block_start = current_block_start;
0284             /*
0285              * Find the bits that differ between the physical
0286              * address and the virtual address of the start of the
0287              * MR.
0288              * umem_get aligned start_va to a page boundary, so we
0289              * need to align the start VA to the same boundary
0290              * here.
0291              * misalignment_bits is needed to handle the case of a
0292              * single memory region. In that case, the rest of the
0293              * logic will not reduce the block size. If we used a
0294              * block size larger than the alignment given by the
0295              * misalignment bits, we might use the virtual page
0296              * number instead of the physical page number, resulting
0297              * in access to the wrong data.
0298              */
0299             misalignment_bits =
0300                 (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
0301                 current_block_start;
0302             block_shift = min(alignment_of(misalignment_bits),
0303                       block_shift);
0304         }
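        /*
         * Illustrative example: if the page-aligned start_va ends in 0xa000
         * and the first DMA address ends in 0xc000, misalignment_bits ends
         * in 0x6000, alignment_of() returns 13, and block_shift is capped
         * at 13 (8KB blocks), because the VA and the PA only agree on
         * alignment below that boundary.
         */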
0305 
0306         /*
0307          * Go over the scatter entries and check if they continue the
0308          * previous scatter entry.
0309          */
0310         next_block_start = sg_dma_address(sg);
0311         current_block_end = current_block_start + current_block_len;
0312         /* If we have a split (non-contig.) between two blocks */
0313         if (current_block_end != next_block_start) {
0314             block_shift = mlx4_ib_umem_calc_block_mtt
0315                     (next_block_start,
0316                      current_block_end,
0317                      block_shift);
0318 
0319             /*
0320              * If we reached the minimum shift for a 4K page, stop
0321              * the loop.
0322              */
0323             if (block_shift <= min_shift)
0324                 goto end;
0325 
0326             /*
0327              * Add the length of the block that just ended to the
0328              * running total; it is used to calculate the number of
0329              * non-aligned pages at the end.
0330              */
0331             total_len += current_block_len;
0332 
0333             /* Start a new block */
0334             current_block_start = next_block_start;
0335             current_block_len = sg_dma_len(sg);
0336             continue;
0337         }
0338         /* The scatter entry is another part of the current block:
0339          * increase the block size.
0340          * A scatter entry can be larger than 4K (a page) because the
0341          * DMA mapping may merge some blocks together.
0342          */
0343         current_block_len += sg_dma_len(sg);
0344     }
0345 
0346     /* Account for the last block in the total len */
0347     total_len += current_block_len;
0348     /* Add to the first block the misalignment that it suffers from. */
0349     total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
0350     last_block_end = current_block_start + current_block_len;
0351     last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
0352     total_len += (last_block_aligned_end - last_block_end);
0353 
0354     if (total_len & ((1ULL << block_shift) - 1ULL))
0355         pr_warn("misaligned total length detected (%llu, %llu)!",
0356             total_len, block_shift);
0357 
0358     *num_of_mtts = total_len >> block_shift;
0359 end:
0360     if (block_shift < min_shift) {
0361         /*
0362          * If the shift is less than the minimum, print a warning and
0363          * return the minimum shift.
0364          */
0365         pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
0366 
0367         block_shift = min_shift;
0368     }
0369     return block_shift;
0370 }
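
/*
 * A rough illustration of the result: for a 1MB region whose physical chunks
 * allow block_shift = 16, *num_of_mtts becomes 1MB >> 16 = 16, whereas
 * falling back to the minimum shift of PAGE_SHIFT (12 on 4KB-page systems)
 * would require 256 MTT entries.
 */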
0371 
0372 static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
0373                     u64 length, int access_flags)
0374 {
0375     /*
0376      * Force registering the memory as writable if the underlying pages
0377      * are writable.  This is so rereg can change the access permissions
0378      * from readable to writable without having to run through ib_umem_get
0379      * again.
0380      */
0381     if (!ib_access_writable(access_flags)) {
0382         unsigned long untagged_start = untagged_addr(start);
0383         struct vm_area_struct *vma;
0384 
0385         mmap_read_lock(current->mm);
0386         /*
0387          * FIXME: Ideally this would iterate over all the vmas that
0388          * cover the memory, but for now it requires a single vma to
0389          * entirely cover the MR to support RO mappings.
0390          */
0391         vma = find_vma(current->mm, untagged_start);
0392         if (vma && vma->vm_end >= untagged_start + length &&
0393             vma->vm_start <= untagged_start) {
0394             if (vma->vm_flags & VM_WRITE)
0395                 access_flags |= IB_ACCESS_LOCAL_WRITE;
0396         } else {
0397             access_flags |= IB_ACCESS_LOCAL_WRITE;
0398         }
0399 
0400         mmap_read_unlock(current->mm);
0401     }
0402 
0403     return ib_umem_get(device, start, length, access_flags);
0404 }
0405 
0406 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
0407                   u64 virt_addr, int access_flags,
0408                   struct ib_udata *udata)
0409 {
0410     struct mlx4_ib_dev *dev = to_mdev(pd->device);
0411     struct mlx4_ib_mr *mr;
0412     int shift;
0413     int err;
0414     int n;
0415 
0416     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0417     if (!mr)
0418         return ERR_PTR(-ENOMEM);
0419 
0420     mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
0421     if (IS_ERR(mr->umem)) {
0422         err = PTR_ERR(mr->umem);
0423         goto err_free;
0424     }
0425 
0426     shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
0427 
0428     err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
0429                 convert_access(access_flags), n, shift, &mr->mmr);
0430     if (err)
0431         goto err_umem;
0432 
0433     err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
0434     if (err)
0435         goto err_mr;
0436 
0437     err = mlx4_mr_enable(dev->dev, &mr->mmr);
0438     if (err)
0439         goto err_mr;
0440 
0441     mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
0442     mr->ibmr.length = length;
0443     mr->ibmr.page_size = 1U << shift;
0444 
0445     return &mr->ibmr;
0446 
0447 err_mr:
0448     (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
0449 
0450 err_umem:
0451     ib_umem_release(mr->umem);
0452 
0453 err_free:
0454     kfree(mr);
0455 
0456     return ERR_PTR(err);
0457 }
0458 
0459 struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
0460                     u64 length, u64 virt_addr,
0461                     int mr_access_flags, struct ib_pd *pd,
0462                     struct ib_udata *udata)
0463 {
0464     struct mlx4_ib_dev *dev = to_mdev(mr->device);
0465     struct mlx4_ib_mr *mmr = to_mmr(mr);
0466     struct mlx4_mpt_entry *mpt_entry;
0467     struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
0468     int err;
0469 
0470     /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
0471      * we assume that the calls can't run concurrently. Otherwise, a
0472      * race exists.
0473      */
0474     err =  mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
0475     if (err)
0476         return ERR_PTR(err);
0477 
0478     if (flags & IB_MR_REREG_PD) {
0479         err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
0480                        to_mpd(pd)->pdn);
0481 
0482         if (err)
0483             goto release_mpt_entry;
0484     }
0485 
0486     if (flags & IB_MR_REREG_ACCESS) {
0487         if (ib_access_writable(mr_access_flags) &&
0488             !mmr->umem->writable) {
0489             err = -EPERM;
0490             goto release_mpt_entry;
0491         }
0492 
0493         err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
0494                            convert_access(mr_access_flags));
0495 
0496         if (err)
0497             goto release_mpt_entry;
0498     }
0499 
0500     if (flags & IB_MR_REREG_TRANS) {
0501         int shift;
0502         int n;
0503 
0504         mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
0505         ib_umem_release(mmr->umem);
0506         mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
0507                          mr_access_flags);
0508         if (IS_ERR(mmr->umem)) {
0509             err = PTR_ERR(mmr->umem);
0510             /* Prevent mlx4_ib_dereg_mr from freeing an invalid pointer */
0511             mmr->umem = NULL;
0512             goto release_mpt_entry;
0513         }
0514         n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
0515         shift = PAGE_SHIFT;
0516 
0517         err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
0518                           virt_addr, length, n, shift,
0519                           *pmpt_entry);
0520         if (err) {
0521             ib_umem_release(mmr->umem);
0522             goto release_mpt_entry;
0523         }
0524         mmr->mmr.iova       = virt_addr;
0525         mmr->mmr.size       = length;
0526 
0527         err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
0528         if (err) {
0529             mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
0530             ib_umem_release(mmr->umem);
0531             goto release_mpt_entry;
0532         }
0533     }
0534 
0535     /* If we couldn't transfer the MR to the HCA, just remember to
0536      * return a failure; dereg_mr will free the resources.
0537      */
0538     err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
0539     if (!err && flags & IB_MR_REREG_ACCESS)
0540         mmr->mmr.access = mr_access_flags;
0541 
0542 release_mpt_entry:
0543     mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
0544     if (err)
0545         return ERR_PTR(err);
0546     return NULL;
0547 }
0548 
0549 static int
0550 mlx4_alloc_priv_pages(struct ib_device *device,
0551               struct mlx4_ib_mr *mr,
0552               int max_pages)
0553 {
0554     int ret;
0555 
0556     /* Ensure that size is aligned to DMA cacheline
0557      * requirements.
0558      * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
0559      * so page_map_size will never cross PAGE_SIZE.
0560      */
0561     mr->page_map_size = roundup(max_pages * sizeof(u64),
0562                     MLX4_MR_PAGES_ALIGN);
0563 
0564     /* Prevent cross page boundary allocation. */
0565     mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
0566     if (!mr->pages)
0567         return -ENOMEM;
0568 
0569     mr->page_map = dma_map_single(device->dev.parent, mr->pages,
0570                       mr->page_map_size, DMA_TO_DEVICE);
0571 
0572     if (dma_mapping_error(device->dev.parent, mr->page_map)) {
0573         ret = -ENOMEM;
0574         goto err;
0575     }
0576 
0577     return 0;
0578 
0579 err:
0580     free_page((unsigned long)mr->pages);
0581     return ret;
0582 }
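
/*
 * Sizing sketch (assuming MLX4_MR_PAGES_ALIGN is a 64-byte DMA-cacheline
 * multiple): for max_pages = 32 the page list needs 32 * sizeof(u64) = 256
 * bytes, which roundup() leaves unchanged, so the whole list fits comfortably
 * inside the single zeroed page allocated above.
 */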
0583 
0584 static void
0585 mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
0586 {
0587     if (mr->pages) {
0588         struct ib_device *device = mr->ibmr.device;
0589 
0590         dma_unmap_single(device->dev.parent, mr->page_map,
0591                  mr->page_map_size, DMA_TO_DEVICE);
0592         free_page((unsigned long)mr->pages);
0593         mr->pages = NULL;
0594     }
0595 }
0596 
0597 int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
0598 {
0599     struct mlx4_ib_mr *mr = to_mmr(ibmr);
0600     int ret;
0601 
0602     mlx4_free_priv_pages(mr);
0603 
0604     ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
0605     if (ret)
0606         return ret;
0607     if (mr->umem)
0608         ib_umem_release(mr->umem);
0609     kfree(mr);
0610 
0611     return 0;
0612 }
0613 
0614 int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
0615 {
0616     struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
0617     struct mlx4_ib_mw *mw = to_mmw(ibmw);
0618     int err;
0619 
0620     err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
0621                 to_mlx4_type(ibmw->type), &mw->mmw);
0622     if (err)
0623         return err;
0624 
0625     err = mlx4_mw_enable(dev->dev, &mw->mmw);
0626     if (err)
0627         goto err_mw;
0628 
0629     ibmw->rkey = mw->mmw.key;
0630     return 0;
0631 
0632 err_mw:
0633     mlx4_mw_free(dev->dev, &mw->mmw);
0634     return err;
0635 }
0636 
0637 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
0638 {
0639     struct mlx4_ib_mw *mw = to_mmw(ibmw);
0640 
0641     mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
0642     return 0;
0643 }
0644 
0645 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
0646                    u32 max_num_sg)
0647 {
0648     struct mlx4_ib_dev *dev = to_mdev(pd->device);
0649     struct mlx4_ib_mr *mr;
0650     int err;
0651 
0652     if (mr_type != IB_MR_TYPE_MEM_REG ||
0653         max_num_sg > MLX4_MAX_FAST_REG_PAGES)
0654         return ERR_PTR(-EINVAL);
0655 
0656     mr = kzalloc(sizeof(*mr), GFP_KERNEL);
0657     if (!mr)
0658         return ERR_PTR(-ENOMEM);
0659 
0660     err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
0661                 max_num_sg, 0, &mr->mmr);
0662     if (err)
0663         goto err_free;
0664 
0665     err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
0666     if (err)
0667         goto err_free_mr;
0668 
0669     mr->max_pages = max_num_sg;
0670     err = mlx4_mr_enable(dev->dev, &mr->mmr);
0671     if (err)
0672         goto err_free_pl;
0673 
0674     mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
0675     mr->umem = NULL;
0676 
0677     return &mr->ibmr;
0678 
0679 err_free_pl:
0680     mr->ibmr.device = pd->device;
0681     mlx4_free_priv_pages(mr);
0682 err_free_mr:
0683     (void) mlx4_mr_free(dev->dev, &mr->mmr);
0684 err_free:
0685     kfree(mr);
0686     return ERR_PTR(err);
0687 }
0688 
0689 static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
0690 {
0691     struct mlx4_ib_mr *mr = to_mmr(ibmr);
0692 
0693     if (unlikely(mr->npages == mr->max_pages))
0694         return -ENOMEM;
0695 
0696     mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
0697 
0698     return 0;
0699 }
0700 
0701 int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
0702               unsigned int *sg_offset)
0703 {
0704     struct mlx4_ib_mr *mr = to_mmr(ibmr);
0705     int rc;
0706 
0707     mr->npages = 0;
0708 
0709     ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
0710                    mr->page_map_size, DMA_TO_DEVICE);
0711 
0712     rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
0713 
0714     ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
0715                       mr->page_map_size, DMA_TO_DEVICE);
0716 
0717     return rc;
0718 }
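
/*
 * Typical consumer flow (sketch, not taken from this file): a ULP that has
 * DMA-mapped a scatterlist calls
 *     n = ib_map_mr_sg(ibmr, sg, sg_nents, NULL, PAGE_SIZE);
 * which reaches this handler through the device's map_mr_sg hook;
 * mlx4_set_page() then records each page-sized block in mr->pages with
 * MLX4_MTT_FLAG_PRESENT set, and on success the MR can be posted in a
 * fast-register work request using ibmr->rkey.
 */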