/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/dma_remapping.h>
#include <linux/pagemap.h>
#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

bool scif_ulimit_check = true;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
    struct scif_endpt_rma_info *rma = &ep->rma_info;

    mutex_init(&rma->rma_lock);
    init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
             SCIF_DMA_64BIT_PFN);
    spin_lock_init(&rma->tc_lock);
    mutex_init(&rma->mmn_lock);
    INIT_LIST_HEAD(&rma->reg_list);
    INIT_LIST_HEAD(&rma->remote_reg_list);
    atomic_set(&rma->tw_refcount, 0);
    atomic_set(&rma->tcw_refcount, 0);
    atomic_set(&rma->tcw_total_pages, 0);
    atomic_set(&rma->fence_refcount, 0);

    rma->async_list_del = 0;
    rma->dma_chan = NULL;
    INIT_LIST_HEAD(&rma->mmn_list);
    INIT_LIST_HEAD(&rma->vma_list);
    init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
    int ret = 0;

    mutex_lock(&ep->rma_info.rma_lock);
    /* Destroy RMA info only if all lists are empty and no refs remain */
    if (list_empty(&ep->rma_info.reg_list) &&
        list_empty(&ep->rma_info.remote_reg_list) &&
        list_empty(&ep->rma_info.mmn_list) &&
        !atomic_read(&ep->rma_info.tw_refcount) &&
        !atomic_read(&ep->rma_info.tcw_refcount) &&
        !atomic_read(&ep->rma_info.fence_refcount))
        ret = 1;
    mutex_unlock(&ep->rma_info.rma_lock);
    return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
    struct scif_pinned_pages *pin;

    might_sleep();
    pin = scif_zalloc(sizeof(*pin));
    if (!pin)
        goto error;

    pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
    if (!pin->pages)
        goto error_free_pinned_pages;

    pin->prot = prot;
    pin->magic = SCIFEP_MAGIC;
    return pin;

error_free_pinned_pages:
    scif_free(pin, sizeof(*pin));
error:
    return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
    int j;
    int writeable = pin->prot & SCIF_PROT_WRITE;
    int kernel = SCIF_MAP_KERNEL & pin->map_flags;

    for (j = 0; j < pin->nr_pages; j++) {
        if (pin->pages[j] && !kernel) {
            if (writeable)
                SetPageDirty(pin->pages[j]);
            put_page(pin->pages[j]);
        }
    }

    scif_free(pin->pages,
          pin->nr_pages * sizeof(*pin->pages));
    scif_free(pin, sizeof(*pin));
    return 0;
}

/**
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
                       s64 offset, bool temp)
{
    struct scif_window *window;

    might_sleep();
    window = scif_zalloc(sizeof(*window));
    if (!window)
        goto error;

    window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
    if (!window->dma_addr)
        goto error_free_window;

    window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
    if (!window->num_pages)
        goto error_free_window;

    window->offset = offset;
    window->ep = (u64)ep;
    window->magic = SCIFEP_MAGIC;
    window->reg_state = OP_IDLE;
    init_waitqueue_head(&window->regwq);
    window->unreg_state = OP_IDLE;
    init_waitqueue_head(&window->unregwq);
    INIT_LIST_HEAD(&window->list);
    window->type = SCIF_WINDOW_SELF;
    window->temp = temp;
    return window;

error_free_window:
    scif_free(window->dma_addr,
          nr_pages * sizeof(*window->dma_addr));
    scif_free(window, sizeof(*window));
error:
    return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for a self window whose registration did not complete.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
                       struct scif_window *window)
{
    int err;
    int nr_pages = window->nr_pages;
    struct scif_allocmsg *alloc = &window->alloc_handle;
    struct scifmsg msg;

retry:
    /* Wait for a SCIF_ALLOC_GNT/REJ message */
    err = wait_event_timeout(alloc->allocwq,
                 alloc->state != OP_IN_PROGRESS,
                 SCIF_NODE_ALIVE_TIMEOUT);
    if (!err && scifdev_alive(ep))
        goto retry;

    mutex_lock(&ep->rma_info.rma_lock);
    if (alloc->state == OP_COMPLETED) {
        msg.uop = SCIF_FREE_VIRT;
        msg.src = ep->port;
        msg.payload[0] = ep->remote_ep;
        msg.payload[1] = window->alloc_handle.vaddr;
        msg.payload[2] = (u64)window;
        msg.payload[3] = SCIF_REGISTER;
        _scif_nodeqp_send(ep->remote_dev, &msg);
    }
    mutex_unlock(&ep->rma_info.rma_lock);

    scif_free_window_offset(ep, window, window->offset);
    scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
    scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
    scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
    int j;

    if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
        if (window->st) {
            dma_unmap_sg(&remote_dev->sdev->dev,
                     window->st->sgl, window->st->nents,
                     DMA_BIDIRECTIONAL);
            sg_free_table(window->st);
            kfree(window->st);
            window->st = NULL;
        }
    } else {
        for (j = 0; j < window->nr_contig_chunks; j++) {
            if (window->dma_addr[j]) {
                scif_unmap_single(window->dma_addr[j],
                          remote_dev,
                          window->num_pages[j] <<
                          PAGE_SHIFT);
                window->dma_addr[j] = 0x0;
            }
        }
    }
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
    if (scif_ulimit_check)
        return get_task_mm(current);
    return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
    if (mm)
        mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
              int nr_pages, bool try_lock)
{
    if (!mm || !nr_pages || !scif_ulimit_check)
        return 0;
    if (try_lock) {
        if (!down_write_trylock(&mm->mmap_sem)) {
            dev_err(scif_info.mdev.this_device,
                "%s %d err\n", __func__, __LINE__);
            return -1;
        }
    } else {
        down_write(&mm->mmap_sem);
    }
    mm->pinned_vm -= nr_pages;
    up_write(&mm->mmap_sem);
    return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
                         int nr_pages)
{
    unsigned long locked, lock_limit;

    if (!mm || !nr_pages || !scif_ulimit_check)
        return 0;

    locked = nr_pages;
    locked += mm->pinned_vm;
    lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
    if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
        dev_err(scif_info.mdev.this_device,
            "locked(%lu) > lock_limit(%lu)\n",
            locked, lock_limit);
        return -ENOMEM;
    }
    mm->pinned_vm = locked;
    return 0;
}
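
/*
 * Worked example of the check above (illustrative figures, not taken from
 * the driver): with RLIMIT_MEMLOCK = 64 MB and 4 KB pages, lock_limit is
 * (64 << 20) >> PAGE_SHIFT = 16384 pages. Pinning 1024 more pages when
 * mm->pinned_vm is already 16000 gives locked = 17024 > 16384, so the
 * request fails with -ENOMEM unless the task has CAP_IPC_LOCK.
 */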

/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
    int j;
    struct scif_pinned_pages *pinned_pages = window->pinned_pages;
    int nr_pages = window->nr_pages;

    might_sleep();
    if (!window->temp && window->mm) {
        __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
        __scif_release_mm(window->mm);
        window->mm = NULL;
    }

    scif_free_window_offset(ep, window, window->offset);
    scif_unmap_window(ep->remote_dev, window);
    /*
     * Decrement references for this set of pinned pages from
     * this window.
     */
    j = atomic_sub_return(1, &pinned_pages->ref_count);
    if (j < 0)
        dev_err(scif_info.mdev.this_device,
            "%s %d incorrect ref count %d\n",
            __func__, __LINE__, j);
    /*
     * If the ref count for pinned_pages is zero then someone
     * has already called scif_unpin_pages() for it and we should
     * destroy the page cache.
     */
    if (!j)
        scif_destroy_pinned_pages(window->pinned_pages);
    scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
    scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
    window->magic = 0;
    scif_free(window, sizeof(*window));
    return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
                     struct scif_window *window)
{
    int i, j, err = 0;
    int nr_pages = window->nr_pages;
    bool vmalloc_dma_phys, vmalloc_num_pages;

    might_sleep();
    /* Map window */
    err = scif_map_single(&window->mapped_offset,
                  window, remote_dev, sizeof(*window));
    if (err)
        goto error_window;

    /* Compute the number of lookup entries. 21 == 2MB Shift */
    window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
                    ((2) * 1024 * 1024)) >> 21;
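
    /*
     * Worked example (illustrative): with 4 KB pages each lookup entry
     * covers one page of 512 64-bit addresses, i.e. 512 * 4 KB = 2 MB of
     * registered memory. For nr_pages = 1025, nr_pages * PAGE_SIZE is
     * just over 4 MB, which aligns up to 6 MB, so nr_lookup = 3.
     */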

    window->dma_addr_lookup.lookup =
        scif_alloc_coherent(&window->dma_addr_lookup.offset,
                    remote_dev, window->nr_lookup *
                    sizeof(*window->dma_addr_lookup.lookup),
                    GFP_KERNEL | __GFP_ZERO);
    if (!window->dma_addr_lookup.lookup) {
        err = -ENOMEM;
        goto error_window;
    }

    window->num_pages_lookup.lookup =
        scif_alloc_coherent(&window->num_pages_lookup.offset,
                    remote_dev, window->nr_lookup *
                    sizeof(*window->num_pages_lookup.lookup),
                    GFP_KERNEL | __GFP_ZERO);
    if (!window->num_pages_lookup.lookup) {
        err = -ENOMEM;
        goto error_window;
    }

    vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
    vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);

    /* Now map each of the pages containing physical addresses */
    for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
        err = scif_map_page(&window->dma_addr_lookup.lookup[j],
                    vmalloc_dma_phys ?
                    vmalloc_to_page(&window->dma_addr[i]) :
                    virt_to_page(&window->dma_addr[i]),
                    remote_dev);
        if (err)
            goto error_window;
        err = scif_map_page(&window->num_pages_lookup.lookup[j],
                    vmalloc_num_pages ?
                    vmalloc_to_page(&window->num_pages[i]) :
                    virt_to_page(&window->num_pages[i]),
                    remote_dev);
        if (err)
            goto error_window;
    }
    return 0;
error_window:
    return err;
}

/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
                       struct scif_window *window)
{
    int i, j;

    if (window->nr_lookup) {
        struct scif_rma_lookup *lup = &window->dma_addr_lookup;
        struct scif_rma_lookup *npup = &window->num_pages_lookup;

        for (i = 0, j = 0; i < window->nr_pages;
            i += SCIF_NR_ADDR_IN_PAGE, j++) {
            if (lup->lookup && lup->lookup[j])
                scif_unmap_single(lup->lookup[j],
                          remote_dev,
                          PAGE_SIZE);
            if (npup->lookup && npup->lookup[j])
                scif_unmap_single(npup->lookup[j],
                          remote_dev,
                          PAGE_SIZE);
        }
        if (lup->lookup)
            scif_free_coherent(lup->lookup, lup->offset,
                       remote_dev, window->nr_lookup *
                       sizeof(*lup->lookup));
        if (npup->lookup)
            scif_free_coherent(npup->lookup, npup->offset,
                       remote_dev, window->nr_lookup *
                       sizeof(*npup->lookup));
        if (window->mapped_offset)
            scif_unmap_single(window->mapped_offset,
                      remote_dev, sizeof(*window));
        window->nr_lookup = 0;
    }
}

/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
    struct scif_window *window;

    might_sleep();
    window = scif_zalloc(sizeof(*window));
    if (!window)
        goto error_ret;

    window->magic = SCIFEP_MAGIC;
    window->nr_pages = nr_pages;

    window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
    if (!window->dma_addr)
        goto error_window;

    window->num_pages = scif_zalloc(nr_pages *
                    sizeof(*window->num_pages));
    if (!window->num_pages)
        goto error_window;

    if (scif_create_remote_lookup(scifdev, window))
        goto error_window;

    window->type = SCIF_WINDOW_PEER;
    window->unreg_state = OP_IDLE;
    INIT_LIST_HEAD(&window->list);
    return window;
error_window:
    scif_destroy_remote_window(window);
error_ret:
    return NULL;
}

/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
    scif_free(window->dma_addr, window->nr_pages *
          sizeof(*window->dma_addr));
    scif_free(window->num_pages, window->nr_pages *
          sizeof(*window->num_pages));
    window->magic = 0;
    scif_free(window, sizeof(*window));
}

/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg() and then detect the number
 * of contiguous DMA mappings allocated.
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
              struct scif_window *window)
{
    struct scatterlist *sg;
    int i, err;
    scif_pinned_pages_t pin = window->pinned_pages;

    window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
    if (!window->st)
        return -ENOMEM;

    err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
    if (err)
        return err;

    for_each_sg(window->st->sgl, sg, window->st->nents, i)
        sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

    err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
             window->st->nents, DMA_BIDIRECTIONAL);
    if (!err)
        return -ENOMEM;
    /* Detect contiguous ranges of DMA mappings */
    sg = window->st->sgl;
    for (i = 0; sg; i++) {
        dma_addr_t last_da;

        window->dma_addr[i] = sg_dma_address(sg);
        window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
        last_da = sg_dma_address(sg) + sg_dma_len(sg);
        while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
            window->num_pages[i] +=
                (sg_dma_len(sg) >> PAGE_SHIFT);
            /* Advance past the entry just merged into this chunk */
            last_da += sg_dma_len(sg);
        }
        window->nr_contig_chunks++;
    }
    return 0;
}
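
/*
 * Example of the coalescing above (hypothetical IOVAs): if dma_map_sg()
 * returns three single-page entries at 0x100000, 0x101000 and 0x102000
 * followed by one at 0x200000, the loop records two chunks:
 * dma_addr[0] = 0x100000 with num_pages[0] = 3, and dma_addr[1] = 0x200000
 * with num_pages[1] = 1, so nr_contig_chunks == 2.
 */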

/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
    int i, j, k, err = 0, nr_contig_pages;
    scif_pinned_pages_t pin;
    phys_addr_t phys_prev, phys_curr;

    might_sleep();

    pin = window->pinned_pages;

    if (scif_is_iommu_enabled() && !scifdev_self(remote_dev))
        return scif_iommu_map(remote_dev, window);

    for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
        phys_prev = page_to_phys(pin->pages[i]);
        nr_contig_pages = 1;

        /* Detect physically contiguous chunks */
        for (k = i + 1; k < window->nr_pages; k++) {
            phys_curr = page_to_phys(pin->pages[k]);
            if (phys_curr != (phys_prev + PAGE_SIZE))
                break;
            phys_prev = phys_curr;
            nr_contig_pages++;
        }
        window->num_pages[j] = nr_contig_pages;
        window->nr_contig_chunks++;
        if (scif_is_mgmt_node()) {
            /*
             * Management node has to deal with SMPT on X100 and
             * hence the DMA mapping is required
             */
            err = scif_map_single(&window->dma_addr[j],
                          phys_to_virt(page_to_phys(
                               pin->pages[i])),
                          remote_dev,
                          nr_contig_pages << PAGE_SHIFT);
            if (err)
                return err;
        } else {
            window->dma_addr[j] = page_to_phys(pin->pages[i]);
        }
    }
    return err;
}
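
/*
 * Example of the physical-contiguity detection above (hypothetical
 * addresses): pinned pages at 0x10000, 0x11000, 0x12000 and 0x40000
 * yield num_pages[0] = 3 starting at 0x10000 and num_pages[1] = 1 at
 * 0x40000, i.e. nr_contig_chunks == 2.
 */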

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
                     struct scif_window *window)
{
    struct scifmsg msg;

    msg.uop = SCIF_UNREGISTER;
    msg.src = ep->port;
    msg.payload[0] = window->alloc_handle.vaddr;
    msg.payload[1] = (u64)window;
    return scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
    int err = 0;
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;
    bool send_msg = false;

    might_sleep();
    switch (window->unreg_state) {
    case OP_IDLE:
    {
        window->unreg_state = OP_IN_PROGRESS;
        send_msg = true;
        /* fall through */
    }
    case OP_IN_PROGRESS:
    {
        scif_get_window(window, 1);
        mutex_unlock(&ep->rma_info.rma_lock);
        if (send_msg) {
            err = scif_send_scif_unregister(ep, window);
            if (err) {
                window->unreg_state = OP_COMPLETED;
                goto done;
            }
        } else {
            /* Return ENXIO since unregistration is in progress */
            mutex_lock(&ep->rma_info.rma_lock);
            return -ENXIO;
        }
retry:
        /* Wait for a SCIF_UNREGISTER_(N)ACK message */
        err = wait_event_timeout(window->unregwq,
                     window->unreg_state != OP_IN_PROGRESS,
                     SCIF_NODE_ALIVE_TIMEOUT);
        if (!err && scifdev_alive(ep))
            goto retry;
        if (!err) {
            err = -ENODEV;
            window->unreg_state = OP_COMPLETED;
            dev_err(scif_info.mdev.this_device,
                "%s %d err %d\n", __func__, __LINE__, err);
        }
        if (err > 0)
            err = 0;
done:
        mutex_lock(&ep->rma_info.rma_lock);
        scif_put_window(window, 1);
        break;
    }
    case OP_FAILED:
    {
        if (!scifdev_alive(ep)) {
            err = -ENODEV;
            window->unreg_state = OP_COMPLETED;
        }
        break;
    }
    case OP_COMPLETED:
        break;
    default:
        err = -ENODEV;
    }

    if (window->unreg_state == OP_COMPLETED && window->ref_count)
        scif_put_window(window, window->nr_pages);

    if (!window->ref_count) {
        atomic_inc(&ep->rma_info.tw_refcount);
        list_del_init(&window->list);
        scif_free_window_offset(ep, window, window->offset);
        mutex_unlock(&ep->rma_info.rma_lock);
        if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
            scifdev_alive(ep)) {
            scif_drain_dma_intr(ep->remote_dev->sdev,
                        ep->rma_info.dma_chan);
        } else {
            if (!__scif_dec_pinned_vm_lock(window->mm,
                               window->nr_pages, 1)) {
                __scif_release_mm(window->mm);
                window->mm = NULL;
            }
        }
        scif_queue_for_cleanup(window, &scif_info.rma);
        mutex_lock(&ep->rma_info.rma_lock);
    }
    return err;
}
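
/*
 * Summary of the unregistration states used above: OP_IDLE moves to
 * OP_IN_PROGRESS once the SCIF_UNREGISTER message is sent; the peer's
 * ACK/NACK handlers move the window to OP_COMPLETED/OP_FAILED. A caller
 * that finds OP_IN_PROGRESS backs off with -ENXIO since another thread
 * already owns the transition.
 */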

/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request.
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
                   struct scif_window *window)
{
    struct scifmsg msg;
    struct scif_allocmsg *alloc = &window->alloc_handle;

    /* Set up the Alloc Handle */
    alloc->state = OP_IN_PROGRESS;
    init_waitqueue_head(&alloc->allocwq);

    /* Send out an allocation request */
    msg.uop = SCIF_ALLOC_REQ;
    msg.payload[1] = window->nr_pages;
    msg.payload[2] = (u64)&window->alloc_handle;
    return _scif_nodeqp_send(ep->remote_dev, &msg);
}

/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
                   struct scif_window *window)
{
    struct scifmsg msg;
    struct scif_window *remote_window;
    struct scif_allocmsg *alloc = &window->alloc_handle;
    dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
    int i = 0, j = 0;
    int nr_contig_chunks, loop_nr_contig_chunks;
    int remaining_nr_contig_chunks, nr_lookup;
    int err, map_err;

    map_err = scif_map_window(ep->remote_dev, window);
    if (map_err)
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d map_err %d\n", __func__, __LINE__, map_err);
    remaining_nr_contig_chunks = window->nr_contig_chunks;
    nr_contig_chunks = window->nr_contig_chunks;
retry:
    /* Wait for a SCIF_ALLOC_GNT/REJ message */
    err = wait_event_timeout(alloc->allocwq,
                 alloc->state != OP_IN_PROGRESS,
                 SCIF_NODE_ALIVE_TIMEOUT);
    mutex_lock(&ep->rma_info.rma_lock);
    /* Synchronize with the thread waking up allocwq */
    mutex_unlock(&ep->rma_info.rma_lock);
    if (!err && scifdev_alive(ep))
        goto retry;

    if (!err)
        err = -ENODEV;

    if (err > 0)
        err = 0;
    else
        return err;

    /* Bail out. The remote end rejected this request */
    if (alloc->state == OP_FAILED)
        return -ENOMEM;

    if (map_err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, map_err);
        msg.uop = SCIF_FREE_VIRT;
        msg.src = ep->port;
        msg.payload[0] = ep->remote_ep;
        msg.payload[1] = window->alloc_handle.vaddr;
        msg.payload[2] = (u64)window;
        msg.payload[3] = SCIF_REGISTER;
        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_CONNECTED)
            err = _scif_nodeqp_send(ep->remote_dev, &msg);
        else
            err = -ENOTCONN;
        spin_unlock(&ep->lock);
        return err;
    }

    remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
                     ep->remote_dev);

    /* Compute the number of lookup entries. 21 == 2MB Shift */
    nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
              >> ilog2(SCIF_NR_ADDR_IN_PAGE);
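
    /*
     * Worked example (illustrative): with 4 KB lookup pages holding
     * SCIF_NR_ADDR_IN_PAGE = 512 entries each, 600 contiguous chunks
     * align up to 1024 and 1024 >> 9 = 2, so two lookup pages are
     * copied over in the loop below.
     */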

    dma_phys_lookup =
        scif_ioremap(remote_window->dma_addr_lookup.offset,
                 nr_lookup *
                 sizeof(*remote_window->dma_addr_lookup.lookup),
                 ep->remote_dev);
    num_pages_lookup =
        scif_ioremap(remote_window->num_pages_lookup.offset,
                 nr_lookup *
                 sizeof(*remote_window->num_pages_lookup.lookup),
                 ep->remote_dev);

    while (remaining_nr_contig_chunks) {
        loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
                          (int)SCIF_NR_ADDR_IN_PAGE);
        /* #1/2 - Copy physical addresses over to the remote side */

        /* #2/2 - Copy DMA addresses (addresses that are fed into the
         * DMA engine). We transfer bus addresses, which are then
         * converted into MIC physical addresses on the remote side
         * if it is a MIC; if the remote node is a mgmt node we
         * transfer the MIC physical address directly.
         */
        tmp = scif_ioremap(dma_phys_lookup[j],
                   loop_nr_contig_chunks *
                   sizeof(*window->dma_addr),
                   ep->remote_dev);
        tmp1 = scif_ioremap(num_pages_lookup[j],
                    loop_nr_contig_chunks *
                    sizeof(*window->num_pages),
                    ep->remote_dev);
        if (scif_is_mgmt_node()) {
            memcpy_toio((void __force __iomem *)tmp,
                    &window->dma_addr[i], loop_nr_contig_chunks
                    * sizeof(*window->dma_addr));
            memcpy_toio((void __force __iomem *)tmp1,
                    &window->num_pages[i], loop_nr_contig_chunks
                    * sizeof(*window->num_pages));
        } else {
            if (scifdev_is_p2p(ep->remote_dev)) {
                /*
                 * add remote node's base address for this node
                 * to convert it into a MIC address
                 */
                int m;
                dma_addr_t dma_addr;

                for (m = 0; m < loop_nr_contig_chunks; m++) {
                    dma_addr = window->dma_addr[i + m] +
                        ep->remote_dev->base_addr;
                    writeq(dma_addr,
                           (void __force __iomem *)&tmp[m]);
                }
                memcpy_toio((void __force __iomem *)tmp1,
                        &window->num_pages[i],
                        loop_nr_contig_chunks
                        * sizeof(*window->num_pages));
            } else {
                /* Mgmt node or loopback - transfer DMA
                 * addresses as is, this is the same as a
                 * MIC physical address (we use the dma_addr
                 * and not the phys_addr array since the
                 * phys_addr is only setup if there is a mmap()
                 * request from the mgmt node)
                 */
                memcpy_toio((void __force __iomem *)tmp,
                        &window->dma_addr[i],
                        loop_nr_contig_chunks *
                        sizeof(*window->dma_addr));
                memcpy_toio((void __force __iomem *)tmp1,
                        &window->num_pages[i],
                        loop_nr_contig_chunks *
                        sizeof(*window->num_pages));
            }
        }
        remaining_nr_contig_chunks -= loop_nr_contig_chunks;
        i += loop_nr_contig_chunks;
        j++;
        scif_iounmap(tmp, loop_nr_contig_chunks *
                 sizeof(*window->dma_addr), ep->remote_dev);
        scif_iounmap(tmp1, loop_nr_contig_chunks *
                 sizeof(*window->num_pages), ep->remote_dev);
    }

    /* Prepare the remote window for the peer */
    remote_window->peer_window = (u64)window;
    remote_window->offset = window->offset;
    remote_window->prot = window->prot;
    remote_window->nr_contig_chunks = nr_contig_chunks;
    remote_window->ep = ep->remote_ep;
    scif_iounmap(num_pages_lookup,
             nr_lookup *
             sizeof(*remote_window->num_pages_lookup.lookup),
             ep->remote_dev);
    scif_iounmap(dma_phys_lookup,
             nr_lookup *
             sizeof(*remote_window->dma_addr_lookup.lookup),
             ep->remote_dev);
    scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
    window->peer_window = alloc->vaddr;
    return err;
}

/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if the EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message; otherwise send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
                   struct scif_window *window)
{
    int err = 0;
    struct scifmsg msg;

    msg.src = ep->port;
    msg.payload[0] = ep->remote_ep;
    msg.payload[1] = window->alloc_handle.vaddr;
    msg.payload[2] = (u64)window;
    spin_lock(&ep->lock);
    if (ep->state == SCIFEP_CONNECTED) {
        msg.uop = SCIF_REGISTER;
        window->reg_state = OP_IN_PROGRESS;
        err = _scif_nodeqp_send(ep->remote_dev, &msg);
        spin_unlock(&ep->lock);
        if (!err) {
retry:
            /* Wait for a SCIF_REGISTER_(N)ACK message */
            err = wait_event_timeout(window->regwq,
                         window->reg_state !=
                         OP_IN_PROGRESS,
                         SCIF_NODE_ALIVE_TIMEOUT);
            if (!err && scifdev_alive(ep))
                goto retry;
            err = !err ? -ENODEV : 0;
            if (window->reg_state == OP_FAILED)
                err = -ENOTCONN;
        }
    } else {
        msg.uop = SCIF_FREE_VIRT;
        msg.payload[3] = SCIF_REGISTER;
        err = _scif_nodeqp_send(ep->remote_dev, &msg);
        spin_unlock(&ep->lock);
        if (!err)
            err = -ENOTCONN;
    }
    return err;
}

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
               int num_pages, s64 *out_offset)
{
    s64 page_index;
    struct iova *iova_ptr;
    int err = 0;

    if (flags & SCIF_MAP_FIXED) {
        page_index = SCIF_IOVA_PFN(offset);
        iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
                    page_index + num_pages - 1);
        if (!iova_ptr)
            err = -EADDRINUSE;
    } else {
        iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
                      SCIF_DMA_63BIT_PFN - 1, 0);
        if (!iova_ptr)
            err = -ENOMEM;
    }
    if (!err)
        *out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
    return err;
}
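
/*
 * Example (illustrative): a SCIF_MAP_FIXED request at offset 0x200000 for
 * 16 pages reserves IOVA PFNs 0x200-0x20f and fails with -EADDRINUSE if
 * any of them is already taken; without SCIF_MAP_FIXED the allocator picks
 * any free range below SCIF_DMA_63BIT_PFN and returns its base via
 * *out_offset.
 */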

/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is supposed to grab
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
                 struct scif_window *window, s64 offset)
{
    if ((window && !window->offset_freed) || !window) {
        free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
        if (window)
            window->offset_freed = true;
    }
}

/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @msg:        Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
    int err;
    struct scif_window *window = NULL;
    int nr_pages = msg->payload[1];

    window = scif_create_remote_window(scifdev, nr_pages);
    if (!window) {
        err = -ENOMEM;
        goto error;
    }

    /* The peer's allocation request is granted */
    msg->uop = SCIF_ALLOC_GNT;
    msg->payload[0] = (u64)window;
    msg->payload[1] = window->mapped_offset;
    err = scif_nodeqp_send(scifdev, msg);
    if (err)
        scif_destroy_remote_window(window);
    return;
error:
    /* The peer's allocation request is rejected */
    dev_err(&scifdev->sdev->dev,
        "%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
        __func__, __LINE__, err, window, nr_pages);
    msg->uop = SCIF_ALLOC_REJ;
    scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @msg:        Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
    struct scif_window *window = container_of(handle, struct scif_window,
                          alloc_handle);
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;

    mutex_lock(&ep->rma_info.rma_lock);
    handle->vaddr = msg->payload[0];
    handle->phys_addr = msg->payload[1];
    if (msg->uop == SCIF_ALLOC_GNT)
        handle->state = OP_COMPLETED;
    else
        handle->state = OP_FAILED;
    wake_up(&handle->allocwq);
    mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @msg:        Interrupt message
 *
 * Free up the memory allocated earlier for a remote window.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_window *window = (struct scif_window *)msg->payload[1];

    scif_destroy_remote_window(window);
}

static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
    int j;
    struct scif_hw_dev *sdev = dev->sdev;
    phys_addr_t apt_base = 0;

    /*
     * Add the aperture base if the DMA address is not card relative
     * since the DMA addresses need to be an offset into the bar
     */
    if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
        sdev->aper && !sdev->card_rel_da)
        apt_base = sdev->aper->pa;
    else
        return;

    for (j = 0; j < window->nr_contig_chunks; j++) {
        if (window->num_pages[j])
            window->dma_addr[j] += apt_base;
        else
            break;
    }
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @msg:        Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
    struct scif_window *window =
        (struct scif_window *)msg->payload[1];

    mutex_lock(&ep->rma_info.rma_lock);
    spin_lock(&ep->lock);
    if (ep->state == SCIFEP_CONNECTED) {
        msg->uop = SCIF_REGISTER_ACK;
        scif_nodeqp_send(ep->remote_dev, msg);
        scif_fixup_aper_base(ep->remote_dev, window);
        /* No further failures expected. Insert new window */
        scif_insert_window(window, &ep->rma_info.remote_reg_list);
    } else {
        msg->uop = SCIF_REGISTER_NACK;
        scif_nodeqp_send(ep->remote_dev, msg);
    }
    spin_unlock(&ep->lock);
    mutex_unlock(&ep->rma_info.rma_lock);
    /* free up any lookup resources now that page lists are transferred */
    scif_destroy_remote_lookup(ep->remote_dev, window);
    /*
     * We could not insert the window but we need to
     * destroy the window.
     */
    if (msg->uop == SCIF_REGISTER_NACK)
        scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @msg:        Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_rma_req req;
    struct scif_window *window = NULL;
    struct scif_window *recv_window =
        (struct scif_window *)msg->payload[0];
    struct scif_endpt *ep;
    int del_window = 0;

    ep = (struct scif_endpt *)recv_window->ep;
    req.out_window = &window;
    req.offset = recv_window->offset;
    req.prot = 0;
    req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
    req.type = SCIF_WINDOW_FULL;
    req.head = &ep->rma_info.remote_reg_list;
    msg->payload[0] = ep->remote_ep;

    mutex_lock(&ep->rma_info.rma_lock);
    /* Does a valid window exist? */
    if (scif_query_window(&req)) {
        dev_err(&scifdev->sdev->dev,
            "%s %d -ENXIO\n", __func__, __LINE__);
        msg->uop = SCIF_UNREGISTER_ACK;
        goto error;
    }
    if (window) {
        if (window->ref_count)
            scif_put_window(window, window->nr_pages);
        else
            dev_err(&scifdev->sdev->dev,
                "%s %d ref count should be +ve\n",
                __func__, __LINE__);
        window->unreg_state = OP_COMPLETED;
        if (!window->ref_count) {
            msg->uop = SCIF_UNREGISTER_ACK;
            atomic_inc(&ep->rma_info.tw_refcount);
            ep->rma_info.async_list_del = 1;
            list_del_init(&window->list);
            del_window = 1;
        } else {
            /* NACK! There are valid references to this window */
            msg->uop = SCIF_UNREGISTER_NACK;
        }
    } else {
        /* The window did not make its way to the list at all. ACK */
        msg->uop = SCIF_UNREGISTER_ACK;
        scif_destroy_remote_window(recv_window);
    }
error:
    mutex_unlock(&ep->rma_info.rma_lock);
    if (del_window)
        scif_drain_dma_intr(ep->remote_dev->sdev,
                    ep->rma_info.dma_chan);
    scif_nodeqp_send(ep->remote_dev, msg);
    if (del_window)
        scif_queue_for_cleanup(window, &scif_info.rma);
}

/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_window *window =
        (struct scif_window *)msg->payload[2];
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;

    mutex_lock(&ep->rma_info.rma_lock);
    window->reg_state = OP_COMPLETED;
    wake_up(&window->regwq);
    mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the thread waiting on the window to inform it that
 * registration cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_window *window =
        (struct scif_window *)msg->payload[2];
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;

    mutex_lock(&ep->rma_info.rma_lock);
    window->reg_state = OP_FAILED;
    wake_up(&window->regwq);
    mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_window *window =
        (struct scif_window *)msg->payload[1];
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;

    mutex_lock(&ep->rma_info.rma_lock);
    window->unreg_state = OP_COMPLETED;
    wake_up(&window->unregwq);
    mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @msg:        Interrupt message
 *
 * Wake up the thread waiting on the window to inform it that
 * unregistration cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
    struct scif_window *window =
        (struct scif_window *)msg->payload[1];
    struct scif_endpt *ep = (struct scif_endpt *)window->ep;

    mutex_lock(&ep->rma_info.rma_lock);
    window->unreg_state = OP_FAILED;
    wake_up(&window->unregwq);
    mutex_unlock(&ep->rma_info.rma_lock);
}

int __scif_pin_pages(void *addr, size_t len, int *out_prot,
             int map_flags, scif_pinned_pages_t *pages)
{
    struct scif_pinned_pages *pinned_pages;
    int nr_pages, err = 0, i;
    bool vmalloc_addr = false;
    bool try_upgrade = false;
    int prot = *out_prot;
    int ulimit = 0;
    struct mm_struct *mm = NULL;

    /* Unsupported flags */
    if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
        return -EINVAL;
    ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

    /* Unsupported protection requested */
    if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
        return -EINVAL;

    /* addr/len must be page aligned. len should be non zero */
    if (!len ||
        (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
        (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
        return -EINVAL;

    might_sleep();

    nr_pages = len >> PAGE_SHIFT;

    /* Allocate a set of pinned pages */
    pinned_pages = scif_create_pinned_pages(nr_pages, prot);
    if (!pinned_pages)
        return -ENOMEM;

    if (map_flags & SCIF_MAP_KERNEL) {
        if (is_vmalloc_addr(addr))
            vmalloc_addr = true;

        for (i = 0; i < nr_pages; i++) {
            if (vmalloc_addr)
                pinned_pages->pages[i] =
                    vmalloc_to_page(addr + (i * PAGE_SIZE));
            else
                pinned_pages->pages[i] =
                    virt_to_page(addr + (i * PAGE_SIZE));
        }
        pinned_pages->nr_pages = nr_pages;
        pinned_pages->map_flags = SCIF_MAP_KERNEL;
    } else {
        /*
         * SCIF supports registration caching. If a registration has
         * been requested with read only permissions, then we try
         * to pin the pages with RW permissions so that a subsequent
         * transfer with RW permission can hit the cache instead of
         * invalidating it. If pinning with RW fails, we fall back
         * to read-only permission and retry.
         */
        if (prot == SCIF_PROT_READ)
            try_upgrade = true;
        prot |= SCIF_PROT_WRITE;
retry:
        mm = current->mm;
        down_write(&mm->mmap_sem);
        if (ulimit) {
            err = __scif_check_inc_pinned_vm(mm, nr_pages);
            if (err) {
                up_write(&mm->mmap_sem);
                pinned_pages->nr_pages = 0;
                goto error_unmap;
            }
        }

        pinned_pages->nr_pages = get_user_pages(
                (u64)addr,
                nr_pages,
                (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
                pinned_pages->pages,
                NULL);
        up_write(&mm->mmap_sem);
        if (nr_pages != pinned_pages->nr_pages) {
            if (try_upgrade) {
                if (ulimit)
                    __scif_dec_pinned_vm_lock(mm,
                                  nr_pages, 0);
                /* Roll back any pinned pages */
                for (i = 0; i < pinned_pages->nr_pages; i++) {
                    if (pinned_pages->pages[i])
                        put_page(
                        pinned_pages->pages[i]);
                }
                prot &= ~SCIF_PROT_WRITE;
                try_upgrade = false;
                goto retry;
            }
        }
        pinned_pages->map_flags = 0;
    }

    if (pinned_pages->nr_pages < nr_pages) {
        err = -EFAULT;
        pinned_pages->nr_pages = nr_pages;
        goto dec_pinned;
    }

    *out_prot = prot;
    atomic_set(&pinned_pages->ref_count, 1);
    *pages = pinned_pages;
    return err;
dec_pinned:
    if (ulimit)
        __scif_dec_pinned_vm_lock(mm, nr_pages, 0);
    /* Something went wrong! Rollback */
error_unmap:
    pinned_pages->nr_pages = nr_pages;
    scif_destroy_pinned_pages(pinned_pages);
    *pages = NULL;
    dev_dbg(scif_info.mdev.this_device,
        "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
    return err;
}

int scif_pin_pages(void *addr, size_t len, int prot,
           int map_flags, scif_pinned_pages_t *pages)
{
    return __scif_pin_pages(addr, len, &prot, map_flags, pages);
}
EXPORT_SYMBOL_GPL(scif_pin_pages);

int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
{
    int err = 0, ret;

    if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
        return -EINVAL;

    ret = atomic_sub_return(1, &pinned_pages->ref_count);
    if (ret < 0) {
        dev_err(scif_info.mdev.this_device,
            "%s %d scif_unpin_pages called without pinning? rc %d\n",
            __func__, __LINE__, ret);
        return -EINVAL;
    }
    /*
     * Destroy the window if the ref count for this set of pinned
     * pages has dropped to zero. If it is positive then there is
     * a valid registered window which is backed by these pages and
     * it will be destroyed once all such windows are unregistered.
     */
    if (!ret)
        err = scif_destroy_pinned_pages(pinned_pages);

    return err;
}
EXPORT_SYMBOL_GPL(scif_unpin_pages);

static inline void
scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
{
    mutex_lock(&ep->rma_info.rma_lock);
    scif_insert_window(window, &ep->rma_info.reg_list);
    mutex_unlock(&ep->rma_info.rma_lock);
}

off_t scif_register_pinned_pages(scif_epd_t epd,
                 scif_pinned_pages_t pinned_pages,
                 off_t offset, int map_flags)
{
    struct scif_endpt *ep = (struct scif_endpt *)epd;
    s64 computed_offset;
    struct scif_window *window;
    int err;
    size_t len;
    struct device *spdev;

    /* Unsupported flags */
    if (map_flags & ~SCIF_MAP_FIXED)
        return -EINVAL;

    len = pinned_pages->nr_pages << PAGE_SHIFT;

    /*
     * Offset is not page aligned/negative or offset+len
     * wraps around with SCIF_MAP_FIXED.
     */
    if ((map_flags & SCIF_MAP_FIXED) &&
        ((ALIGN(offset, PAGE_SIZE) != offset) ||
        (offset < 0) ||
        (len > LONG_MAX - offset)))
        return -EINVAL;

    might_sleep();

    err = scif_verify_epd(ep);
    if (err)
        return err;
    /*
     * It is an error to pass pinned_pages to scif_register_pinned_pages()
     * after calling scif_unpin_pages().
     */
    if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
        return -EINVAL;

    /* Compute the offset for this registration */
    err = scif_get_window_offset(ep, map_flags, offset,
                     len >> PAGE_SHIFT, &computed_offset);
    if (err) {
        atomic_sub(1, &pinned_pages->ref_count);
        return err;
    }

    /* Allocate and prepare self registration window */
    window = scif_create_window(ep, pinned_pages->nr_pages,
                    computed_offset, false);
    if (!window) {
        atomic_sub(1, &pinned_pages->ref_count);
        scif_free_window_offset(ep, NULL, computed_offset);
        return -ENOMEM;
    }

    window->pinned_pages = pinned_pages;
    window->nr_pages = pinned_pages->nr_pages;
    window->prot = pinned_pages->prot;

    spdev = scif_get_peer_dev(ep->remote_dev);
    if (IS_ERR(spdev)) {
        err = PTR_ERR(spdev);
        scif_destroy_window(ep, window);
        return err;
    }
    err = scif_send_alloc_request(ep, window);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, err);
        goto error_unmap;
    }

    /* Prepare the remote registration window */
    err = scif_prep_remote_window(ep, window);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, err);
        goto error_unmap;
    }

    /* Tell the peer about the new window */
    err = scif_send_scif_register(ep, window);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, err);
        goto error_unmap;
    }

    scif_put_peer_dev(spdev);
    /* No further failures expected. Insert new window */
    scif_insert_local_window(window, ep);
    return computed_offset;
error_unmap:
    scif_destroy_window(ep, window);
    scif_put_peer_dev(spdev);
    dev_err(&ep->remote_dev->sdev->dev,
        "%s %d err %d\n", __func__, __LINE__, err);
    return err;
}
EXPORT_SYMBOL_GPL(scif_register_pinned_pages);
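
/*
 * Minimal usage sketch for the pin-then-register flow above. This is
 * illustrative only and is not built with the driver; the function name,
 * buffer and length are assumptions, not part of the SCIF API.
 */
#if 0
static off_t scif_example_register_kernel_buf(scif_epd_t epd, void *buf,
                          size_t buf_len)
{
    scif_pinned_pages_t pin;
    off_t off;
    int err;

    /* Pin a page-aligned kernel buffer with RW access */
    err = scif_pin_pages(buf, buf_len,
                 SCIF_PROT_READ | SCIF_PROT_WRITE,
                 SCIF_MAP_KERNEL, &pin);
    if (err)
        return err;

    /* Let SCIF pick the offset in the registered address space */
    off = scif_register_pinned_pages(epd, pin, 0, 0);
    if (off < 0)
        scif_unpin_pages(pin);
    return off;
}
#endif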

off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
            int prot, int map_flags)
{
    scif_pinned_pages_t pinned_pages;
    off_t err;
    struct scif_endpt *ep = (struct scif_endpt *)epd;
    s64 computed_offset;
    struct scif_window *window;
    struct mm_struct *mm = NULL;
    struct device *spdev;

    dev_dbg(scif_info.mdev.this_device,
        "SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
        epd, addr, len, offset, prot, map_flags);
    /* Unsupported flags */
    if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
        return -EINVAL;

    /*
     * Offset is not page aligned/negative or offset+len
     * wraps around with SCIF_MAP_FIXED.
     */
    if ((map_flags & SCIF_MAP_FIXED) &&
        ((ALIGN(offset, PAGE_SIZE) != offset) ||
        (offset < 0) ||
        (len > LONG_MAX - offset)))
        return -EINVAL;

    /* Unsupported protection requested */
    if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
        return -EINVAL;

    /* addr/len must be page aligned. len should be non zero */
    if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
        (ALIGN(len, PAGE_SIZE) != len))
        return -EINVAL;

    might_sleep();

    err = scif_verify_epd(ep);
    if (err)
        return err;

    /* Compute the offset for this registration */
    err = scif_get_window_offset(ep, map_flags, offset,
                     len >> PAGE_SHIFT, &computed_offset);
    if (err)
        return err;

    spdev = scif_get_peer_dev(ep->remote_dev);
    if (IS_ERR(spdev)) {
        err = PTR_ERR(spdev);
        scif_free_window_offset(ep, NULL, computed_offset);
        return err;
    }
    /* Allocate and prepare self registration window */
    window = scif_create_window(ep, len >> PAGE_SHIFT,
                    computed_offset, false);
    if (!window) {
        scif_free_window_offset(ep, NULL, computed_offset);
        scif_put_peer_dev(spdev);
        return -ENOMEM;
    }

    window->nr_pages = len >> PAGE_SHIFT;

    err = scif_send_alloc_request(ep, window);
    if (err) {
        scif_destroy_incomplete_window(ep, window);
        scif_put_peer_dev(spdev);
        return err;
    }

    if (!(map_flags & SCIF_MAP_KERNEL)) {
        mm = __scif_acquire_mm();
        map_flags |= SCIF_MAP_ULIMIT;
    }
    /* Pin down the pages */
    err = __scif_pin_pages(addr, len, &prot,
                   map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
                   &pinned_pages);
    if (err) {
        scif_destroy_incomplete_window(ep, window);
        __scif_release_mm(mm);
        goto error;
    }

    window->pinned_pages = pinned_pages;
    window->prot = pinned_pages->prot;
    window->mm = mm;

    /* Prepare the remote registration window */
    err = scif_prep_remote_window(ep, window);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %ld\n", __func__, __LINE__, err);
        goto error_unmap;
    }

    /* Tell the peer about the new window */
    err = scif_send_scif_register(ep, window);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %ld\n", __func__, __LINE__, err);
        goto error_unmap;
    }

    scif_put_peer_dev(spdev);
    /* No further failures expected. Insert new window */
    scif_insert_local_window(window, ep);
    dev_dbg(&ep->remote_dev->sdev->dev,
        "SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
        epd, addr, len, computed_offset);
    return computed_offset;
error_unmap:
    scif_destroy_window(ep, window);
error:
    scif_put_peer_dev(spdev);
    dev_err(&ep->remote_dev->sdev->dev,
        "%s %d err %ld\n", __func__, __LINE__, err);
    return err;
}
EXPORT_SYMBOL_GPL(scif_register);

int
scif_unregister(scif_epd_t epd, off_t offset, size_t len)
{
    struct scif_endpt *ep = (struct scif_endpt *)epd;
    struct scif_window *window = NULL;
    struct scif_rma_req req;
    int nr_pages, err;
    struct device *spdev;

    dev_dbg(scif_info.mdev.this_device,
        "SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
        ep, offset, len);
    /* len must be page aligned. len should be non zero */
    if (!len ||
        (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
        return -EINVAL;

    /* Offset is not page aligned or offset+len wraps around */
    if ((ALIGN(offset, PAGE_SIZE) != offset) ||
        (offset < 0) ||
        (len > LONG_MAX - offset))
        return -EINVAL;

    err = scif_verify_epd(ep);
    if (err)
        return err;

    might_sleep();
    nr_pages = len >> PAGE_SHIFT;

    req.out_window = &window;
    req.offset = offset;
    req.prot = 0;
    req.nr_bytes = len;
    req.type = SCIF_WINDOW_FULL;
    req.head = &ep->rma_info.reg_list;

    spdev = scif_get_peer_dev(ep->remote_dev);
    if (IS_ERR(spdev)) {
        err = PTR_ERR(spdev);
        return err;
    }
    mutex_lock(&ep->rma_info.rma_lock);
    /* Does a valid window exist? */
    err = scif_query_window(&req);
    if (err) {
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, err);
        goto error;
    }
    /* Unregister all the windows in this range */
    err = scif_rma_list_unregister(window, offset, nr_pages);
    if (err)
        dev_err(&ep->remote_dev->sdev->dev,
            "%s %d err %d\n", __func__, __LINE__, err);
error:
    mutex_unlock(&ep->rma_info.rma_lock);
    scif_put_peer_dev(spdev);
    return err;
}
EXPORT_SYMBOL_GPL(scif_unregister);
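
/*
 * End-to-end usage sketch for scif_register()/scif_unregister(). This is
 * illustrative only and is not built with the driver; the connected
 * endpoint and the 64 KB length are assumptions.
 */
#if 0
static int scif_example_register_range(scif_epd_t epd, void *addr)
{
    size_t len = 64 * 1024;    /* page aligned: 16 pages with 4 KB pages */
    off_t off;
    int err;

    /* Pin, register and advertise the range to the connected peer */
    off = scif_register(epd, addr, len, 0,
                SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (off < 0)
        return (int)off;

    /* ... RMA operations against [off, off + len) would go here ... */

    /* Tear the registration down; pages unpin once all refs drop */
    err = scif_unregister(epd, off, len);
    return err;
}
#endif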