// SPDX-License-Identifier: GPL-2.0-only
/*
 * MMU-based software IOTLB.
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <linux/vdpa.h>

#include "iova_domain.h"

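/*
 * Add the range [start, last] to the domain's IOTLB, backed by @file at
 * @offset. A reference to @file is held in the opaque vdpa_map_file
 * attached to the entry.
 */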
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
                 u64 start, u64 last,
                 u64 addr, unsigned int perm,
                 struct file *file, u64 offset)
{
    struct vdpa_map_file *map_file;
    int ret;

    map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC);
    if (!map_file)
        return -ENOMEM;

    map_file->file = get_file(file);
    map_file->offset = offset;

    ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
                    addr, perm, map_file);
    if (ret) {
        fput(map_file->file);
        kfree(map_file);
        return ret;
    }
    return 0;
}

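/*
 * Drop every IOTLB entry overlapping [start, last], releasing the file
 * reference held by each entry's vdpa_map_file.
 */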
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
                  u64 start, u64 last)
{
    struct vdpa_map_file *map_file;
    struct vhost_iotlb_map *map;

    while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) {
        map_file = (struct vdpa_map_file *)map->opaque;
        fput(map_file->file);
        kfree(map_file);
        vhost_iotlb_map_free(domain->iotlb, map);
    }
}

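/*
 * Replace the domain's IOTLB contents with the entries of @iotlb. On
 * failure, any partially installed entries are removed again.
 */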
int vduse_domain_set_map(struct vduse_iova_domain *domain,
             struct vhost_iotlb *iotlb)
{
    struct vdpa_map_file *map_file;
    struct vhost_iotlb_map *map;
    u64 start = 0ULL, last = ULLONG_MAX;
    int ret;

    spin_lock(&domain->iotlb_lock);
    vduse_iotlb_del_range(domain, start, last);

    for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
         map = vhost_iotlb_itree_next(map, start, last)) {
        map_file = (struct vdpa_map_file *)map->opaque;
        ret = vduse_iotlb_add_range(domain, map->start, map->last,
                        map->addr, map->perm,
                        map_file->file,
                        map_file->offset);
        if (ret)
            goto err;
    }
    spin_unlock(&domain->iotlb_lock);

    return 0;
err:
    vduse_iotlb_del_range(domain, start, last);
    spin_unlock(&domain->iotlb_lock);
    return ret;
}

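/*
 * Remove every entry of @iotlb from the domain's IOTLB.
 */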
void vduse_domain_clear_map(struct vduse_iova_domain *domain,
                struct vhost_iotlb *iotlb)
{
    struct vhost_iotlb_map *map;
    u64 start = 0ULL, last = ULLONG_MAX;

    spin_lock(&domain->iotlb_lock);
    for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
         map = vhost_iotlb_itree_next(map, start, last)) {
        vduse_iotlb_del_range(domain, map->start, map->last);
    }
    spin_unlock(&domain->iotlb_lock);
}

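/*
 * Record the bounce mapping for [iova, iova + size - 1]: allocate a
 * kernel bounce page for each IOVA page if needed and remember the
 * original physical address the data will be bounced to/from.
 */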
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
                     u64 iova, u64 size, u64 paddr)
{
    struct vduse_bounce_map *map;
    u64 last = iova + size - 1;

    while (iova <= last) {
        map = &domain->bounce_maps[iova >> PAGE_SHIFT];
        if (!map->bounce_page) {
            map->bounce_page = alloc_page(GFP_ATOMIC);
            if (!map->bounce_page)
                return -ENOMEM;
        }
        map->orig_phys = paddr;
        paddr += PAGE_SIZE;
        iova += PAGE_SIZE;
    }
    return 0;
}

static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
                       u64 iova, u64 size)
{
    struct vduse_bounce_map *map;
    u64 last = iova + size - 1;

    while (iova <= last) {
        map = &domain->bounce_maps[iova >> PAGE_SHIFT];
        map->orig_phys = INVALID_PHYS_ADDR;
        iova += PAGE_SIZE;
    }
}

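/*
 * Copy @size bytes between the original buffer at physical address
 * @orig and the bounce buffer at @addr, in the direction given by @dir.
 */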
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
              enum dma_data_direction dir)
{
    unsigned long pfn = PFN_DOWN(orig);
    unsigned int offset = offset_in_page(orig);
    struct page *page;
    unsigned int sz = 0;

    while (size) {
        sz = min_t(size_t, PAGE_SIZE - offset, size);

        page = pfn_to_page(pfn);
        if (dir == DMA_TO_DEVICE)
            memcpy_from_page(addr, page, offset, sz);
        else
            memcpy_to_page(page, offset, addr, sz);

        size -= sz;
        pfn++;
        addr += sz;
        offset = 0;
    }
}

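/*
 * Bounce data for [iova, iova + size - 1] between the caller's original
 * pages and the per-IOVA-page bounce pages.
 */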
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
                dma_addr_t iova, size_t size,
                enum dma_data_direction dir)
{
    struct vduse_bounce_map *map;
    unsigned int offset;
    void *addr;
    size_t sz;

    if (iova >= domain->bounce_size)
        return;

    while (size) {
        map = &domain->bounce_maps[iova >> PAGE_SHIFT];
        offset = offset_in_page(iova);
        sz = min_t(size_t, PAGE_SIZE - offset, size);

        if (WARN_ON(!map->bounce_page ||
                map->orig_phys == INVALID_PHYS_ADDR))
            return;

        addr = kmap_local_page(map->bounce_page);
        do_bounce(map->orig_phys + offset, addr + offset, sz, dir);
        kunmap_local(addr);
        size -= sz;
        iova += sz;
    }
}

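/*
 * Look up the page backing a coherent (non-bounce) IOVA and take a
 * reference on it for the mmap fault handler.
 */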
static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
{
    u64 start = iova & PAGE_MASK;
    u64 last = start + PAGE_SIZE - 1;
    struct vhost_iotlb_map *map;
    struct page *page = NULL;

    spin_lock(&domain->iotlb_lock);
    map = vhost_iotlb_itree_first(domain->iotlb, start, last);
    if (!map)
        goto out;

    page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
    get_page(page);
out:
    spin_unlock(&domain->iotlb_lock);

    return page;
}

static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
{
    struct vduse_bounce_map *map;
    struct page *page = NULL;

    read_lock(&domain->bounce_lock);
    map = &domain->bounce_maps[iova >> PAGE_SHIFT];
    if (domain->user_bounce_pages || !map->bounce_page)
        goto out;

    page = map->bounce_page;
    get_page(page);
out:
    read_unlock(&domain->bounce_lock);

    return page;
}

static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
{
    struct vduse_bounce_map *map;
    unsigned long pfn, bounce_pfns;

    bounce_pfns = domain->bounce_size >> PAGE_SHIFT;

    for (pfn = 0; pfn < bounce_pfns; pfn++) {
        map = &domain->bounce_maps[pfn];
        if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
            continue;

        if (!map->bounce_page)
            continue;

        __free_page(map->bounce_page);
        map->bounce_page = NULL;
    }
}

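/*
 * Replace the kernel bounce pages with pages supplied by userspace.
 * Data in any in-use kernel bounce page is copied over first. Partial
 * registration is not supported: @count must cover the whole bounce area.
 */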
int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
                       struct page **pages, int count)
{
    struct vduse_bounce_map *map;
    int i, ret;

    /* Now we don't support partial mapping */
    if (count != (domain->bounce_size >> PAGE_SHIFT))
        return -EINVAL;

    write_lock(&domain->bounce_lock);
    ret = -EEXIST;
    if (domain->user_bounce_pages)
        goto out;

    for (i = 0; i < count; i++) {
        map = &domain->bounce_maps[i];
        if (map->bounce_page) {
            /* Copy kernel page to user page if it's in use */
            if (map->orig_phys != INVALID_PHYS_ADDR)
                memcpy_to_page(pages[i], 0,
                           page_address(map->bounce_page),
                           PAGE_SIZE);
            __free_page(map->bounce_page);
        }
        map->bounce_page = pages[i];
        get_page(pages[i]);
    }
    domain->user_bounce_pages = true;
    ret = 0;
out:
    write_unlock(&domain->bounce_lock);

    return ret;
}

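/*
 * Detach the userspace bounce pages again. Data in any in-use user page
 * is copied back into a freshly allocated kernel bounce page so the
 * bounced data is preserved.
 */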
void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
{
    struct vduse_bounce_map *map;
    unsigned long i, count;

    write_lock(&domain->bounce_lock);
    if (!domain->user_bounce_pages)
        goto out;

    count = domain->bounce_size >> PAGE_SHIFT;
    for (i = 0; i < count; i++) {
        struct page *page = NULL;

        map = &domain->bounce_maps[i];
        if (WARN_ON(!map->bounce_page))
            continue;

        /* Copy user page to kernel page if it's in use */
        if (map->orig_phys != INVALID_PHYS_ADDR) {
            page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
            memcpy_from_page(page_address(page),
                     map->bounce_page, 0, PAGE_SIZE);
        }
        put_page(map->bounce_page);
        map->bounce_page = page;
    }
    domain->user_bounce_pages = false;
out:
    write_unlock(&domain->bounce_lock);
}

void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
{
    if (!domain->bounce_map)
        return;

    spin_lock(&domain->iotlb_lock);
    if (!domain->bounce_map)
        goto unlock;

    vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
    domain->bounce_map = 0;
unlock:
    spin_unlock(&domain->iotlb_lock);
}

static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
{
    int ret = 0;

    if (domain->bounce_map)
        return 0;

    spin_lock(&domain->iotlb_lock);
    if (domain->bounce_map)
        goto unlock;

    ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
                    0, VHOST_MAP_RW, domain->file, 0);
    if (ret)
        goto unlock;

    domain->bounce_map = 1;
unlock:
    spin_unlock(&domain->iotlb_lock);
    return ret;
}

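/*
 * Allocate an IOVA range of at least @size bytes below @limit from the
 * given iova_domain. Returns 0 on failure.
 */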
static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain *iovad,
            unsigned long size, unsigned long limit)
{
    unsigned long shift = iova_shift(iovad);
    unsigned long iova_len = iova_align(iovad, size) >> shift;
    unsigned long iova_pfn;

    iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);

    return (dma_addr_t)iova_pfn << shift;
}

static void vduse_domain_free_iova(struct iova_domain *iovad,
                   dma_addr_t iova, size_t size)
{
    unsigned long shift = iova_shift(iovad);
    unsigned long iova_len = iova_align(iovad, size) >> shift;

    free_iova_fast(iovad, iova >> shift, iova_len);
}

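/*
 * Streaming DMA map: allocate an IOVA in the bounce region, set up the
 * bounce page(s) for it and, for transfers towards the device, copy the
 * caller's data into the bounce pages.
 */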
dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
                 struct page *page, unsigned long offset,
                 size_t size, enum dma_data_direction dir,
                 unsigned long attrs)
{
    struct iova_domain *iovad = &domain->stream_iovad;
    unsigned long limit = domain->bounce_size - 1;
    phys_addr_t pa = page_to_phys(page) + offset;
    dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);

    if (!iova)
        return DMA_MAPPING_ERROR;

    if (vduse_domain_init_bounce_map(domain))
        goto err;

    read_lock(&domain->bounce_lock);
    if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
        goto err_unlock;

    if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
        vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);

    read_unlock(&domain->bounce_lock);

    return iova;
err_unlock:
    read_unlock(&domain->bounce_lock);
err:
    vduse_domain_free_iova(iovad, iova, size);
    return DMA_MAPPING_ERROR;
}

void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
                 dma_addr_t dma_addr, size_t size,
                 enum dma_data_direction dir, unsigned long attrs)
{
    struct iova_domain *iovad = &domain->stream_iovad;

    read_lock(&domain->bounce_lock);
    if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
        vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);

    vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
    read_unlock(&domain->bounce_lock);
    vduse_domain_free_iova(iovad, dma_addr, size);
}

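/*
 * Coherent DMA alloc: back the IOVA range with pages obtained from
 * alloc_pages_exact() and publish the mapping through the IOTLB so the
 * fault handler can find it.
 */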
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
                  size_t size, dma_addr_t *dma_addr,
                  gfp_t flag, unsigned long attrs)
{
    struct iova_domain *iovad = &domain->consistent_iovad;
    unsigned long limit = domain->iova_limit;
    dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
    void *orig = alloc_pages_exact(size, flag);

    if (!iova || !orig)
        goto err;

    spin_lock(&domain->iotlb_lock);
    if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
                  virt_to_phys(orig), VHOST_MAP_RW,
                  domain->file, (u64)iova)) {
        spin_unlock(&domain->iotlb_lock);
        goto err;
    }
    spin_unlock(&domain->iotlb_lock);

    *dma_addr = iova;

    return orig;
err:
    *dma_addr = DMA_MAPPING_ERROR;
    if (orig)
        free_pages_exact(orig, size);
    if (iova)
        vduse_domain_free_iova(iovad, iova, size);

    return NULL;
}

void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
                void *vaddr, dma_addr_t dma_addr,
                unsigned long attrs)
{
    struct iova_domain *iovad = &domain->consistent_iovad;
    struct vhost_iotlb_map *map;
    struct vdpa_map_file *map_file;
    phys_addr_t pa;

    spin_lock(&domain->iotlb_lock);
    map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
                      (u64)dma_addr + size - 1);
    if (WARN_ON(!map)) {
        spin_unlock(&domain->iotlb_lock);
        return;
    }
    map_file = (struct vdpa_map_file *)map->opaque;
    fput(map_file->file);
    kfree(map_file);
    pa = map->addr;
    vhost_iotlb_map_free(domain->iotlb, map);
    spin_unlock(&domain->iotlb_lock);

    vduse_domain_free_iova(iovad, dma_addr, size);
    free_pages_exact(phys_to_virt(pa), size);
}

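/*
 * Page fault handler for the domain's mmap(): hand out the bounce page
 * or coherent page backing the faulting IOVA.
 */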
static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
{
    struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
    unsigned long iova = vmf->pgoff << PAGE_SHIFT;
    struct page *page;

    if (!domain)
        return VM_FAULT_SIGBUS;

    if (iova < domain->bounce_size)
        page = vduse_domain_get_bounce_page(domain, iova);
    else
        page = vduse_domain_get_coherent_page(domain, iova);

    if (!page)
        return VM_FAULT_SIGBUS;

    vmf->page = page;

    return 0;
}

static const struct vm_operations_struct vduse_domain_mmap_ops = {
    .fault = vduse_domain_mmap_fault,
};

static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct vduse_iova_domain *domain = file->private_data;

    vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
    vma->vm_private_data = domain;
    vma->vm_ops = &vduse_domain_mmap_ops;

    return 0;
}

static int vduse_domain_release(struct inode *inode, struct file *file)
{
    struct vduse_iova_domain *domain = file->private_data;

    spin_lock(&domain->iotlb_lock);
    vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
    vduse_domain_remove_user_bounce_pages(domain);
    vduse_domain_free_kernel_bounce_pages(domain);
    spin_unlock(&domain->iotlb_lock);
    put_iova_domain(&domain->stream_iovad);
    put_iova_domain(&domain->consistent_iovad);
    vhost_iotlb_free(domain->iotlb);
    vfree(domain->bounce_maps);
    kfree(domain);

    return 0;
}

static const struct file_operations vduse_domain_fops = {
    .owner = THIS_MODULE,
    .mmap = vduse_domain_mmap,
    .release = vduse_domain_release,
};

void vduse_domain_destroy(struct vduse_iova_domain *domain)
{
    fput(domain->file);
}

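/*
 * Create an IOVA domain with a bounce area of @bounce_size bytes at the
 * bottom of an IOVA space of @iova_limit bytes, backed by an anonymous
 * file whose mmap()/release hooks are vduse_domain_fops.
 */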
struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{
    struct vduse_iova_domain *domain;
    struct file *file;
    struct vduse_bounce_map *map;
    unsigned long pfn, bounce_pfns;
    int ret;

    bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
    if (iova_limit <= bounce_size)
        return NULL;

    domain = kzalloc(sizeof(*domain), GFP_KERNEL);
    if (!domain)
        return NULL;

    domain->iotlb = vhost_iotlb_alloc(0, 0);
    if (!domain->iotlb)
        goto err_iotlb;

    domain->iova_limit = iova_limit;
    domain->bounce_size = PAGE_ALIGN(bounce_size);
    domain->bounce_maps = vzalloc(bounce_pfns *
                sizeof(struct vduse_bounce_map));
    if (!domain->bounce_maps)
        goto err_map;

    for (pfn = 0; pfn < bounce_pfns; pfn++) {
        map = &domain->bounce_maps[pfn];
        map->orig_phys = INVALID_PHYS_ADDR;
    }
    file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
                domain, O_RDWR);
    if (IS_ERR(file))
        goto err_file;

    domain->file = file;
    rwlock_init(&domain->bounce_lock);
    spin_lock_init(&domain->iotlb_lock);
    init_iova_domain(&domain->stream_iovad,
            PAGE_SIZE, IOVA_START_PFN);
    ret = iova_domain_init_rcaches(&domain->stream_iovad);
    if (ret)
        goto err_iovad_stream;
    init_iova_domain(&domain->consistent_iovad,
            PAGE_SIZE, bounce_pfns);
    ret = iova_domain_init_rcaches(&domain->consistent_iovad);
    if (ret)
        goto err_iovad_consistent;

    return domain;
err_iovad_consistent:
    put_iova_domain(&domain->stream_iovad);
err_iovad_stream:
    fput(file);
err_file:
    vfree(domain->bounce_maps);
err_map:
    vhost_iotlb_free(domain->iotlb);
err_iotlb:
    kfree(domain);
    return NULL;
}

int vduse_domain_init(void)
{
    return iova_cache_get();
}

void vduse_domain_exit(void)
{
    iova_cache_put();
}