// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

static DEFINE_IDA(umem_ida);

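/* Unpin all pages backing the umem and mark them dirty, then free the
 * page pointer array.
 */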
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
    unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

    kvfree(umem->pgs);
    umem->pgs = NULL;
}

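/* Undo the RLIMIT_MEMLOCK accounting done in xdp_umem_account_pages() and
 * drop the reference on the owning user.
 */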
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
    if (umem->user) {
        atomic_long_sub(umem->npgs, &umem->user->locked_vm);
        free_uid(umem->user);
    }
}

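/* Tear down the kernel virtual mapping of the umem pages. */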
static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
    vunmap(umem->addrs);
    umem->addrs = NULL;
}

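/* Map the pinned pages into one contiguous kernel virtual address range so
 * the umem can be addressed linearly from the kernel.
 */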
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
                 u32 nr_pages)
{
    umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
    if (!umem->addrs)
        return -ENOMEM;
    return 0;
}

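/* Final teardown once the last reference is gone: return the id to the IDA,
 * unmap and unpin the pages, undo the memlock accounting and free the umem.
 */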
static void xdp_umem_release(struct xdp_umem *umem)
{
    umem->zc = false;
    ida_free(&umem_ida, umem->id);

    xdp_umem_addr_unmap(umem);
    xdp_umem_unpin_pages(umem);

    xdp_umem_unaccount_pages(umem);
    kfree(umem);
}

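/* Deferred-cleanup path: run xdp_umem_release() from workqueue context. */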
static void xdp_umem_release_deferred(struct work_struct *work)
{
    struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

    xdp_umem_release(umem);
}

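/* Take an additional reference on the umem. */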
void xdp_get_umem(struct xdp_umem *umem)
{
    refcount_inc(&umem->users);
}

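/* Drop a reference on the umem and release it when the last reference is
 * gone, either inline or deferred to a workqueue if the caller requests it.
 */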
void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
    if (!umem)
        return;

    if (refcount_dec_and_test(&umem->users)) {
        if (defer_cleanup) {
            INIT_WORK(&umem->work, xdp_umem_release_deferred);
            schedule_work(&umem->work);
        } else {
            xdp_umem_release(umem);
        }
    }
}

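/* Long-term pin the user pages backing the umem. On a partial pin the pages
 * that were pinned are released again and -ENOMEM is returned; a negative
 * return value from pin_user_pages() is propagated as-is.
 */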
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
    unsigned int gup_flags = FOLL_WRITE;
    long npgs;
    int err;

    umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
    if (!umem->pgs)
        return -ENOMEM;

    mmap_read_lock(current->mm);
    npgs = pin_user_pages(address, umem->npgs,
                  gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
    mmap_read_unlock(current->mm);

    if (npgs != umem->npgs) {
        if (npgs >= 0) {
            umem->npgs = npgs;
            err = -ENOMEM;
            goto out_pin;
        }
        err = npgs;
        goto out_pgs;
    }
    return 0;

out_pin:
    xdp_umem_unpin_pages(umem);
out_pgs:
    kvfree(umem->pgs);
    umem->pgs = NULL;
    return err;
}

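/* Charge the pinned pages against the owning user's RLIMIT_MEMLOCK unless
 * the caller has CAP_IPC_LOCK. The cmpxchg loop updates locked_vm without
 * holding a lock; if the limit would be exceeded, the user reference is
 * dropped and -ENOBUFS is returned.
 */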
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
    unsigned long lock_limit, new_npgs, old_npgs;

    if (capable(CAP_IPC_LOCK))
        return 0;

    lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
    umem->user = get_uid(current_user());

    do {
        old_npgs = atomic_long_read(&umem->user->locked_vm);
        new_npgs = old_npgs + umem->npgs;
        if (new_npgs > lock_limit) {
            free_uid(umem->user);
            umem->user = NULL;
            return -ENOBUFS;
        }
    } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
                     new_npgs) != old_npgs);
    return 0;
}

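/* Validate a registration request from user space (normally arriving via the
 * XDP_UMEM_REG setsockopt on an AF_XDP socket), fill in the umem fields, then
 * account, pin and map the backing pages.
 */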
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
    u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
    bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
    u64 npgs, addr = mr->addr, size = mr->len;
    unsigned int chunks, chunks_rem;
    int err;

    if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
        /* Strictly speaking we could support this, if:
         * - huge pages, or
         * - using an IOMMU, or
         * - making sure the memory area is consecutive
         * but for now, we simply say "computer says no".
         */
        return -EINVAL;
    }

    if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
        return -EINVAL;

    if (!unaligned_chunks && !is_power_of_2(chunk_size))
        return -EINVAL;

    if (!PAGE_ALIGNED(addr)) {
        /* Memory area has to be page size aligned, for
         * simplicity. This might change in the future.
         */
        return -EINVAL;
    }

    if ((addr + size) < addr)
        return -EINVAL;

    npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
    if (npgs_rem)
        npgs++;
    if (npgs > U32_MAX)
        return -EINVAL;

    chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
    if (chunks == 0)
        return -EINVAL;

    if (!unaligned_chunks && chunks_rem)
        return -EINVAL;

    if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
        return -EINVAL;

    umem->size = size;
    umem->headroom = headroom;
    umem->chunk_size = chunk_size;
    umem->chunks = chunks;
    umem->npgs = (u32)npgs;
    umem->pgs = NULL;
    umem->user = NULL;
    umem->flags = mr->flags;

    INIT_LIST_HEAD(&umem->xsk_dma_list);
    refcount_set(&umem->users, 1);

    err = xdp_umem_account_pages(umem);
    if (err)
        return err;

    err = xdp_umem_pin_pages(umem, (unsigned long)addr);
    if (err)
        goto out_account;

    err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
    if (err)
        goto out_unpin;

    return 0;

out_unpin:
    xdp_umem_unpin_pages(umem);
out_account:
    xdp_umem_unaccount_pages(umem);
    return err;
}

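/* Allocate a umem, assign it an id from the IDA and register the user memory
 * described by @mr. Returns the new umem, or an ERR_PTR() on failure.
 */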
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
    struct xdp_umem *umem;
    int err;

    umem = kzalloc(sizeof(*umem), GFP_KERNEL);
    if (!umem)
        return ERR_PTR(-ENOMEM);

    err = ida_alloc(&umem_ida, GFP_KERNEL);
    if (err < 0) {
        kfree(umem);
        return ERR_PTR(err);
    }
    umem->id = err;

    err = xdp_umem_reg(umem, mr);
    if (err) {
        ida_free(&umem_ida, umem->id);
        kfree(umem);
        return ERR_PTR(err);
    }

    return umem;
}
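
/* For reference, a minimal sketch of the user-space side of registration,
 * assuming a POSIX environment with the AF_XDP uapi headers available
 * (identifiers such as "umem_area", "umem_len" and "sfd" are illustrative,
 * not part of this file):
 *
 *     struct xdp_umem_reg mr = {
 *         .addr = (__u64)(uintptr_t)umem_area, // page-aligned buffer
 *         .len = umem_len,                     // total size in bytes
 *         .chunk_size = 2048,                  // power of two in [2048, PAGE_SIZE]
 *         .headroom = 0,
 *         .flags = 0,
 *     };
 *     setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 *
 * The AF_XDP setsockopt handler then calls xdp_umem_create() above with a
 * kernel copy of this structure.
 */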