// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

static DEFINE_IDA(umem_ida);

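/* Release the pinned user pages backing the umem, marking them dirty,
 * and free the page pointer array.
 */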
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

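/* Return the pages previously charged against the owning user's
 * RLIMIT_MEMLOCK accounting and drop the uid reference.
 */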
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

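/* Tear down the kernel virtual mapping of the umem pages. */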
static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

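/* Map the pinned pages into a contiguous kernel virtual address range
 * so the umem can be accessed linearly from the kernel.
 */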
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

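/* Final teardown of a umem: release its id, unmap and unpin its pages,
 * undo the memlock accounting and free the structure itself.
 */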
static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

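/* Work callback used when the umem release has to be deferred to a
 * workqueue instead of running in the caller's context.
 */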
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

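/* Take a reference on the umem. */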
void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

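/* Drop a reference on the umem and release it when the last reference
 * goes away, either directly or via a workqueue if the caller asked
 * for deferred cleanup.
 */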
void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

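/* Pin the user memory area with FOLL_LONGTERM so the pages stay in
 * place for as long as the umem is in use. On a partial pin the pages
 * are unpinned again and -ENOMEM is returned.
 */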
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

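/* Charge the pinned pages against the user's RLIMIT_MEMLOCK limit,
 * unless the caller has CAP_IPC_LOCK. The cmpxchg loop makes the
 * check-and-update of locked_vm atomic against concurrent callers.
 */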
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

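/* Validate the registration request from user space and set up the
 * umem: sanity check chunk size, alignment and headroom, then account,
 * pin and map the pages.
 */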
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom;
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u64 npgs, addr = mr->addr, size = mr->len;
	unsigned int chunks, chunks_rem;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is not mapped
		 *   using huge pages.
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem);
	if (chunks == 0)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = (u32)npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

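/* Allocate a umem, assign it an id and register the user memory
 * described by @mr. Returns the umem or an ERR_PTR() on failure.
 */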
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}