// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>
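
/*
 * host_vm_change collects the host mmap/munmap/mprotect operations generated
 * while walking the page tables, so that they can be issued together by
 * do_ops().  'userspace' selects whether the ops target the userspace
 * process's address space (via its mm_id) or the kernel's own host mappings.
 */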
struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int userspace;
	int index;
	struct mm_struct *mm;
	void *data;
	int force;
};

#define INIT_HVC(mm, force, userspace) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .mm		= mm, \
	   .data	= NULL, \
	   .userspace	= userspace, \
	   .index	= 0, \
	   .force	= force })

static void report_enomem(void)
{
	printk(KERN_ERR "UML ran out of memory on the host side! "
	       "This can happen due to a memory limitation or because "
	       "vm.max_map_count has been reached.\n");
}
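
/*
 * Flush the first 'end' queued operations to the host.  Operations for a
 * userspace address space go through the skas map/unmap/protect interface;
 * kernel mappings are changed directly with the os_* helpers.  'finished'
 * tells the skas layer whether this is the last batch of the current flush.
 */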
static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			if (hvc->userspace)
				ret = map(&hvc->mm->context.id, op->u.mmap.addr,
					  op->u.mmap.len, op->u.mmap.prot,
					  op->u.mmap.fd,
					  op->u.mmap.offset, finished,
					  &hvc->data);
			else
				map_memory(op->u.mmap.addr, op->u.mmap.offset,
					   op->u.mmap.len, 1, 1, 1);
			break;
		case MUNMAP:
			if (hvc->userspace)
				ret = unmap(&hvc->mm->context.id,
					    op->u.munmap.addr,
					    op->u.munmap.len, finished,
					    &hvc->data);
			else
				ret = os_unmap_memory(
					(void *) op->u.munmap.addr,
					op->u.munmap.len);

			break;
		case MPROTECT:
			if (hvc->userspace)
				ret = protect(&hvc->mm->context.id,
					      op->u.mprotect.addr,
					      op->u.mprotect.len,
					      op->u.mprotect.prot,
					      finished, &hvc->data);
			else
				ret = os_protect_memory(
					(void *) op->u.mprotect.addr,
					op->u.mprotect.len,
					1, 1, 1);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	if (ret == -ENOMEM)
		report_enomem();

	return ret;
}
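
/*
 * add_mmap/add_munmap/add_mprotect each queue one host operation, merging
 * with the previous entry when it is of the same type and contiguous with
 * the new one.  When the ops array is full, the queued operations are
 * flushed with do_ops() before the new entry is added.
 */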
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd = -1, ret = 0;

	if (hvc->userspace)
		fd = phys_mapping(phys, &offset);
	else
		offset = phys;
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		    (last->u.mmap.addr + last->u.mmap.len == virt) &&
		    (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		    (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
					 } });
	return ret;
}

static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		    (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		    (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		    (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}
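
/*
 * Advance 'n' to the next 'inc'-aligned boundary (a full 'inc' step when it
 * is already aligned); 'inc' must be a power of two.
 */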
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
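
/*
 * Walk the PTEs in [addr, end), queueing an mmap, munmap or mprotect for
 * every page whose host mapping is out of date, and mark each PTE up to
 * date as it is processed.
 */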
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte)) {
				if (pte_newpage(*pte))
					ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
						       PAGE_SIZE, prot, hvc);
			} else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}
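
/*
 * The pmd, pud and p4d walkers below mirror each other: an absent entry at a
 * level that needs refreshing becomes one munmap covering the whole range,
 * otherwise the walk recurses one level down.
 */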
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	p4d_t *p4d;
	unsigned long next;
	int ret = 0;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		if (!p4d_present(*p4d)) {
			if (hvc->force || p4d_newpage(*p4d)) {
				ret = add_munmap(addr, next - addr, hvc);
				p4d_mkuptodate(*p4d);
			}
		} else
			ret = update_pud_range(p4d, addr, next, hvc);
	} while (p4d++, addr = next, ((addr < end) && !ret));
	return ret;
}
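
/*
 * Walk the page tables of 'mm' over [start_addr, end_addr) and bring the
 * host mappings up to date; 'force' remaps everything regardless of the
 * new-page/new-prot flags.  On failure the address space is marked to be
 * killed.
 */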
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0, userspace = 1;

	hvc = INIT_HVC(mm, force, userspace);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		} else
			ret = update_p4d_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		struct mm_id *mm_idp = &current->mm->context.id;

		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		mm_idp->kill = 1;
	}
}
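
/*
 * Sync the host mappings for a kernel virtual range against init_mm's page
 * tables.  Returns nonzero if anything was updated; failures here are fatal.
 */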
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err = 0, force = 0, userspace = 0;
	struct host_vm_change hvc;

	mm = &init_mm;
	hvc = INIT_HVC(mm, force, userspace);
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		p4d = p4d_offset(pgd, addr);
		if (!p4d_present(*p4d)) {
			last = ADD_ROUND(addr, P4D_SIZE);
			if (last > end)
				last = end;
			if (p4d_newpage(*p4d)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(p4d, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = add_munmap(addr, last - addr, &hvc);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = add_munmap(addr, PAGE_SIZE, &hvc);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, 0, &hvc);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
		}
		addr += PAGE_SIZE;
	}
	if (!err)
		err = do_ops(&hvc, hvc.index, 1);

	if (err < 0)
		panic("flush_tlb_kernel failed, errno = %d\n", err);
	return updated;
}
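
/*
 * Refresh the host mapping of a single userspace page directly, without the
 * batching machinery above.  The process is killed if the page tables are
 * inconsistent or the host operation fails.
 */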
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	p4d = p4d_offset(pgd, address);
	if (!p4d_present(*p4d))
		goto kill;

	pud = pud_offset(p4d, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err) {
		if (err == -ENOMEM)
			report_enomem();

		goto kill;
	}

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL);
}

void flush_tlb_all(void)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&current->mm->mm_users) == 0)
		return;

	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}
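
/*
 * Remap every vma of the current address space on the host, ignoring the
 * new-page/new-prot flags.
 */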
void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}