// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/signal.h>

#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

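/*
 * A host_vm_change queues pending mmap/munmap/mprotect requests for the
 * host address space.  The add_* helpers below coalesce adjacent,
 * compatible requests and flush the queue through do_ops() when it fills.
 */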
struct host_vm_change {
    struct host_vm_op {
        enum { NONE, MMAP, MUNMAP, MPROTECT } type;
        union {
            struct {
                unsigned long addr;
                unsigned long len;
                unsigned int prot;
                int fd;
                __u64 offset;
            } mmap;
            struct {
                unsigned long addr;
                unsigned long len;
            } munmap;
            struct {
                unsigned long addr;
                unsigned long len;
                unsigned int prot;
            } mprotect;
        } u;
    } ops[1];
    int userspace;
    int index;
    struct mm_struct *mm;
    void *data;
    int force;
};

#define INIT_HVC(mm, force, userspace) \
    ((struct host_vm_change) \
     { .ops     = { { .type = NONE } }, \
       .mm      = mm, \
       .data    = NULL, \
       .userspace   = userspace, \
       .index   = 0, \
       .force   = force })

static void report_enomem(void)
{
    printk(KERN_ERR "UML ran out of memory on the host side! "
            "This can happen due to a memory limitation or because "
            "vm.max_map_count has been reached.\n");
}

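/*
 * Issue the first 'end' queued operations to the host.  For userspace
 * address spaces the requests go through the SKAS helpers (map/unmap/
 * protect); for the kernel's own mapping they are applied directly with
 * map_memory()/os_unmap_memory()/os_protect_memory().  'finished' tells
 * the host side that this is the last batch of the current flush.
 */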
static int do_ops(struct host_vm_change *hvc, int end,
          int finished)
{
    struct host_vm_op *op;
    int i, ret = 0;

    for (i = 0; i < end && !ret; i++) {
        op = &hvc->ops[i];
        switch (op->type) {
        case MMAP:
            if (hvc->userspace)
                ret = map(&hvc->mm->context.id, op->u.mmap.addr,
                      op->u.mmap.len, op->u.mmap.prot,
                      op->u.mmap.fd,
                      op->u.mmap.offset, finished,
                      &hvc->data);
            else
                map_memory(op->u.mmap.addr, op->u.mmap.offset,
                       op->u.mmap.len, 1, 1, 1);
            break;
        case MUNMAP:
            if (hvc->userspace)
                ret = unmap(&hvc->mm->context.id,
                        op->u.munmap.addr,
                        op->u.munmap.len, finished,
                        &hvc->data);
            else
                ret = os_unmap_memory((void *) op->u.munmap.addr,
                              op->u.munmap.len);

            break;
        case MPROTECT:
            if (hvc->userspace)
                ret = protect(&hvc->mm->context.id,
                          op->u.mprotect.addr,
                          op->u.mprotect.len,
                          op->u.mprotect.prot,
                          finished, &hvc->data);
            else
                ret = os_protect_memory((void *) op->u.mprotect.addr,
                            op->u.mprotect.len,
                            1, 1, 1);
            break;
        default:
            printk(KERN_ERR "Unknown op type %d in do_ops\n",
                   op->type);
            BUG();
            break;
        }
    }

    if (ret == -ENOMEM)
        report_enomem();

    return ret;
}

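/*
 * Queue a host mmap of [virt, virt + len) backed by the physical page at
 * 'phys'.  If the request simply extends the previously queued mmap it is
 * merged into it; otherwise the queue is flushed when full and a new
 * entry is added.
 */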
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
            unsigned int prot, struct host_vm_change *hvc)
{
    __u64 offset;
    struct host_vm_op *last;
    int fd = -1, ret = 0;

    if (hvc->userspace)
        fd = phys_mapping(phys, &offset);
    else
        offset = phys;
    if (hvc->index != 0) {
        last = &hvc->ops[hvc->index - 1];
        if ((last->type == MMAP) &&
           (last->u.mmap.addr + last->u.mmap.len == virt) &&
           (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
           (last->u.mmap.offset + last->u.mmap.len == offset)) {
            last->u.mmap.len += len;
            return 0;
        }
    }

    if (hvc->index == ARRAY_SIZE(hvc->ops)) {
        ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
        hvc->index = 0;
    }

    hvc->ops[hvc->index++] = ((struct host_vm_op)
                  { .type   = MMAP,
                    .u = { .mmap = { .addr   = virt,
                                     .len    = len,
                                     .prot   = prot,
                                     .fd     = fd,
                                     .offset = offset }
               } });
    return ret;
}

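/*
 * Queue a host munmap of [addr, addr + len), merging with the previously
 * queued munmap when the ranges are contiguous.
 */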
static int add_munmap(unsigned long addr, unsigned long len,
              struct host_vm_change *hvc)
{
    struct host_vm_op *last;
    int ret = 0;

    if (hvc->index != 0) {
        last = &hvc->ops[hvc->index - 1];
        if ((last->type == MUNMAP) &&
           (last->u.munmap.addr + last->u.munmap.len == addr)) {
            last->u.munmap.len += len;
            return 0;
        }
    }

    if (hvc->index == ARRAY_SIZE(hvc->ops)) {
        ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
        hvc->index = 0;
    }

    hvc->ops[hvc->index++] = ((struct host_vm_op)
                  { .type   = MUNMAP,
                    .u = { .munmap = { .addr = addr,
                                       .len  = len } } });
    return ret;
}

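/*
 * Queue a host mprotect of [addr, addr + len), merging with the previously
 * queued mprotect when the ranges are contiguous and the protection
 * matches.
 */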
static int add_mprotect(unsigned long addr, unsigned long len,
            unsigned int prot, struct host_vm_change *hvc)
{
    struct host_vm_op *last;
    int ret = 0;

    if (hvc->index != 0) {
        last = &hvc->ops[hvc->index - 1];
        if ((last->type == MPROTECT) &&
           (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
           (last->u.mprotect.prot == prot)) {
            last->u.mprotect.len += len;
            return 0;
        }
    }

    if (hvc->index == ARRAY_SIZE(hvc->ops)) {
        ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
        hvc->index = 0;
    }

    hvc->ops[hvc->index++] = ((struct host_vm_op)
                  { .type   = MPROTECT,
                    .u = { .mprotect = { .addr = addr,
                                         .len  = len,
                                         .prot = prot } } });
    return ret;
}

/* Round n up to the next inc boundary strictly above it (inc must be a power of two). */
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

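/*
 * The update_*_range() helpers walk one level of the page tables between
 * addr and end, queueing the host operations needed to bring the host
 * mapping in sync: mmap for newly mapped pages, munmap for cleared
 * entries, mprotect for permission-only changes.
 */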
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                   unsigned long end,
                   struct host_vm_change *hvc)
{
    pte_t *pte;
    int r, w, x, prot, ret = 0;

    pte = pte_offset_kernel(pmd, addr);
    do {
        r = pte_read(*pte);
        w = pte_write(*pte);
        x = pte_exec(*pte);
        if (!pte_young(*pte)) {
            r = 0;
            w = 0;
        } else if (!pte_dirty(*pte))
            w = 0;

        prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
            (x ? UM_PROT_EXEC : 0));
        if (hvc->force || pte_newpage(*pte)) {
            if (pte_present(*pte)) {
                if (pte_newpage(*pte))
                    ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                               PAGE_SIZE, prot, hvc);
            } else
                ret = add_munmap(addr, PAGE_SIZE, hvc);
        } else if (pte_newprot(*pte))
            ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
        *pte = pte_mkuptodate(*pte);
    } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
    return ret;
}

static inline int update_pmd_range(pud_t *pud, unsigned long addr,
                   unsigned long end,
                   struct host_vm_change *hvc)
{
    pmd_t *pmd;
    unsigned long next;
    int ret = 0;

    pmd = pmd_offset(pud, addr);
    do {
        next = pmd_addr_end(addr, end);
        if (!pmd_present(*pmd)) {
            if (hvc->force || pmd_newpage(*pmd)) {
                ret = add_munmap(addr, next - addr, hvc);
                pmd_mkuptodate(*pmd);
            }
        } else
            ret = update_pte_range(pmd, addr, next, hvc);
    } while (pmd++, addr = next, ((addr < end) && !ret));
    return ret;
}

static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
                   unsigned long end,
                   struct host_vm_change *hvc)
{
    pud_t *pud;
    unsigned long next;
    int ret = 0;

    pud = pud_offset(p4d, addr);
    do {
        next = pud_addr_end(addr, end);
        if (!pud_present(*pud)) {
            if (hvc->force || pud_newpage(*pud)) {
                ret = add_munmap(addr, next - addr, hvc);
                pud_mkuptodate(*pud);
            }
        } else
            ret = update_pmd_range(pud, addr, next, hvc);
    } while (pud++, addr = next, ((addr < end) && !ret));
    return ret;
}

static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
                   unsigned long end,
                   struct host_vm_change *hvc)
{
    p4d_t *p4d;
    unsigned long next;
    int ret = 0;

    p4d = p4d_offset(pgd, addr);
    do {
        next = p4d_addr_end(addr, end);
        if (!p4d_present(*p4d)) {
            if (hvc->force || p4d_newpage(*p4d)) {
                ret = add_munmap(addr, next - addr, hvc);
                p4d_mkuptodate(*p4d);
            }
        } else
            ret = update_pud_range(p4d, addr, next, hvc);
    } while (p4d++, addr = next, ((addr < end) && !ret));
    return ret;
}

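/*
 * Walk the page tables for [start_addr, end_addr) in a userspace mm and
 * push the accumulated changes out to the host.  On failure the host
 * mapping may be inconsistent, so the address space is flagged to have
 * its process killed.
 */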
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
              unsigned long end_addr, int force)
{
    pgd_t *pgd;
    struct host_vm_change hvc;
    unsigned long addr = start_addr, next;
    int ret = 0, userspace = 1;

    hvc = INIT_HVC(mm, force, userspace);
    pgd = pgd_offset(mm, addr);
    do {
        next = pgd_addr_end(addr, end_addr);
        if (!pgd_present(*pgd)) {
            if (force || pgd_newpage(*pgd)) {
                ret = add_munmap(addr, next - addr, &hvc);
                pgd_mkuptodate(*pgd);
            }
        } else
            ret = update_p4d_range(pgd, addr, next, &hvc);
    } while (pgd++, addr = next, ((addr < end_addr) && !ret));

    if (!ret)
        ret = do_ops(&hvc, hvc.index, 1);

    /* This is not an else because ret is modified above */
    if (ret) {
        struct mm_id *mm_idp = &current->mm->context.id;

        printk(KERN_ERR "fix_range_common: failed, killing current "
               "process: %d\n", task_tgid_vnr(current));
        mm_idp->kill = 1;
    }
}

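/*
 * Same idea as fix_range_common(), but for the kernel's init_mm: bring the
 * host mapping of [start, end) up to date, skipping whole missing
 * page-table levels at once.  Returns nonzero if anything was changed and
 * panics if a host call fails.
 */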
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
    struct mm_struct *mm;
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    unsigned long addr, last;
    int updated = 0, err = 0, force = 0, userspace = 0;
    struct host_vm_change hvc;

    mm = &init_mm;
    hvc = INIT_HVC(mm, force, userspace);
    for (addr = start; addr < end;) {
        pgd = pgd_offset(mm, addr);
        if (!pgd_present(*pgd)) {
            last = ADD_ROUND(addr, PGDIR_SIZE);
            if (last > end)
                last = end;
            if (pgd_newpage(*pgd)) {
                updated = 1;
                err = add_munmap(addr, last - addr, &hvc);
                if (err < 0)
                    panic("munmap failed, errno = %d\n",
                          -err);
            }
            addr = last;
            continue;
        }

        p4d = p4d_offset(pgd, addr);
        if (!p4d_present(*p4d)) {
            last = ADD_ROUND(addr, P4D_SIZE);
            if (last > end)
                last = end;
            if (p4d_newpage(*p4d)) {
                updated = 1;
                err = add_munmap(addr, last - addr, &hvc);
                if (err < 0)
                    panic("munmap failed, errno = %d\n",
                          -err);
            }
            addr = last;
            continue;
        }

        pud = pud_offset(p4d, addr);
        if (!pud_present(*pud)) {
            last = ADD_ROUND(addr, PUD_SIZE);
            if (last > end)
                last = end;
            if (pud_newpage(*pud)) {
                updated = 1;
                err = add_munmap(addr, last - addr, &hvc);
                if (err < 0)
                    panic("munmap failed, errno = %d\n",
                          -err);
            }
            addr = last;
            continue;
        }

        pmd = pmd_offset(pud, addr);
        if (!pmd_present(*pmd)) {
            last = ADD_ROUND(addr, PMD_SIZE);
            if (last > end)
                last = end;
            if (pmd_newpage(*pmd)) {
                updated = 1;
                err = add_munmap(addr, last - addr, &hvc);
                if (err < 0)
                    panic("munmap failed, errno = %d\n",
                          -err);
            }
            addr = last;
            continue;
        }

        pte = pte_offset_kernel(pmd, addr);
        if (!pte_present(*pte) || pte_newpage(*pte)) {
            updated = 1;
            err = add_munmap(addr, PAGE_SIZE, &hvc);
            if (err < 0)
                panic("munmap failed, errno = %d\n",
                      -err);
            if (pte_present(*pte))
                err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
                           PAGE_SIZE, 0, &hvc);
        } else if (pte_newprot(*pte)) {
            updated = 1;
            err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
        }
        addr += PAGE_SIZE;
    }
    if (!err)
        err = do_ops(&hvc, hvc.index, 1);

    if (err < 0)
        panic("flush_tlb_kernel failed, errno = %d\n", err);
    return updated;
}

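/*
 * Synchronize the host mapping of a single page.  The host request is
 * issued immediately rather than queued; if it fails, the process is
 * sent SIGKILL.
 */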
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    struct mm_struct *mm = vma->vm_mm;
    void *flush = NULL;
    int r, w, x, prot, err = 0;
    struct mm_id *mm_id;

    address &= PAGE_MASK;

    pgd = pgd_offset(mm, address);
    if (!pgd_present(*pgd))
        goto kill;

    p4d = p4d_offset(pgd, address);
    if (!p4d_present(*p4d))
        goto kill;

    pud = pud_offset(p4d, address);
    if (!pud_present(*pud))
        goto kill;

    pmd = pmd_offset(pud, address);
    if (!pmd_present(*pmd))
        goto kill;

    pte = pte_offset_kernel(pmd, address);

    r = pte_read(*pte);
    w = pte_write(*pte);
    x = pte_exec(*pte);
    if (!pte_young(*pte)) {
        r = 0;
        w = 0;
    } else if (!pte_dirty(*pte)) {
        w = 0;
    }

    mm_id = &mm->context.id;
    prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
        (x ? UM_PROT_EXEC : 0));
    if (pte_newpage(*pte)) {
        if (pte_present(*pte)) {
            unsigned long long offset;
            int fd;

            fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
            err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
                  1, &flush);
        } else
            err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
    } else if (pte_newprot(*pte))
        err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

    if (err) {
        if (err == -ENOMEM)
            report_enomem();

        goto kill;
    }

    *pte = pte_mkuptodate(*pte);

    return;

kill:
    printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
    force_sig(SIGKILL);
}

void flush_tlb_all(void)
{
    /*
     * Don't bother flushing if this address space is about to be
     * destroyed.
     */
    if (atomic_read(&current->mm->mm_users) == 0)
        return;

    flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
    flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
    flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
    flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
              unsigned long end_addr, int force)
{
    /*
     * Don't bother flushing if this address space is about to be
     * destroyed.
     */
    if (atomic_read(&mm->mm_users) == 0)
        return;

    fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
             unsigned long end)
{
    if (vma->vm_mm == NULL)
        flush_tlb_kernel_range_common(start, end);
    else
        fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
            unsigned long end)
{
    fix_range(mm, start, end, 0);
}

void flush_tlb_mm(struct mm_struct *mm)
{
    struct vm_area_struct *vma = mm->mmap;

    while (vma != NULL) {
        fix_range(mm, vma->vm_start, vma->vm_end, 0);
        vma = vma->vm_next;
    }
}

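/*
 * Walk every VMA of the current process and force its host mapping to be
 * rebuilt (force = 1).
 */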
void force_flush_all(void)
{
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma = mm->mmap;

    while (vma != NULL) {
        fix_range(mm, vma->vm_start, vma->vm_end, 1);
        vma = vma->vm_next;
    }
}