// SPDX-License-Identifier: GPL-2.0
/*
 *    Copyright IBM Corp. 2006
 */

#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

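/*
 * Allocate pages for page table use: from the buddy allocator once
 * slab_is_available(), from memblock during early boot.
 */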
static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return memblock_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
		return;
	free_pages(addr, order);
}

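/*
 * Allocate a region/segment (CRST) table and initialize all of its
 * entries with @val.
 */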
void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}

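/*
 * Allocate a page table for the kernel (init_mm) and mark all of its
 * entries invalid.
 */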
pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}

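/*
 * Marker byte written into unused parts of vmemmap pages that are mapped
 * with large (PMD) frames, so that a completely unused PMD range can be
 * detected and freed again.
 */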
#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

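/*
 * Mark the remainder of the partially used PMD, from unused_sub_pmd_start
 * up to the next PMD_SIZE boundary, as PAGE_UNUSED.
 */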
static void vmemmap_flush_unused_sub_pmd(void)
{
	if (!unused_sub_pmd_start)
		return;
	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
	unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page from
	 * getting removed (just in case the memmap never gets initialized,
	 * e.g., because the memory block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

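/*
 * Account a sub-PMD range of an already populated vmemmap PMD as used.
 */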
static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_sub_pmd_start == start) {
		unused_sub_pmd_start = end;
		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
			unused_sub_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_sub_pmd();
	vmemmap_mark_sub_pmd_used(start, end);
}

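/*
 * Initialize PAGE_UNUSED tracking for a freshly allocated vmemmap PMD that
 * is only partially covered by the added range.
 */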
static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	vmemmap_mark_sub_pmd_used(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. Remember for the last added PMD the last
	 * unused range in the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_sub_pmd_start = end;
}

/* Returns true if the PMD is entirely unused. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();
	memset((void *)start, PAGE_UNUSED, end - start);
	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_PAGE_NOEXEC;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

				if (!new_page)
					goto out;
				set_pte(pte, __pte(__pa(new_page) | prot));
			} else {
				set_pte(pte, __pte(__pa(addr) | prot));
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}

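/* Free the PTE table behind @pmd if none of its entries are in use. */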
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
	pmd_clear(pmd);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_large(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    MACHINE_HAS_EDAT1 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				set_pmd(pmd, __pmd(__pa(addr) | prot));
				pages++;
				continue;
			} else if (!direct && MACHINE_HAS_EDAT1) {
				void *new_page;

				/*
				 * Use 1MB frames for vmemmap if available. We
				 * always use large frames even if they are only
				 * partially used. Otherwise we would have also
				 * page tables since vmemmap_populate gets
				 * called for each section separately.
				 */
				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
				if (new_page) {
					set_pmd(pmd, __pmd(__pa(new_page) | prot));
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}

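/* Free the PMD table behind @pud if none of its entries are in use. */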
static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	const unsigned long end = start + PUD_SIZE;
	pmd_t *pmd;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif
	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
	pud_clear(pud);
}

static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_REGION_ENTRY_NOEXEC;
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_large(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    MACHINE_HAS_EDAT2 && addr && direct &&
			    !debug_pagealloc_enabled()) {
				set_pud(pud, __pud(__pa(addr) | prot));
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	const unsigned long end = start + P4D_SIZE;
	pud_t *pud;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	const unsigned long end = start + PGDIR_SIZE;
	p4d_t *p4d;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;
#ifdef CONFIG_KASAN
	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
		return;
#endif

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
	pgd_clear(pgd);
}

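/*
 * Add or remove kernel mappings for the range [start, end). @direct
 * distinguishes the identity (1:1) mapping from the vmemmap; on removal
 * the kernel TLB is flushed for the range.
 */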
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false);
	if (ret)
		remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
	return ret;
}

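/*
 * Free a previously populated virtual mem_map range again.
 */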
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
}

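/*
 * Remove a physical memory range from the 1:1 mapping, serialized by
 * vmem_mutex.
 */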
void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

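/*
 * Physical address range that memory hotplug may map: everything below
 * VMEM_MAX_PHYS.
 */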
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = 0;
	mhp_range.end = VMEM_MAX_PHYS - 1;
	return mhp_range;
}

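/*
 * Add a physical memory range to the 1:1 mapping after validating it
 * against arch_get_mappable_range(); the range is torn down again on
 * failure.
 */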
int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct range range = arch_get_mappable_range();
	int ret;

	if (start < range.start ||
	    start + size > range.end + 1 ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}


/*
 * map whole physical memory to virtual memory (identity mapping)
 * we reserve enough space in the vmalloc area for vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	phys_addr_t base, end;
	u64 i;

	for_each_mem_range(i, &base, &end)
		vmem_add_range(base, end - base);
	__set_memory((unsigned long)_stext,
		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long)_etext,
		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long)_sinittext,
		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);

	/* lowcore must be executable for LPSWE */
	if (!static_key_enabled(&cpu_has_bear))
		set_memory_x(0, 1);

	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}