// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2015 Intel Corporation. All rights reserved. */
#include <linux/device.h>
#include <linux/io.h>
#include <linux/kasan.h>
#include <linux/memory_hotplug.h>
#include <linux/memremap.h>
#include <linux/pfn_t.h>
#include <linux/swap.h>
#include <linux/mmzone.h>
#include <linux/swapops.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/xarray.h>
#include "internal.h"

static DEFINE_XARRAY(pgmap_array);

/*
 * The memremap() and memremap_pages() interfaces are alternately used to map
 * persistent memory namespaces, and they place different constraints on the
 * alignment and size of the mapping.  The role of memremap_compat_align() is
 * to communicate the minimum arch-supported alignment of a namespace so that
 * it can switch modes without violating that constraint.  When the
 * architecture does not provide its own implementation, SUBSECTION_SIZE is
 * the compatible default.
 */
#ifndef CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN
unsigned long memremap_compat_align(void)
{
	return SUBSECTION_SIZE;
}
EXPORT_SYMBOL_GPL(memremap_compat_align);
#endif

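/*
 * devmap_managed_key gates the extra put-path handling needed for fs/dax
 * pages; it is only enabled while at least one MEMORY_DEVICE_FS_DAX pagemap
 * is registered, so other page types pay nothing for it.
 */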
#ifdef CONFIG_FS_DAX
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
EXPORT_SYMBOL(devmap_managed_key);

static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
	if (pgmap->type == MEMORY_DEVICE_FS_DAX)
		static_branch_dec(&devmap_managed_key);
}

static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
{
	if (pgmap->type == MEMORY_DEVICE_FS_DAX)
		static_branch_inc(&devmap_managed_key);
}
#else
static void devmap_managed_enable_get(struct dev_pagemap *pgmap)
{
}
static void devmap_managed_enable_put(struct dev_pagemap *pgmap)
{
}
#endif

static void pgmap_array_delete(struct range *range)
{
	xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
			NULL, GFP_KERNEL);
	synchronize_rcu();
}

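/*
 * First pfn of a range that is backed by usable struct pages: for range 0
 * this skips the pfns consumed by the vmem_altmap, i.e. the device memory
 * that holds the memmap itself.
 */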
static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id)
{
	struct range *range = &pgmap->ranges[range_id];
	unsigned long pfn = PHYS_PFN(range->start);

	if (range_id)
		return pfn;
	return pfn + vmem_altmap_offset(pgmap_altmap(pgmap));
}

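/*
 * Returns true if @pfn falls inside one of @pgmap's ranges and refers to a
 * page that is actually mapped, i.e. not part of the altmap reservation at
 * the start of range 0.
 */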
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
{
	int i;

	for (i = 0; i < pgmap->nr_range; i++) {
		struct range *range = &pgmap->ranges[i];

		if (pfn >= PHYS_PFN(range->start) &&
		    pfn <= PHYS_PFN(range->end))
			return pfn >= pfn_first(pgmap, i);
	}

	return false;
}

static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
{
	const struct range *range = &pgmap->ranges[range_id];

	return (range->start + range_len(range)) >> PAGE_SHIFT;
}

static unsigned long pfn_len(struct dev_pagemap *pgmap, unsigned long range_id)
{
	return (pfn_end(pgmap, range_id) -
		pfn_first(pgmap, range_id)) >> pgmap->vmemmap_shift;
}

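/*
 * Tear down a single range: remove its pfns from the ZONE_DEVICE zone, undo
 * the memory hotplug performed by pagemap_range(), and drop the pgmap_array
 * entries so get_dev_pagemap() can no longer find it.
 */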
static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
{
	struct range *range = &pgmap->ranges[range_id];
	struct page *first_page;

	/* make sure to access a memmap that was actually initialized */
	first_page = pfn_to_page(pfn_first(pgmap, range_id));

	/* pages are dead and unused, undo the arch mapping */
	mem_hotplug_begin();
	remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start),
				   PHYS_PFN(range_len(range)));
	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		__remove_pages(PHYS_PFN(range->start),
			       PHYS_PFN(range_len(range)), NULL);
	} else {
		arch_remove_memory(range->start, range_len(range),
				   pgmap_altmap(pgmap));
		kasan_remove_zero_shadow(__va(range->start), range_len(range));
	}
	mem_hotplug_done();

	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
	pgmap_array_delete(range);
}

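/*
 * Kill the percpu reference, drop the per-range references taken at map
 * time, wait for all outstanding users to finish, then unmap every range.
 */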
void memunmap_pages(struct dev_pagemap *pgmap)
{
	int i;

	percpu_ref_kill(&pgmap->ref);
	for (i = 0; i < pgmap->nr_range; i++)
		percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i));
	wait_for_completion(&pgmap->done);

	for (i = 0; i < pgmap->nr_range; i++)
		pageunmap_range(pgmap, i);
	percpu_ref_exit(&pgmap->ref);

	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
	devmap_managed_enable_put(pgmap);
}
EXPORT_SYMBOL_GPL(memunmap_pages);

static void devm_memremap_pages_release(void *data)
{
	memunmap_pages(data);
}

static void dev_pagemap_percpu_release(struct percpu_ref *ref)
{
	struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref);

	complete(&pgmap->done);
}

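/*
 * Hotplug a single range of device memory: validate it against existing
 * mappings and System RAM, register it in pgmap_array, add the pages, move
 * them to ZONE_DEVICE and initialize their struct pages.
 */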
static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
		int range_id, int nid)
{
	const bool is_private = pgmap->type == MEMORY_DEVICE_PRIVATE;
	struct range *range = &pgmap->ranges[range_id];
	struct dev_pagemap *conflict_pgmap;
	int error, is_ram;

	if (WARN_ONCE(pgmap_altmap(pgmap) && range_id > 0,
				"altmap not supported for multiple ranges\n"))
		return -EINVAL;

	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
	if (conflict_pgmap) {
		WARN(1, "Conflicting mapping in same section\n");
		put_dev_pagemap(conflict_pgmap);
		return -ENOMEM;
	}

	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
	if (conflict_pgmap) {
		WARN(1, "Conflicting mapping in same section\n");
		put_dev_pagemap(conflict_pgmap);
		return -ENOMEM;
	}

	is_ram = region_intersects(range->start, range_len(range),
		IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

	if (is_ram != REGION_DISJOINT) {
		WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n",
				is_ram == REGION_MIXED ? "mixed" : "ram",
				range->start, range->end);
		return -ENXIO;
	}

	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start),
				PHYS_PFN(range->end), pgmap, GFP_KERNEL));
	if (error)
		return error;

	if (nid < 0)
		nid = numa_mem_id();

	error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
			range_len(range));
	if (error)
		goto err_pfn_remap;

	if (!mhp_range_allowed(range->start, range_len(range), !is_private)) {
		error = -EINVAL;
		goto err_kasan;
	}

	mem_hotplug_begin();

	/*
	 * For device private memory we call add_pages() as we only need to
	 * allocate and initialize struct pages for the device memory; that
	 * memory is not CPU-accessible, so no linear mapping is created for
	 * it the way arch_add_memory() would.
	 *
	 * For all other device memory types, which are accessible by the
	 * CPU, the linear mapping is wanted and arch_add_memory() is used.
	 */
	if (is_private) {
		error = add_pages(nid, PHYS_PFN(range->start),
				PHYS_PFN(range_len(range)), params);
	} else {
		error = kasan_add_zero_shadow(__va(range->start), range_len(range));
		if (error) {
			mem_hotplug_done();
			goto err_kasan;
		}

		error = arch_add_memory(nid, range->start, range_len(range),
					params);
	}

	if (!error) {
		struct zone *zone;

		zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
		move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
				PHYS_PFN(range_len(range)), params->altmap,
				MIGRATE_MOVABLE);
	}

	mem_hotplug_done();
	if (error)
		goto err_add_memory;

	/*
	 * Initialization of the pages has been deferred until now in order
	 * to allow us to do the work while not holding the hotplug lock.
	 */
	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
				PHYS_PFN(range->start),
				PHYS_PFN(range_len(range)), pgmap);
	percpu_ref_get_many(&pgmap->ref, pfn_len(pgmap, range_id));
	return 0;

err_add_memory:
	if (!is_private)
		kasan_remove_zero_shadow(__va(range->start), range_len(range));
err_kasan:
	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
err_pfn_remap:
	pgmap_array_delete(range);
	return error;
}

/*
 * Not device managed version of devm_memremap_pages(), undone by
 * memunmap_pages().  Please use devm_memremap_pages() if you have a struct
 * device available.
 */
void *memremap_pages(struct dev_pagemap *pgmap, int nid)
{
	struct mhp_params params = {
		.altmap = pgmap_altmap(pgmap),
		.pgmap = pgmap,
		.pgprot = PAGE_KERNEL,
	};
	const int nr_range = pgmap->nr_range;
	int error, i;

	if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
		return ERR_PTR(-EINVAL);

	switch (pgmap->type) {
	case MEMORY_DEVICE_PRIVATE:
		if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
			WARN(1, "Device private memory not supported\n");
			return ERR_PTR(-EINVAL);
		}
		if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
			WARN(1, "Missing migrate_to_ram method\n");
			return ERR_PTR(-EINVAL);
		}
		if (!pgmap->ops->page_free) {
			WARN(1, "Missing page_free method\n");
			return ERR_PTR(-EINVAL);
		}
		if (!pgmap->owner) {
			WARN(1, "Missing owner\n");
			return ERR_PTR(-EINVAL);
		}
		break;
	case MEMORY_DEVICE_COHERENT:
		if (!pgmap->ops->page_free) {
			WARN(1, "Missing page_free method\n");
			return ERR_PTR(-EINVAL);
		}
		if (!pgmap->owner) {
			WARN(1, "Missing owner\n");
			return ERR_PTR(-EINVAL);
		}
		break;
	case MEMORY_DEVICE_FS_DAX:
		if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
			WARN(1, "File system DAX not supported\n");
			return ERR_PTR(-EINVAL);
		}
		break;
	case MEMORY_DEVICE_GENERIC:
		break;
	case MEMORY_DEVICE_PCI_P2PDMA:
		params.pgprot = pgprot_noncached(params.pgprot);
		break;
	default:
		WARN(1, "Invalid pgmap type %d\n", pgmap->type);
		break;
	}

	init_completion(&pgmap->done);
	error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0,
				GFP_KERNEL);
	if (error)
		return ERR_PTR(error);

	devmap_managed_enable_get(pgmap);

	/*
	 * Clear pgmap->nr_range as it will be incremented for each
	 * successfully processed range. This communicates how many
	 * regions to unwind in the abort case.
	 */
	pgmap->nr_range = 0;
	error = 0;
	for (i = 0; i < nr_range; i++) {
		error = pagemap_range(pgmap, &params, i, nid);
		if (error)
			break;
		pgmap->nr_range++;
	}

	if (i < nr_range) {
		memunmap_pages(pgmap);
		pgmap->nr_range = nr_range;
		return ERR_PTR(error);
	}

	return __va(pgmap->ranges[0].start);
}
EXPORT_SYMBOL_GPL(memremap_pages);

/**
 * devm_memremap_pages - remap and provide memmap backing for the given resource
 * @dev: hosting device for @pgmap
 * @pgmap: pointer to a struct dev_pagemap
 *
 * At a minimum the range and type members of @pgmap must be initialized by
 * the caller before passing it to this function.  The mapping is torn down
 * automatically when @dev is unbound, or explicitly with
 * devm_memunmap_pages().
 */
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
	int error;
	void *ret;

	ret = memremap_pages(pgmap, dev_to_node(dev));
	if (IS_ERR(ret))
		return ret;

	error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
			pgmap);
	if (error)
		return ERR_PTR(error);
	return ret;
}
EXPORT_SYMBOL_GPL(devm_memremap_pages);

void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
	devm_release_action(dev, devm_memremap_pages_release, pgmap);
}
EXPORT_SYMBOL_GPL(devm_memunmap_pages);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	/* number of pfns from base where pfn_to_page() is valid */
	if (altmap)
		return altmap->reserve + altmap->free;
	return 0;
}

void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
{
	altmap->alloc -= nr_pfns;
}

/**
 * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
 * @pfn: page frame number to look up
 * @pgmap: optional known pgmap that already has a reference
 *
 * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
 * is non-NULL but does not cover @pfn the reference to it will be released.
 */
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	resource_size_t phys = PFN_PHYS(pfn);

	/*
	 * In the cached case we're already holding a live reference.
	 */
	if (pgmap) {
		if (phys >= pgmap->range.start && phys <= pgmap->range.end)
			return pgmap;
		put_dev_pagemap(pgmap);
	}

	/* fall back to slow path lookup */
	rcu_read_lock();
	pgmap = xa_load(&pgmap_array, PHYS_PFN(phys));
	if (pgmap && !percpu_ref_tryget_live(&pgmap->ref))
		pgmap = NULL;
	rcu_read_unlock();

	return pgmap;
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);

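/*
 * Called when the last reference to a ZONE_DEVICE page is dropped: clean the
 * page up and hand it back to the driver via pgmap->ops->page_free().
 */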
void free_zone_device_page(struct page *page)
{
	if (WARN_ON_ONCE(!page->pgmap->ops || !page->pgmap->ops->page_free))
		return;

	mem_cgroup_uncharge(page_folio(page));

	/*
	 * Note: we don't expect anonymous compound pages yet. Once supported
	 * and we could PTE-map them similar to THP, we'd have to clear
	 * PG_anon_exclusive on all tail pages.
	 */
	VM_BUG_ON_PAGE(PageAnon(page) && PageCompound(page), page);
	if (PageAnon(page))
		__ClearPageAnonExclusive(page);

	/*
	 * When a device managed page is freed, page->mapping may still hold
	 * a stale value; for example, its lower bits may still identify the
	 * page as anonymous.  A stale mapping misleads anyone who later
	 * checks PageAnon() (e.g. the migrate_vma_*() paths), so clear it
	 * before handing the page back to the driver.
	 *
	 * For other types of ZONE_DEVICE pages, migration is either handled
	 * differently or not done at all, so there is no need to clear
	 * page->mapping.
	 */
	page->mapping = NULL;
	page->pgmap->ops->page_free(page);

	/*
	 * Reset the page count to 1 to prepare for handing out the page again.
	 */
	set_page_count(page, 1);
}

#ifdef CONFIG_FS_DAX
bool __put_devmap_managed_page_refs(struct page *page, int refs)
{
	if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
		return false;

	/*
	 * fsdax page refcounts are 1-based, rather than 0-based: if
	 * refcount is 1, then the page is free and the refcount is
	 * stable because nobody holds a reference on the page.
	 */
	if (page_ref_sub_return(page, refs) == 1)
		wake_up_var(&page->_refcount);
	return true;
}
EXPORT_SYMBOL(__put_devmap_managed_page_refs);
#endif