// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/percpu.c - percpu memory allocator
 *
 * Percpu memory is served from chunks.  Each chunk consists of one unit
 * per possible CPU, laid out according to the unit map built at boot.
 * Within a chunk, allocation is tracked by a bitmap in units of
 * PCPU_MIN_ALLOC_SIZE, with per-block metadata (struct pcpu_block_md)
 * caching hints about free regions so that scans stay cheap.  The first
 * chunk also backs the kernel's static percpu area.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/lcm.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/memcontrol.h>

#include <asm/cacheflush.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#define CREATE_TRACE_POINTS
#include <trace/events/percpu.h>

#include "percpu-internal.h"

/* chunks are sorted into slots by free size; each slot covers a power-of-two range */
#define PCPU_SLOT_BASE_SHIFT		5
/* chunks in slots below this are moved to slot 0 when an allocation attempt in them fails */
#define PCPU_SLOT_FAIL_THRESHOLD	3

/* low/high watermarks for the number of empty populated pages kept in reserve */
#define PCPU_EMPTY_POP_PAGES_LOW	2
#define PCPU_EMPTY_POP_PAGES_HIGH	4

#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr)					\
	(void __percpu *)((unsigned long)(addr) -			\
			  (unsigned long)pcpu_base_addr +		\
			  (unsigned long)__per_cpu_start)
#endif
#ifndef __pcpu_ptr_to_addr
#define __pcpu_ptr_to_addr(ptr)						\
	(void __force *)((unsigned long)(ptr) +				\
			 (unsigned long)pcpu_base_addr -		\
			 (unsigned long)__per_cpu_start)
#endif
#else	/* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr)	(void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr)		(void __force *)(ptr)
#endif	/* CONFIG_SMP */

/* unit geometry and slot bookkeeping, set up once by pcpu_setup_first_chunk() */
static int pcpu_unit_pages __ro_after_init;
static int pcpu_unit_size __ro_after_init;
static int pcpu_nr_units __ro_after_init;
static int pcpu_atom_size __ro_after_init;
int pcpu_nr_slots __ro_after_init;
static int pcpu_free_slot __ro_after_init;
int pcpu_sidelined_slot __ro_after_init;
int pcpu_to_depopulate_slot __ro_after_init;
static size_t pcpu_chunk_struct_size __ro_after_init;

/* cpus with the lowest and highest unit addresses */
static unsigned int pcpu_low_unit_cpu __ro_after_init;
static unsigned int pcpu_high_unit_cpu __ro_after_init;

/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __ro_after_init;

static const int *pcpu_unit_map __ro_after_init;		/* cpu -> unit map */
const unsigned long *pcpu_unit_offsets __ro_after_init;	/* cpu -> unit offset */

/* group information, used for vm allocation */
static int pcpu_nr_groups __ro_after_init;
static const unsigned long *pcpu_group_offsets __ro_after_init;
static const size_t *pcpu_group_sizes __ro_after_init;

/*
 * The first chunk which always exists.  Note that unlike other
 * chunks, this one can be allocated and mapped in several different
 * ways and thus often doesn't live in the vmalloc area.
 */
struct pcpu_chunk *pcpu_first_chunk __ro_after_init;

/*
 * Optional reserved chunk.  This chunk reserves part of the first
 * chunk and serves it for reserved allocations.  When the reserved
 * region doesn't exist, the following variable is NULL.
 */
struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init;

DEFINE_SPINLOCK(pcpu_lock);	/* all internal data structures */
static DEFINE_MUTEX(pcpu_alloc_mutex);	/* chunk create/destroy, [de]population */

struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */

/* chunks which need their map areas extended, protected by pcpu_lock */
static LIST_HEAD(pcpu_map_extend_chunks);

/*
 * The number of empty populated pages, protected by pcpu_lock.
 * The reserved chunk doesn't contribute to the count.
 */
int pcpu_nr_empty_pop_pages;

/*
 * The number of populated pages in use by the allocator, protected by
 * pcpu_lock.  This number is kept per unit per chunk (i.e. when a page gets
 * populated or depopulated, it is accounted in all units).
 */
static unsigned long pcpu_nr_populated;

/*
 * Balance work is used to populate or destroy chunks asynchronously.  We
 * try to keep the number of populated free pages between
 * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and schedule
 * balance work if any chunk becomes empty.
 */
static void pcpu_balance_workfn(struct work_struct *work);
static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);
static bool pcpu_async_enabled __read_mostly;
static bool pcpu_atomic_alloc_failed;

static void pcpu_schedule_balance_work(void)
{
	if (pcpu_async_enabled)
		schedule_work(&pcpu_balance_work);
}

/**
 * pcpu_addr_in_chunk - check if the address is served from this chunk
 * @chunk: chunk of interest
 * @addr: percpu address
 *
 * RETURNS:
 * True if the address is served from this chunk.
 */
0219 static bool pcpu_addr_in_chunk(struct pcpu_chunk *chunk, void *addr)
0220 {
0221 void *start_addr, *end_addr;
0222
0223 if (!chunk)
0224 return false;
0225
0226 start_addr = chunk->base_addr + chunk->start_offset;
0227 end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE -
0228 chunk->end_offset;
0229
0230 return addr >= start_addr && addr < end_addr;
0231 }
0232
0233 static int __pcpu_size_to_slot(int size)
0234 {
0235 int highbit = fls(size);
0236 return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
0237 }
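
/*
 * Worked example of the slot mapping above (illustrative only; values
 * follow from PCPU_SLOT_BASE_SHIFT == 5 as defined in this file):
 *
 *	size =   16 bytes -> fls(16)   ==  5 -> max(5 - 5 + 2, 1)  == 2
 *	size =   32 bytes -> fls(32)   ==  6 -> max(6 - 5 + 2, 1)  == 3
 *	size = 1024 bytes -> fls(1024) == 11 -> max(11 - 5 + 2, 1) == 8
 *
 * i.e. each doubling of the free size moves a chunk up one slot.
 */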
0238
0239 static int pcpu_size_to_slot(int size)
0240 {
0241 if (size == pcpu_unit_size)
0242 return pcpu_free_slot;
0243 return __pcpu_size_to_slot(size);
0244 }
0245
0246 static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
0247 {
0248 const struct pcpu_block_md *chunk_md = &chunk->chunk_md;
0249
0250 if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE ||
0251 chunk_md->contig_hint == 0)
0252 return 0;
0253
0254 return pcpu_size_to_slot(chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE);
0255 }
0256
0257
0258 static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
0259 {
0260 page->index = (unsigned long)pcpu;
0261 }
0262
0263
0264 static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
0265 {
0266 return (struct pcpu_chunk *)page->index;
0267 }
0268
0269 static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx)
0270 {
0271 return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx;
0272 }
0273
0274 static unsigned long pcpu_unit_page_offset(unsigned int cpu, int page_idx)
0275 {
0276 return pcpu_unit_offsets[cpu] + (page_idx << PAGE_SHIFT);
0277 }
0278
0279 static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
0280 unsigned int cpu, int page_idx)
0281 {
0282 return (unsigned long)chunk->base_addr +
0283 pcpu_unit_page_offset(cpu, page_idx);
0284 }
0285
0286
0287
0288
0289
0290 static unsigned long *pcpu_index_alloc_map(struct pcpu_chunk *chunk, int index)
0291 {
0292 return chunk->alloc_map +
0293 (index * PCPU_BITMAP_BLOCK_BITS / BITS_PER_LONG);
0294 }
0295
0296 static unsigned long pcpu_off_to_block_index(int off)
0297 {
0298 return off / PCPU_BITMAP_BLOCK_BITS;
0299 }
0300
0301 static unsigned long pcpu_off_to_block_off(int off)
0302 {
0303 return off & (PCPU_BITMAP_BLOCK_BITS - 1);
0304 }
0305
0306 static unsigned long pcpu_block_off_to_off(int index, int off)
0307 {
0308 return index * PCPU_BITMAP_BLOCK_BITS + off;
0309 }
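
/*
 * Example of the offset <-> block conversions above (illustrative only),
 * assuming a 4K page and PCPU_MIN_ALLOC_SIZE of 4 bytes so that
 * PCPU_BITMAP_BLOCK_BITS == 1024, i.e. one md block per page:
 *
 *	bit_off = 2500:	pcpu_off_to_block_index() == 2,
 *			pcpu_off_to_block_off()   == 452,
 *			pcpu_block_off_to_off(2, 452) == 2500 again.
 */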
0310

/**
 * pcpu_check_block_hint - check against the contig hint
 * @block: block of interest
 * @bits: size of allocation
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Check to see if the allocation can fit in the block's contig hint.
 * Note, a chunk uses the same hints as a block so this can also check
 * against the chunk's contig hint.
 */
0321 static bool pcpu_check_block_hint(struct pcpu_block_md *block, int bits,
0322 size_t align)
0323 {
0324 int bit_off = ALIGN(block->contig_hint_start, align) -
0325 block->contig_hint_start;
0326
0327 return bit_off + bits <= block->contig_hint;
0328 }
0329

/*
 * pcpu_next_hint - determine which hint to use
 * @block: block of interest
 * @alloc_bits: size of allocation
 *
 * This determines if we should scan based on the scan_hint or first_free.
 * In general, we want to scan from first_free to fulfill allocations by
 * first fit.  However, if we know a scan_hint at position scan_hint_start
 * cannot fulfill an allocation, we can begin scanning from there knowing
 * the contig_hint will be our fallback.
 */
0341 static int pcpu_next_hint(struct pcpu_block_md *block, int alloc_bits)
0342 {
	/*
	 * The three conditions below determine if we can skip past the
	 * scan_hint.  First, does the scan hint exist.  Second, is the
	 * contig_hint after the scan_hint (possibly not true iff
	 * contig_hint == scan_hint).  Third, is the allocation request
	 * larger than the scan_hint.
	 */
0350 if (block->scan_hint &&
0351 block->contig_hint_start > block->scan_hint_start &&
0352 alloc_bits > block->scan_hint)
0353 return block->scan_hint_start + block->scan_hint;
0354
0355 return block->first_free;
0356 }
0357

/**
 * pcpu_next_md_free_region - finds the next hint free area
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * Helper function for pcpu_for_each_md_free_region.  It checks
 * block->contig_hint and performs aggregation across blocks to find the
 * next hint.  It modifies bit_off and bits in-place to be consumed in the
 * loop.
 */
0369 static void pcpu_next_md_free_region(struct pcpu_chunk *chunk, int *bit_off,
0370 int *bits)
0371 {
0372 int i = pcpu_off_to_block_index(*bit_off);
0373 int block_off = pcpu_off_to_block_off(*bit_off);
0374 struct pcpu_block_md *block;
0375
0376 *bits = 0;
0377 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
0378 block++, i++) {
0379
0380 if (*bits) {
0381 *bits += block->left_free;
0382 if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
0383 continue;
0384 return;
0385 }
0386
0387
0388
0389
0390
0391
0392
0393
0394
0395 *bits = block->contig_hint;
0396 if (*bits && block->contig_hint_start >= block_off &&
0397 *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) {
0398 *bit_off = pcpu_block_off_to_off(i,
0399 block->contig_hint_start);
0400 return;
0401 }
0402
0403 block_off = 0;
0404
0405 *bits = block->right_free;
0406 *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free;
0407 }
0408 }
0409

/**
 * pcpu_next_fit_region - finds fit areas for a given allocation request
 * @chunk: chunk of interest
 * @alloc_bits: size of allocation
 * @align: alignment of area (max PAGE_SIZE)
 * @bit_off: chunk offset
 * @bits: size of the free area
 *
 * Finds the next free region that is viable for use with a given size and
 * alignment.  This only returns if there is a valid area to be used for this
 * allocation.  block->first_free is returned if the allocation request fits
 * within the block to see if the request can be fulfilled prior to the contig
 * hint.
 */
0424 static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
0425 int align, int *bit_off, int *bits)
0426 {
0427 int i = pcpu_off_to_block_index(*bit_off);
0428 int block_off = pcpu_off_to_block_off(*bit_off);
0429 struct pcpu_block_md *block;
0430
0431 *bits = 0;
0432 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk);
0433 block++, i++) {
0434
0435 if (*bits) {
0436 *bits += block->left_free;
0437 if (*bits >= alloc_bits)
0438 return;
0439 if (block->left_free == PCPU_BITMAP_BLOCK_BITS)
0440 continue;
0441 }
0442
0443
0444 *bits = ALIGN(block->contig_hint_start, align) -
0445 block->contig_hint_start;
0446
0447
0448
0449
0450 if (block->contig_hint &&
0451 block->contig_hint_start >= block_off &&
0452 block->contig_hint >= *bits + alloc_bits) {
0453 int start = pcpu_next_hint(block, alloc_bits);
0454
0455 *bits += alloc_bits + block->contig_hint_start -
0456 start;
0457 *bit_off = pcpu_block_off_to_off(i, start);
0458 return;
0459 }
0460
0461 block_off = 0;
0462
0463 *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free,
0464 align);
0465 *bits = PCPU_BITMAP_BLOCK_BITS - *bit_off;
0466 *bit_off = pcpu_block_off_to_off(i, *bit_off);
0467 if (*bits >= alloc_bits)
0468 return;
0469 }
0470
0471
0472 *bit_off = pcpu_chunk_map_bits(chunk);
0473 }
0474

/*
 * Metadata free area iterators.  These aggregate free areas based on the
 * per-block metadata and hand back the offset @bit_off and size in bits
 * @bits of each free area.  pcpu_for_each_fit_region additionally takes a
 * size and alignment and only stops on regions that can serve the request.
 */
0481 #define pcpu_for_each_md_free_region(chunk, bit_off, bits) \
0482 for (pcpu_next_md_free_region((chunk), &(bit_off), &(bits)); \
0483 (bit_off) < pcpu_chunk_map_bits((chunk)); \
0484 (bit_off) += (bits) + 1, \
0485 pcpu_next_md_free_region((chunk), &(bit_off), &(bits)))
0486
0487 #define pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) \
0488 for (pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
0489 &(bits)); \
0490 (bit_off) < pcpu_chunk_map_bits((chunk)); \
0491 (bit_off) += (bits), \
0492 pcpu_next_fit_region((chunk), (alloc_bits), (align), &(bit_off), \
0493 &(bits)))
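
/*
 * Typical use of the fit iterator, mirroring pcpu_find_block_fit() below
 * (illustrative sketch only):
 *
 *	int bit_off = 0, bits = 0;
 *
 *	pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
 *		if (region_is_usable(chunk, bit_off, bits))
 *			break;		// bit_off now points at a candidate
 *	}
 *	if (bit_off == pcpu_chunk_map_bits(chunk))
 *		;			// no fit in this chunk
 *
 * region_is_usable() stands in for whatever extra constraint the caller
 * applies (e.g. pcpu_is_populated() for atomic allocations).
 */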
0494

/**
 * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
 * @gfp: allocation flags
 *
 * Allocate @size bytes.  If @size is smaller than PAGE_SIZE, kzalloc() is
 * used; otherwise, the equivalent of vzalloc() is used.  The returned
 * memory is always zeroed.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
0508 static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
0509 {
0510 if (WARN_ON_ONCE(!slab_is_available()))
0511 return NULL;
0512
0513 if (size <= PAGE_SIZE)
0514 return kzalloc(size, gfp);
0515 else
0516 return __vmalloc(size, gfp | __GFP_ZERO);
0517 }
0518

/**
 * pcpu_mem_free - free memory
 * @ptr: memory to free
 *
 * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
 */
0525 static void pcpu_mem_free(void *ptr)
0526 {
0527 kvfree(ptr);
0528 }
0529
0530 static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
0531 bool move_front)
0532 {
0533 if (chunk != pcpu_reserved_chunk) {
0534 if (move_front)
0535 list_move(&chunk->list, &pcpu_chunk_lists[slot]);
0536 else
0537 list_move_tail(&chunk->list, &pcpu_chunk_lists[slot]);
0538 }
0539 }
0540
0541 static void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot)
0542 {
0543 __pcpu_chunk_move(chunk, slot, true);
0544 }
0545

/**
 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
 * @chunk: chunk of interest
 * @oslot: the previous slot it was on
 *
 * This function is called after an allocation or free changed @chunk.
 * New slot according to the changed state is determined and @chunk is
 * moved to the slot.  Note that the reserved chunk is never put on
 * chunk slots.
 *
 * CONTEXT:
 * pcpu_lock.
 */
0559 static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
0560 {
0561 int nslot = pcpu_chunk_slot(chunk);
0562
0563
0564 if (chunk->isolated)
0565 return;
0566
0567 if (oslot != nslot)
0568 __pcpu_chunk_move(chunk, nslot, oslot < nslot);
0569 }
0570
0571 static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
0572 {
0573 lockdep_assert_held(&pcpu_lock);
0574
0575 if (!chunk->isolated) {
0576 chunk->isolated = true;
0577 pcpu_nr_empty_pop_pages -= chunk->nr_empty_pop_pages;
0578 }
0579 list_move(&chunk->list, &pcpu_chunk_lists[pcpu_to_depopulate_slot]);
0580 }
0581
0582 static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
0583 {
0584 lockdep_assert_held(&pcpu_lock);
0585
0586 if (chunk->isolated) {
0587 chunk->isolated = false;
0588 pcpu_nr_empty_pop_pages += chunk->nr_empty_pop_pages;
0589 pcpu_chunk_relocate(chunk, -1);
0590 }
0591 }

/*
 * pcpu_update_empty_pages - update empty page counters
 * @chunk: chunk of interest
 * @nr: nr of empty pages
 *
 * This is used to keep track of the empty pages now based on the premise
 * a md_block covers a page.  The hint update functions recognize if a block
 * is made full or broken to calculate deltas for keeping track of free pages.
 */
0602 static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
0603 {
0604 chunk->nr_empty_pop_pages += nr;
0605 if (chunk != pcpu_reserved_chunk && !chunk->isolated)
0606 pcpu_nr_empty_pop_pages += nr;
0607 }
0608

/*
 * pcpu_region_overlap - determines if two regions overlap
 * @a: start of first region, inclusive
 * @b: end of first region, exclusive
 * @x: start of second region, inclusive
 * @y: end of second region, exclusive
 *
 * This is used to determine if the hint region [a, b) overlaps with the
 * allocated region [x, y).
 */
0619 static inline bool pcpu_region_overlap(int a, int b, int x, int y)
0620 {
0621 return (a < y) && (x < b);
0622 }
0623

/**
 * pcpu_block_update - updates a block given a free area
 * @block: block of interest
 * @start: start offset in block
 * @end: end offset in block
 *
 * Updates a block given a known free area.  The region [start, end) is
 * expected to be the entirety of the free area within a block.  Chooses
 * the best starting offset if the contig hints are equal.
 */
0634 static void pcpu_block_update(struct pcpu_block_md *block, int start, int end)
0635 {
0636 int contig = end - start;
0637
0638 block->first_free = min(block->first_free, start);
0639 if (start == 0)
0640 block->left_free = contig;
0641
0642 if (end == block->nr_bits)
0643 block->right_free = contig;
0644
0645 if (contig > block->contig_hint) {
0646
0647 if (start > block->contig_hint_start) {
0648 if (block->contig_hint > block->scan_hint) {
0649 block->scan_hint_start =
0650 block->contig_hint_start;
0651 block->scan_hint = block->contig_hint;
0652 } else if (start < block->scan_hint_start) {
0653
0654
0655
0656
0657
0658 block->scan_hint = 0;
0659 }
0660 } else {
0661 block->scan_hint = 0;
0662 }
0663 block->contig_hint_start = start;
0664 block->contig_hint = contig;
0665 } else if (contig == block->contig_hint) {
0666 if (block->contig_hint_start &&
0667 (!start ||
0668 __ffs(start) > __ffs(block->contig_hint_start))) {
0669
0670 block->contig_hint_start = start;
0671 if (start < block->scan_hint_start &&
0672 block->contig_hint > block->scan_hint)
0673 block->scan_hint = 0;
0674 } else if (start > block->scan_hint_start ||
0675 block->contig_hint > block->scan_hint) {
0676
0677
0678
0679
0680
0681 block->scan_hint_start = start;
0682 block->scan_hint = contig;
0683 }
0684 } else {
0685
0686
0687
0688
0689
0690 if ((start < block->contig_hint_start &&
0691 (contig > block->scan_hint ||
0692 (contig == block->scan_hint &&
0693 start > block->scan_hint_start)))) {
0694 block->scan_hint_start = start;
0695 block->scan_hint = contig;
0696 }
0697 }
0698 }
0699

/*
 * pcpu_block_update_scan - update a block given a free area from a scan
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of free area
 *
 * Finding the final allocation spot first goes through pcpu_find_zero_area()
 * to find a free area and then updates the block metadata.  The scan may have
 * skipped over smaller free areas; the largest of them is remembered here so
 * the hints stay useful without a full rescan.
 *
 * The block the free area resides in is only updated if it is the start of
 * the free area; areas crossing a block boundary are ignored.
 */
0716 static void pcpu_block_update_scan(struct pcpu_chunk *chunk, int bit_off,
0717 int bits)
0718 {
0719 int s_off = pcpu_off_to_block_off(bit_off);
0720 int e_off = s_off + bits;
0721 int s_index, l_bit;
0722 struct pcpu_block_md *block;
0723
0724 if (e_off > PCPU_BITMAP_BLOCK_BITS)
0725 return;
0726
0727 s_index = pcpu_off_to_block_index(bit_off);
0728 block = chunk->md_blocks + s_index;
0729
0730
0731 l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index), s_off);
0732 s_off = (s_off == l_bit) ? 0 : l_bit + 1;
0733
0734 pcpu_block_update(block, s_off, e_off);
0735 }
0736

/**
 * pcpu_chunk_refresh_hint - updates metadata about a chunk
 * @chunk: chunk of interest
 * @full_scan: if we should scan from the beginning
 *
 * Iterates over the metadata blocks to refresh the chunk's contig hint.
 * If a full scan is requested, the existing scan_hint is dropped and the
 * search starts from the chunk's first free bit; otherwise scanning begins
 * past the scan hint, which is then consumed.
 */
0749 static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
0750 {
0751 struct pcpu_block_md *chunk_md = &chunk->chunk_md;
0752 int bit_off, bits;
0753
0754
0755 if (!full_scan && chunk_md->scan_hint) {
0756 bit_off = chunk_md->scan_hint_start + chunk_md->scan_hint;
0757 chunk_md->contig_hint_start = chunk_md->scan_hint_start;
0758 chunk_md->contig_hint = chunk_md->scan_hint;
0759 chunk_md->scan_hint = 0;
0760 } else {
0761 bit_off = chunk_md->first_free;
0762 chunk_md->contig_hint = 0;
0763 }
0764
0765 bits = 0;
0766 pcpu_for_each_md_free_region(chunk, bit_off, bits)
0767 pcpu_block_update(chunk_md, bit_off, bit_off + bits);
0768 }
0769

/**
 * pcpu_block_refresh_hint
 * @chunk: chunk of interest
 * @index: index of the metadata block
 *
 * Scans over the block beginning at first_free and updates the block
 * metadata accordingly.
 */
0778 static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
0779 {
0780 struct pcpu_block_md *block = chunk->md_blocks + index;
0781 unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
0782 unsigned int start, end;
0783
0784
0785 if (block->scan_hint) {
0786 start = block->scan_hint_start + block->scan_hint;
0787 block->contig_hint_start = block->scan_hint_start;
0788 block->contig_hint = block->scan_hint;
0789 block->scan_hint = 0;
0790 } else {
0791 start = block->first_free;
0792 block->contig_hint = 0;
0793 }
0794
0795 block->right_free = 0;
0796
0797
0798 for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS)
0799 pcpu_block_update(block, start, end);
0800 }
0801

/**
 * pcpu_block_update_hint_alloc - update hint on allocation path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the allocation path.  The metadata only has to be
 * refreshed by a full scan iff the chunk's contig hint is broken.  Block
 * level scans are required if the block's contig hint is broken.
 */
0812 static void pcpu_block_update_hint_alloc(struct pcpu_chunk *chunk, int bit_off,
0813 int bits)
0814 {
0815 struct pcpu_block_md *chunk_md = &chunk->chunk_md;
0816 int nr_empty_pages = 0;
0817 struct pcpu_block_md *s_block, *e_block, *block;
0818 int s_index, e_index;
0819 int s_off, e_off;
0820
0821
0822
0823
0824
0825
0826
0827 s_index = pcpu_off_to_block_index(bit_off);
0828 e_index = pcpu_off_to_block_index(bit_off + bits - 1);
0829 s_off = pcpu_off_to_block_off(bit_off);
0830 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
0831
0832 s_block = chunk->md_blocks + s_index;
0833 e_block = chunk->md_blocks + e_index;
0834
0835
0836
0837
0838
0839
0840
0841 if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
0842 nr_empty_pages++;
0843
0844 if (s_off == s_block->first_free)
0845 s_block->first_free = find_next_zero_bit(
0846 pcpu_index_alloc_map(chunk, s_index),
0847 PCPU_BITMAP_BLOCK_BITS,
0848 s_off + bits);
0849
0850 if (pcpu_region_overlap(s_block->scan_hint_start,
0851 s_block->scan_hint_start + s_block->scan_hint,
0852 s_off,
0853 s_off + bits))
0854 s_block->scan_hint = 0;
0855
0856 if (pcpu_region_overlap(s_block->contig_hint_start,
0857 s_block->contig_hint_start +
0858 s_block->contig_hint,
0859 s_off,
0860 s_off + bits)) {
0861
0862 if (!s_off)
0863 s_block->left_free = 0;
0864 pcpu_block_refresh_hint(chunk, s_index);
0865 } else {
0866
0867 s_block->left_free = min(s_block->left_free, s_off);
0868 if (s_index == e_index)
0869 s_block->right_free = min_t(int, s_block->right_free,
0870 PCPU_BITMAP_BLOCK_BITS - e_off);
0871 else
0872 s_block->right_free = 0;
0873 }
0874
0875
0876
0877
0878 if (s_index != e_index) {
0879 if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS)
0880 nr_empty_pages++;
0881
0882
0883
0884
0885
0886 e_block->first_free = find_next_zero_bit(
0887 pcpu_index_alloc_map(chunk, e_index),
0888 PCPU_BITMAP_BLOCK_BITS, e_off);
0889
0890 if (e_off == PCPU_BITMAP_BLOCK_BITS) {
0891
0892 e_block++;
0893 } else {
0894 if (e_off > e_block->scan_hint_start)
0895 e_block->scan_hint = 0;
0896
0897 e_block->left_free = 0;
0898 if (e_off > e_block->contig_hint_start) {
0899
0900 pcpu_block_refresh_hint(chunk, e_index);
0901 } else {
0902 e_block->right_free =
0903 min_t(int, e_block->right_free,
0904 PCPU_BITMAP_BLOCK_BITS - e_off);
0905 }
0906 }
0907
0908
0909 nr_empty_pages += (e_index - s_index - 1);
0910 for (block = s_block + 1; block < e_block; block++) {
0911 block->scan_hint = 0;
0912 block->contig_hint = 0;
0913 block->left_free = 0;
0914 block->right_free = 0;
0915 }
0916 }
0917
0918 if (nr_empty_pages)
0919 pcpu_update_empty_pages(chunk, -nr_empty_pages);
0920
0921 if (pcpu_region_overlap(chunk_md->scan_hint_start,
0922 chunk_md->scan_hint_start +
0923 chunk_md->scan_hint,
0924 bit_off,
0925 bit_off + bits))
0926 chunk_md->scan_hint = 0;
0927
0928
0929
0930
0931
0932
0933 if (pcpu_region_overlap(chunk_md->contig_hint_start,
0934 chunk_md->contig_hint_start +
0935 chunk_md->contig_hint,
0936 bit_off,
0937 bit_off + bits))
0938 pcpu_chunk_refresh_hint(chunk, false);
0939 }
0940

/**
 * pcpu_block_update_hint_free - updates the block hints on the free path
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of request
 *
 * Updates metadata for the free path.  This avoids a blind block refresh
 * by making use of the block contig hints.  If this fails, it scans
 * forward and backward to determine the extent of the free area.  This is
 * capped at the boundary of blocks.
 *
 * A chunk update is triggered if a page becomes free, a block becomes free,
 * or the free spans across blocks.  This tradeoff is to minimize iterating
 * over the block metadata to update chunk_md->contig_hint.
 */
0959 static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
0960 int bits)
0961 {
0962 int nr_empty_pages = 0;
0963 struct pcpu_block_md *s_block, *e_block, *block;
0964 int s_index, e_index;
0965 int s_off, e_off;
0966 int start, end;
0967
0968
0969
0970
0971
0972
0973
0974 s_index = pcpu_off_to_block_index(bit_off);
0975 e_index = pcpu_off_to_block_index(bit_off + bits - 1);
0976 s_off = pcpu_off_to_block_off(bit_off);
0977 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1;
0978
0979 s_block = chunk->md_blocks + s_index;
0980 e_block = chunk->md_blocks + e_index;
0981
0982
0983
0984
0985
0986
0987
0988
0989
0990
0991
0992 start = s_off;
0993 if (s_off == s_block->contig_hint + s_block->contig_hint_start) {
0994 start = s_block->contig_hint_start;
0995 } else {
0996
0997
0998
0999
1000
1001
1002 int l_bit = find_last_bit(pcpu_index_alloc_map(chunk, s_index),
1003 start);
1004 start = (start == l_bit) ? 0 : l_bit + 1;
1005 }
1006
1007 end = e_off;
1008 if (e_off == e_block->contig_hint_start)
1009 end = e_block->contig_hint_start + e_block->contig_hint;
1010 else
1011 end = find_next_bit(pcpu_index_alloc_map(chunk, e_index),
1012 PCPU_BITMAP_BLOCK_BITS, end);
1013
1014
1015 e_off = (s_index == e_index) ? end : PCPU_BITMAP_BLOCK_BITS;
1016 if (!start && e_off == PCPU_BITMAP_BLOCK_BITS)
1017 nr_empty_pages++;
1018 pcpu_block_update(s_block, start, e_off);
1019
1020
1021 if (s_index != e_index) {
1022
1023 if (end == PCPU_BITMAP_BLOCK_BITS)
1024 nr_empty_pages++;
1025 pcpu_block_update(e_block, 0, end);
1026
1027
1028 nr_empty_pages += (e_index - s_index - 1);
1029 for (block = s_block + 1; block < e_block; block++) {
1030 block->first_free = 0;
1031 block->scan_hint = 0;
1032 block->contig_hint_start = 0;
1033 block->contig_hint = PCPU_BITMAP_BLOCK_BITS;
1034 block->left_free = PCPU_BITMAP_BLOCK_BITS;
1035 block->right_free = PCPU_BITMAP_BLOCK_BITS;
1036 }
1037 }
1038
1039 if (nr_empty_pages)
1040 pcpu_update_empty_pages(chunk, nr_empty_pages);
1041
1042
1043
1044
1045
1046
1047
1048 if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index)
1049 pcpu_chunk_refresh_hint(chunk, true);
1050 else
1051 pcpu_block_update(&chunk->chunk_md,
1052 pcpu_block_off_to_off(s_index, start),
1053 end);
1054 }
1055

/**
 * pcpu_is_populated - determines if the region is populated
 * @chunk: chunk of interest
 * @bit_off: chunk offset
 * @bits: size of area
 * @next_off: return value for the next offset to start searching
 *
 * For atomic allocations, check if the backing pages are populated.
 *
 * RETURNS:
 * Bool if the backing space is populated.
 */
1069 static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
1070 int *next_off)
1071 {
1072 unsigned int start, end;
1073
1074 start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
1075 end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
1076
1077 start = find_next_zero_bit(chunk->populated, end, start);
1078 if (start >= end)
1079 return true;
1080
1081 end = find_next_bit(chunk->populated, end, start + 1);
1082
1083 *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE;
1084 return false;
1085 }
1086

/**
 * pcpu_find_block_fit - finds the block index to start searching
 * @chunk: chunk of interest
 * @alloc_bits: size of request in allocation units
 * @align: alignment of area (max PAGE_SIZE bytes)
 * @pop_only: use populated regions only
 *
 * Given a chunk and an allocation spec, find the offset to begin searching
 * for a free region.  This iterates over the bitmap metadata blocks to
 * find an offset that will be guaranteed to fit the requirements.  It is
 * not quite first fit: if the allocation does not fit in the contig hint
 * of a block or chunk, it is skipped.  This errs on the side of caution
 * to prevent excess iteration.  Poor alignment can cause the allocator to
 * skip over blocks and chunks that have valid free areas.
 *
 * RETURNS:
 * The offset in the bitmap to begin searching.
 * -1 if no offset is found.
 */
1106 static int pcpu_find_block_fit(struct pcpu_chunk *chunk, int alloc_bits,
1107 size_t align, bool pop_only)
1108 {
1109 struct pcpu_block_md *chunk_md = &chunk->chunk_md;
1110 int bit_off, bits, next_off;
1111
1112
1113
1114
1115
1116
1117 if (!pcpu_check_block_hint(chunk_md, alloc_bits, align))
1118 return -1;
1119
1120 bit_off = pcpu_next_hint(chunk_md, alloc_bits);
1121 bits = 0;
1122 pcpu_for_each_fit_region(chunk, alloc_bits, align, bit_off, bits) {
1123 if (!pop_only || pcpu_is_populated(chunk, bit_off, bits,
1124 &next_off))
1125 break;
1126
1127 bit_off = next_off;
1128 bits = 0;
1129 }
1130
1131 if (bit_off == pcpu_chunk_map_bits(chunk))
1132 return -1;
1133
1134 return bit_off;
1135 }
1136

/*
 * pcpu_find_zero_area - modified from bitmap_find_next_zero_area_off()
 * @map: the address to base the search on
 * @size: the bitmap size in bits
 * @start: the bitnumber to start searching at
 * @nr: the number of zeroed bits we're looking for
 * @align_mask: alignment mask for zero area
 * @largest_off: offset of the largest area skipped
 * @largest_bits: the size of the largest area skipped
 *
 * The @align_mask should be one less than a power of 2.
 *
 * This is a modified version of bitmap_find_next_zero_area_off() that
 * remembers the largest area that was skipped.  This is imperfect, but in
 * general is good enough.  The largest remembered region is the largest
 * failed region seen, excluding anything skipped due to alignment.
 * pcpu_block_update_scan() scans backwards to try to recover what was lost
 * to alignment.  While this can cause scanning to miss earlier possible
 * free areas, smaller allocations will eventually fill those holes.
 */
1157 static unsigned long pcpu_find_zero_area(unsigned long *map,
1158 unsigned long size,
1159 unsigned long start,
1160 unsigned long nr,
1161 unsigned long align_mask,
1162 unsigned long *largest_off,
1163 unsigned long *largest_bits)
1164 {
1165 unsigned long index, end, i, area_off, area_bits;
1166 again:
1167 index = find_next_zero_bit(map, size, start);
1168
1169
1170 index = __ALIGN_MASK(index, align_mask);
1171 area_off = index;
1172
1173 end = index + nr;
1174 if (end > size)
1175 return end;
1176 i = find_next_bit(map, end, index);
1177 if (i < end) {
1178 area_bits = i - area_off;
1179
1180 if (area_bits > *largest_bits ||
1181 (area_bits == *largest_bits && *largest_off &&
1182 (!area_off || __ffs(area_off) > __ffs(*largest_off)))) {
1183 *largest_off = area_off;
1184 *largest_bits = area_bits;
1185 }
1186
1187 start = i + 1;
1188 goto again;
1189 }
1190 return index;
1191 }
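
/*
 * Example of the search above on a small map (illustrative only).  With
 * allocated bits marked '1' starting from bit 0:
 *
 *	map: 1 1 0 0 1 1 0 0 0 0 ...	nr = 3, start = 0, align_mask = 0
 *
 * The free run at bit 2 is only two bits long, so it is recorded via
 * @largest_off/@largest_bits (2 and 2) and skipped; the search restarts
 * past bit 5 and returns 6, the first run large enough for the request.
 */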
1192

/**
 * pcpu_alloc_area - allocates an area from a pcpu_chunk
 * @chunk: chunk of interest
 * @alloc_bits: size of request in allocation units
 * @align: alignment of area (max PAGE_SIZE)
 * @start: bit_off to start searching
 *
 * This function takes in a @start offset to begin searching to fit an
 * allocation of @alloc_bits with alignment @align.  It needs to scan
 * the allocation map because if it fits within the block's contig hint,
 * @start will be block->first_free.  This is an attempt to fill the
 * allocation prior to breaking the contig hint.  The allocation and
 * boundary maps are updated accordingly if it confirms a valid
 * free area.
 *
 * RETURNS:
 * Allocated addr offset in @chunk on success.
 * -1 if no matching area is found.
 */
1212 static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
1213 size_t align, int start)
1214 {
1215 struct pcpu_block_md *chunk_md = &chunk->chunk_md;
1216 size_t align_mask = (align) ? (align - 1) : 0;
1217 unsigned long area_off = 0, area_bits = 0;
1218 int bit_off, end, oslot;
1219
1220 lockdep_assert_held(&pcpu_lock);
1221
1222 oslot = pcpu_chunk_slot(chunk);
1223
1224
1225
1226
1227 end = min_t(int, start + alloc_bits + PCPU_BITMAP_BLOCK_BITS,
1228 pcpu_chunk_map_bits(chunk));
1229 bit_off = pcpu_find_zero_area(chunk->alloc_map, end, start, alloc_bits,
1230 align_mask, &area_off, &area_bits);
1231 if (bit_off >= end)
1232 return -1;
1233
1234 if (area_bits)
1235 pcpu_block_update_scan(chunk, area_off, area_bits);
1236
1237
1238 bitmap_set(chunk->alloc_map, bit_off, alloc_bits);
1239
1240
1241 set_bit(bit_off, chunk->bound_map);
1242 bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
1243 set_bit(bit_off + alloc_bits, chunk->bound_map);
1244
1245 chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
1246
1247
1248 if (bit_off == chunk_md->first_free)
1249 chunk_md->first_free = find_next_zero_bit(
1250 chunk->alloc_map,
1251 pcpu_chunk_map_bits(chunk),
1252 bit_off + alloc_bits);
1253
1254 pcpu_block_update_hint_alloc(chunk, bit_off, alloc_bits);
1255
1256 pcpu_chunk_relocate(chunk, oslot);
1257
1258 return bit_off * PCPU_MIN_ALLOC_SIZE;
1259 }
1260

/**
 * pcpu_free_area - frees the corresponding offset
 * @chunk: chunk of interest
 * @off: addr offset into chunk
 *
 * This function determines the size of an allocation to free using
 * the boundary bitmap and clears the allocation map.
 *
 * RETURNS:
 * Number of freed bytes.
 */
1272 static int pcpu_free_area(struct pcpu_chunk *chunk, int off)
1273 {
1274 struct pcpu_block_md *chunk_md = &chunk->chunk_md;
1275 int bit_off, bits, end, oslot, freed;
1276
1277 lockdep_assert_held(&pcpu_lock);
1278 pcpu_stats_area_dealloc(chunk);
1279
1280 oslot = pcpu_chunk_slot(chunk);
1281
1282 bit_off = off / PCPU_MIN_ALLOC_SIZE;
1283
1284
1285 end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk),
1286 bit_off + 1);
1287 bits = end - bit_off;
1288 bitmap_clear(chunk->alloc_map, bit_off, bits);
1289
1290 freed = bits * PCPU_MIN_ALLOC_SIZE;
1291
1292
1293 chunk->free_bytes += freed;
1294
1295
1296 chunk_md->first_free = min(chunk_md->first_free, bit_off);
1297
1298 pcpu_block_update_hint_free(chunk, bit_off, bits);
1299
1300 pcpu_chunk_relocate(chunk, oslot);
1301
1302 return freed;
1303 }
1304
1305 static void pcpu_init_md_block(struct pcpu_block_md *block, int nr_bits)
1306 {
1307 block->scan_hint = 0;
1308 block->contig_hint = nr_bits;
1309 block->left_free = nr_bits;
1310 block->right_free = nr_bits;
1311 block->first_free = 0;
1312 block->nr_bits = nr_bits;
1313 }
1314
1315 static void pcpu_init_md_blocks(struct pcpu_chunk *chunk)
1316 {
1317 struct pcpu_block_md *md_block;
1318
1319
1320 pcpu_init_md_block(&chunk->chunk_md, pcpu_chunk_map_bits(chunk));
1321
1322 for (md_block = chunk->md_blocks;
1323 md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk);
1324 md_block++)
1325 pcpu_init_md_block(md_block, PCPU_BITMAP_BLOCK_BITS);
1326 }
1327

/**
 * pcpu_alloc_first_chunk - creates chunks that serve the first chunk
 * @tmp_addr: the start of the region served
 * @map_size: size of the region served
 *
 * This is responsible for creating the chunks that serve the first chunk.
 * The base_addr is page aligned down of @tmp_addr while the region end is
 * page aligned up.  Offsets are kept track of to determine the region
 * served.  All this is done to appease the bitmap allocator in avoiding
 * partial blocks.
 *
 * RETURNS:
 * Chunk serving the region at @tmp_addr of @map_size.
 */
1341 static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
1342 int map_size)
1343 {
1344 struct pcpu_chunk *chunk;
1345 unsigned long aligned_addr, lcm_align;
1346 int start_offset, offset_bits, region_size, region_bits;
1347 size_t alloc_size;
1348
1349
1350 aligned_addr = tmp_addr & PAGE_MASK;
1351
1352 start_offset = tmp_addr - aligned_addr;
1353
1354
1355
1356
1357
1358
1359 lcm_align = lcm(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE);
1360 region_size = ALIGN(start_offset + map_size, lcm_align);
1361
1362
1363 alloc_size = struct_size(chunk, populated,
1364 BITS_TO_LONGS(region_size >> PAGE_SHIFT));
1365 chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
1366 if (!chunk)
1367 panic("%s: Failed to allocate %zu bytes\n", __func__,
1368 alloc_size);
1369
1370 INIT_LIST_HEAD(&chunk->list);
1371
1372 chunk->base_addr = (void *)aligned_addr;
1373 chunk->start_offset = start_offset;
1374 chunk->end_offset = region_size - chunk->start_offset - map_size;
1375
1376 chunk->nr_pages = region_size >> PAGE_SHIFT;
1377 region_bits = pcpu_chunk_map_bits(chunk);
1378
1379 alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]);
1380 chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
1381 if (!chunk->alloc_map)
1382 panic("%s: Failed to allocate %zu bytes\n", __func__,
1383 alloc_size);
1384
1385 alloc_size =
1386 BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]);
1387 chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
1388 if (!chunk->bound_map)
1389 panic("%s: Failed to allocate %zu bytes\n", __func__,
1390 alloc_size);
1391
1392 alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]);
1393 chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
1394 if (!chunk->md_blocks)
1395 panic("%s: Failed to allocate %zu bytes\n", __func__,
1396 alloc_size);
1397
1398 #ifdef CONFIG_MEMCG_KMEM
1399
1400 chunk->obj_cgroups = NULL;
1401 #endif
1402 pcpu_init_md_blocks(chunk);
1403
1404
1405 chunk->immutable = true;
1406 bitmap_fill(chunk->populated, chunk->nr_pages);
1407 chunk->nr_populated = chunk->nr_pages;
1408 chunk->nr_empty_pop_pages = chunk->nr_pages;
1409
1410 chunk->free_bytes = map_size;
1411
1412 if (chunk->start_offset) {
1413
1414 offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;
1415 bitmap_set(chunk->alloc_map, 0, offset_bits);
1416 set_bit(0, chunk->bound_map);
1417 set_bit(offset_bits, chunk->bound_map);
1418
1419 chunk->chunk_md.first_free = offset_bits;
1420
1421 pcpu_block_update_hint_alloc(chunk, 0, offset_bits);
1422 }
1423
1424 if (chunk->end_offset) {
1425
1426 offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE;
1427 bitmap_set(chunk->alloc_map,
1428 pcpu_chunk_map_bits(chunk) - offset_bits,
1429 offset_bits);
1430 set_bit((start_offset + map_size) / PCPU_MIN_ALLOC_SIZE,
1431 chunk->bound_map);
1432 set_bit(region_bits, chunk->bound_map);
1433
1434 pcpu_block_update_hint_alloc(chunk, pcpu_chunk_map_bits(chunk)
1435 - offset_bits, offset_bits);
1436 }
1437
1438 return chunk;
1439 }
1440
1441 static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
1442 {
1443 struct pcpu_chunk *chunk;
1444 int region_bits;
1445
1446 chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
1447 if (!chunk)
1448 return NULL;
1449
1450 INIT_LIST_HEAD(&chunk->list);
1451 chunk->nr_pages = pcpu_unit_pages;
1452 region_bits = pcpu_chunk_map_bits(chunk);
1453
1454 chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
1455 sizeof(chunk->alloc_map[0]), gfp);
1456 if (!chunk->alloc_map)
1457 goto alloc_map_fail;
1458
1459 chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
1460 sizeof(chunk->bound_map[0]), gfp);
1461 if (!chunk->bound_map)
1462 goto bound_map_fail;
1463
1464 chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
1465 sizeof(chunk->md_blocks[0]), gfp);
1466 if (!chunk->md_blocks)
1467 goto md_blocks_fail;
1468
1469 #ifdef CONFIG_MEMCG_KMEM
1470 if (!mem_cgroup_kmem_disabled()) {
1471 chunk->obj_cgroups =
1472 pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
1473 sizeof(struct obj_cgroup *), gfp);
1474 if (!chunk->obj_cgroups)
1475 goto objcg_fail;
1476 }
1477 #endif
1478
1479 pcpu_init_md_blocks(chunk);
1480
1481
1482 chunk->free_bytes = chunk->nr_pages * PAGE_SIZE;
1483
1484 return chunk;
1485
1486 #ifdef CONFIG_MEMCG_KMEM
1487 objcg_fail:
1488 pcpu_mem_free(chunk->md_blocks);
1489 #endif
1490 md_blocks_fail:
1491 pcpu_mem_free(chunk->bound_map);
1492 bound_map_fail:
1493 pcpu_mem_free(chunk->alloc_map);
1494 alloc_map_fail:
1495 pcpu_mem_free(chunk);
1496
1497 return NULL;
1498 }
1499
1500 static void pcpu_free_chunk(struct pcpu_chunk *chunk)
1501 {
1502 if (!chunk)
1503 return;
1504 #ifdef CONFIG_MEMCG_KMEM
1505 pcpu_mem_free(chunk->obj_cgroups);
1506 #endif
1507 pcpu_mem_free(chunk->md_blocks);
1508 pcpu_mem_free(chunk->bound_map);
1509 pcpu_mem_free(chunk->alloc_map);
1510 pcpu_mem_free(chunk);
1511 }
1512

/**
 * pcpu_chunk_populated - post-population bookkeeping
 * @chunk: pcpu_chunk which got populated
 * @page_start: the start page
 * @page_end: the end page
 *
 * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
 * the bookkeeping information accordingly.  Must be called after each
 * successful population.
 */
1523 static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
1524 int page_end)
1525 {
1526 int nr = page_end - page_start;
1527
1528 lockdep_assert_held(&pcpu_lock);
1529
1530 bitmap_set(chunk->populated, page_start, nr);
1531 chunk->nr_populated += nr;
1532 pcpu_nr_populated += nr;
1533
1534 pcpu_update_empty_pages(chunk, nr);
1535 }
1536

/**
 * pcpu_chunk_depopulated - post-depopulation bookkeeping
 * @chunk: pcpu_chunk which got depopulated
 * @page_start: the start page
 * @page_end: the end page
 *
 * Pages in [@page_start,@page_end) have been depopulated from @chunk.
 * Update the bookkeeping information accordingly.  Must be called after
 * each successful depopulation.
 */
1547 static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
1548 int page_start, int page_end)
1549 {
1550 int nr = page_end - page_start;
1551
1552 lockdep_assert_held(&pcpu_lock);
1553
1554 bitmap_clear(chunk->populated, page_start, nr);
1555 chunk->nr_populated -= nr;
1556 pcpu_nr_populated -= nr;
1557
1558 pcpu_update_empty_pages(chunk, -nr);
1559 }
1560

/*
 * Chunk management implementation.
 *
 * To allow different implementations, chunk alloc/free and
 * [de]population are implemented in a separate file which is pulled
 * into this file and compiled together.  The following functions
 * should be implemented.
 *
 * pcpu_populate_chunk		- populate the specified range of a chunk
 * pcpu_depopulate_chunk	- depopulate the specified range of a chunk
 * pcpu_post_unmap_tlb_flush	- flush tlb for the specified range of a chunk
 * pcpu_create_chunk		- create a new chunk
 * pcpu_destroy_chunk		- destroy a chunk, always preceded by full depop
 * pcpu_addr_to_page		- translate address to the page struct backing it
 * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
 */
1577 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
1578 int page_start, int page_end, gfp_t gfp);
1579 static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
1580 int page_start, int page_end);
1581 static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
1582 int page_start, int page_end);
1583 static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
1584 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
1585 static struct page *pcpu_addr_to_page(void *addr);
1586 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
1587
1588 #ifdef CONFIG_NEED_PER_CPU_KM
1589 #include "percpu-km.c"
1590 #else
1591 #include "percpu-vm.c"
1592 #endif
1593

/**
 * pcpu_chunk_addr_search - determine chunk containing specified address
 * @addr: address for which the chunk needs to be determined.
 *
 * This is an internal function that handles all but static allocations.
 * Static percpu address values should never be passed into the allocator.
 *
 * RETURNS:
 * The address of the found chunk.
 */
1604 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
1605 {
1606
1607 if (pcpu_addr_in_chunk(pcpu_first_chunk, addr))
1608 return pcpu_first_chunk;
1609
1610
1611 if (pcpu_addr_in_chunk(pcpu_reserved_chunk, addr))
1612 return pcpu_reserved_chunk;

	/*
	 * The address is relative to unit0 which might be unused and
	 * thus unmapped.  Offset the address to the unit space of the
	 * current processor before looking it up in the vmalloc space.
	 * Note that any possible cpu id can be used here, so there's no
	 * need to worry about preemption or cpu hotplug.
	 */
1621 addr += pcpu_unit_offsets[raw_smp_processor_id()];
1622 return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
1623 }
1624
1625 #ifdef CONFIG_MEMCG_KMEM
1626 static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
1627 struct obj_cgroup **objcgp)
1628 {
1629 struct obj_cgroup *objcg;
1630
1631 if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
1632 return true;
1633
1634 objcg = get_obj_cgroup_from_current();
1635 if (!objcg)
1636 return true;
1637
1638 if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) {
1639 obj_cgroup_put(objcg);
1640 return false;
1641 }
1642
1643 *objcgp = objcg;
1644 return true;
1645 }
1646
1647 static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
1648 struct pcpu_chunk *chunk, int off,
1649 size_t size)
1650 {
1651 if (!objcg)
1652 return;
1653
1654 if (likely(chunk && chunk->obj_cgroups)) {
1655 chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg;
1656
1657 rcu_read_lock();
1658 mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
1659 pcpu_obj_full_size(size));
1660 rcu_read_unlock();
1661 } else {
1662 obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
1663 obj_cgroup_put(objcg);
1664 }
1665 }
1666
1667 static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
1668 {
1669 struct obj_cgroup *objcg;
1670
1671 if (unlikely(!chunk->obj_cgroups))
1672 return;
1673
1674 objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
1675 if (!objcg)
1676 return;
1677 chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
1678
1679 obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
1680
1681 rcu_read_lock();
1682 mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
1683 -pcpu_obj_full_size(size));
1684 rcu_read_unlock();
1685
1686 obj_cgroup_put(objcg);
1687 }
1688
1689 #else
1690 static bool
1691 pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
1692 {
1693 return true;
1694 }
1695
1696 static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
1697 struct pcpu_chunk *chunk, int off,
1698 size_t size)
1699 {
1700 }
1701
1702 static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
1703 {
1704 }
1705 #endif
1706

/**
 * pcpu_alloc - the percpu allocator
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @reserved: allocate from the reserved chunk if available
 * @gfp: allocation flags
 *
 * Allocate percpu area of @size bytes aligned at @align.  If @gfp doesn't
 * contain %GFP_KERNEL, the allocation is atomic.  If @gfp has __GFP_NOWARN
 * then no warning will be triggered on invalid or failed allocation
 * requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
1722 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
1723 gfp_t gfp)
1724 {
1725 gfp_t pcpu_gfp;
1726 bool is_atomic;
1727 bool do_warn;
1728 struct obj_cgroup *objcg = NULL;
1729 static int warn_limit = 10;
1730 struct pcpu_chunk *chunk, *next;
1731 const char *err;
1732 int slot, off, cpu, ret;
1733 unsigned long flags;
1734 void __percpu *ptr;
1735 size_t bits, bit_align;
1736
1737 gfp = current_gfp_context(gfp);
1738
1739 pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
1740 is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
1741 do_warn = !(gfp & __GFP_NOWARN);
1742
1743
1744
1745
1746
1747
1748
1749 if (unlikely(align < PCPU_MIN_ALLOC_SIZE))
1750 align = PCPU_MIN_ALLOC_SIZE;
1751
1752 size = ALIGN(size, PCPU_MIN_ALLOC_SIZE);
1753 bits = size >> PCPU_MIN_ALLOC_SHIFT;
1754 bit_align = align >> PCPU_MIN_ALLOC_SHIFT;
1755
1756 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
1757 !is_power_of_2(align))) {
1758 WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
1759 size, align);
1760 return NULL;
1761 }
1762
1763 if (unlikely(!pcpu_memcg_pre_alloc_hook(size, gfp, &objcg)))
1764 return NULL;
1765
1766 if (!is_atomic) {
1767
1768
1769
1770
1771
1772 if (gfp & __GFP_NOFAIL) {
1773 mutex_lock(&pcpu_alloc_mutex);
1774 } else if (mutex_lock_killable(&pcpu_alloc_mutex)) {
1775 pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
1776 return NULL;
1777 }
1778 }
1779
1780 spin_lock_irqsave(&pcpu_lock, flags);
1781
1782
1783 if (reserved && pcpu_reserved_chunk) {
1784 chunk = pcpu_reserved_chunk;
1785
1786 off = pcpu_find_block_fit(chunk, bits, bit_align, is_atomic);
1787 if (off < 0) {
1788 err = "alloc from reserved chunk failed";
1789 goto fail_unlock;
1790 }
1791
1792 off = pcpu_alloc_area(chunk, bits, bit_align, off);
1793 if (off >= 0)
1794 goto area_found;
1795
1796 err = "alloc from reserved chunk failed";
1797 goto fail_unlock;
1798 }
1799
1800 restart:
1801
1802 for (slot = pcpu_size_to_slot(size); slot <= pcpu_free_slot; slot++) {
1803 list_for_each_entry_safe(chunk, next, &pcpu_chunk_lists[slot],
1804 list) {
1805 off = pcpu_find_block_fit(chunk, bits, bit_align,
1806 is_atomic);
1807 if (off < 0) {
1808 if (slot < PCPU_SLOT_FAIL_THRESHOLD)
1809 pcpu_chunk_move(chunk, 0);
1810 continue;
1811 }
1812
1813 off = pcpu_alloc_area(chunk, bits, bit_align, off);
1814 if (off >= 0) {
1815 pcpu_reintegrate_chunk(chunk);
1816 goto area_found;
1817 }
1818 }
1819 }
1820
1821 spin_unlock_irqrestore(&pcpu_lock, flags);
1822
1823
1824
1825
1826
1827
1828 if (is_atomic) {
1829 err = "atomic alloc failed, no space left";
1830 goto fail;
1831 }
1832
1833 if (list_empty(&pcpu_chunk_lists[pcpu_free_slot])) {
1834 chunk = pcpu_create_chunk(pcpu_gfp);
1835 if (!chunk) {
1836 err = "failed to allocate new chunk";
1837 goto fail;
1838 }
1839
1840 spin_lock_irqsave(&pcpu_lock, flags);
1841 pcpu_chunk_relocate(chunk, -1);
1842 } else {
1843 spin_lock_irqsave(&pcpu_lock, flags);
1844 }
1845
1846 goto restart;
1847
1848 area_found:
1849 pcpu_stats_area_alloc(chunk, size);
1850 spin_unlock_irqrestore(&pcpu_lock, flags);
1851
1852
1853 if (!is_atomic) {
1854 unsigned int page_end, rs, re;
1855
1856 rs = PFN_DOWN(off);
1857 page_end = PFN_UP(off + size);
1858
1859 for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) {
1860 WARN_ON(chunk->immutable);
1861
1862 ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
1863
1864 spin_lock_irqsave(&pcpu_lock, flags);
1865 if (ret) {
1866 pcpu_free_area(chunk, off);
1867 err = "failed to populate";
1868 goto fail_unlock;
1869 }
1870 pcpu_chunk_populated(chunk, rs, re);
1871 spin_unlock_irqrestore(&pcpu_lock, flags);
1872 }
1873
1874 mutex_unlock(&pcpu_alloc_mutex);
1875 }
1876
1877 if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
1878 pcpu_schedule_balance_work();
1879
1880
1881 for_each_possible_cpu(cpu)
1882 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
1883
1884 ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
1885 kmemleak_alloc_percpu(ptr, size, gfp);
1886
1887 trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align,
1888 chunk->base_addr, off, ptr,
1889 pcpu_obj_full_size(size), gfp);
1890
1891 pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
1892
1893 return ptr;
1894
1895 fail_unlock:
1896 spin_unlock_irqrestore(&pcpu_lock, flags);
1897 fail:
1898 trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
1899
1900 if (!is_atomic && do_warn && warn_limit) {
1901 pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
1902 size, align, is_atomic, err);
1903 dump_stack();
1904 if (!--warn_limit)
1905 pr_info("limit reached, disable warning\n");
1906 }
1907 if (is_atomic) {
1908
1909 pcpu_atomic_alloc_failed = true;
1910 pcpu_schedule_balance_work();
1911 } else {
1912 mutex_unlock(&pcpu_alloc_mutex);
1913 }
1914
1915 pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size);
1916
1917 return NULL;
1918 }
1919

/**
 * __alloc_percpu_gfp - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 * @gfp: allocation flags
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align.  If
 * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
 * be called from any context but is a lot more likely to fail.  If @gfp
 * has __GFP_NOWARN then no warning will be triggered on invalid or failed
 * allocation requests.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
1935 void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
1936 {
1937 return pcpu_alloc(size, align, false, gfp);
1938 }
1939 EXPORT_SYMBOL_GPL(__alloc_percpu_gfp);
1940

/**
 * __alloc_percpu - allocate dynamic percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL).
 */
1948 void __percpu *__alloc_percpu(size_t size, size_t align)
1949 {
1950 return pcpu_alloc(size, align, false, GFP_KERNEL);
1951 }
1952 EXPORT_SYMBOL_GPL(__alloc_percpu);
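
/*
 * Typical use of the dynamic percpu API exported above (illustrative
 * sketch; "struct foo" and its counter field are made up for the example):
 *
 *	struct foo { unsigned long hits; };
 *	struct foo __percpu *stats;
 *
 *	stats = alloc_percpu(struct foo);	// wraps __alloc_percpu()
 *	if (!stats)
 *		return -ENOMEM;
 *
 *	this_cpu_inc(stats->hits);		// fast path, preemption safe
 *
 *	unsigned long sum = 0;
 *	int cpu;
 *	for_each_possible_cpu(cpu)		// slow path, walk all units
 *		sum += per_cpu_ptr(stats, cpu)->hits;
 *
 *	free_percpu(stats);
 */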
1953

/**
 * __alloc_reserved_percpu - allocate reserved percpu area
 * @size: size of area to allocate in bytes
 * @align: alignment of area (max PAGE_SIZE)
 *
 * Allocate zero-filled percpu area of @size bytes aligned at @align
 * from the reserved percpu area if arch has set it up; otherwise,
 * allocation is served from the same dynamic area.  Might sleep.
 * Might trigger writeouts.
 *
 * CONTEXT:
 * Does GFP_KERNEL allocation.
 *
 * RETURNS:
 * Percpu pointer to the allocated area on success, NULL on failure.
 */
1970 void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
1971 {
1972 return pcpu_alloc(size, align, true, GFP_KERNEL);
1973 }
1974

/**
 * pcpu_balance_free - manage the amount of free chunks
 * @empty_only: free chunks only if there are no populated pages
 *
 * If @empty_only is %false, reclaim all fully free chunks regardless of
 * the number of populated pages.  Otherwise, only reclaim chunks that have
 * no populated pages.  The first chunk on the free list is always kept.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 */
1986 static void pcpu_balance_free(bool empty_only)
1987 {
1988 LIST_HEAD(to_free);
1989 struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
1990 struct pcpu_chunk *chunk, *next;
1991
1992 lockdep_assert_held(&pcpu_lock);
1993
1994
1995
1996
1997
1998 list_for_each_entry_safe(chunk, next, free_head, list) {
1999 WARN_ON(chunk->immutable);
2000
2001
2002 if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
2003 continue;
2004
2005 if (!empty_only || chunk->nr_empty_pop_pages == 0)
2006 list_move(&chunk->list, &to_free);
2007 }
2008
2009 if (list_empty(&to_free))
2010 return;
2011
2012 spin_unlock_irq(&pcpu_lock);
2013 list_for_each_entry_safe(chunk, next, &to_free, list) {
2014 unsigned int rs, re;
2015
2016 for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
2017 pcpu_depopulate_chunk(chunk, rs, re);
2018 spin_lock_irq(&pcpu_lock);
2019 pcpu_chunk_depopulated(chunk, rs, re);
2020 spin_unlock_irq(&pcpu_lock);
2021 }
2022 pcpu_destroy_chunk(chunk);
2023 cond_resched();
2024 }
2025 spin_lock_irq(&pcpu_lock);
2026 }
2027

/**
 * pcpu_balance_populated - manage the amount of populated pages
 *
 * Maintain a certain amount of populated pages to satisfy atomic
 * allocations.  If an atomic allocation failed, populate up to the high
 * watermark; otherwise top the reserve back up to it.  A new chunk is
 * created if all existing chunks are short on unpopulated pages.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 */
2040 static void pcpu_balance_populated(void)
2041 {
2042
2043 const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
2044 struct pcpu_chunk *chunk;
2045 int slot, nr_to_pop, ret;
2046
2047 lockdep_assert_held(&pcpu_lock);
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059 retry_pop:
2060 if (pcpu_atomic_alloc_failed) {
2061 nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
2062
2063 pcpu_atomic_alloc_failed = false;
2064 } else {
2065 nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
2066 pcpu_nr_empty_pop_pages,
2067 0, PCPU_EMPTY_POP_PAGES_HIGH);
2068 }
2069
2070 for (slot = pcpu_size_to_slot(PAGE_SIZE); slot <= pcpu_free_slot; slot++) {
2071 unsigned int nr_unpop = 0, rs, re;
2072
2073 if (!nr_to_pop)
2074 break;
2075
2076 list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
2077 nr_unpop = chunk->nr_pages - chunk->nr_populated;
2078 if (nr_unpop)
2079 break;
2080 }
2081
2082 if (!nr_unpop)
2083 continue;
2084
2085
2086 for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) {
2087 int nr = min_t(int, re - rs, nr_to_pop);
2088
2089 spin_unlock_irq(&pcpu_lock);
2090 ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
2091 cond_resched();
2092 spin_lock_irq(&pcpu_lock);
2093 if (!ret) {
2094 nr_to_pop -= nr;
2095 pcpu_chunk_populated(chunk, rs, rs + nr);
2096 } else {
2097 nr_to_pop = 0;
2098 }
2099
2100 if (!nr_to_pop)
2101 break;
2102 }
2103 }
2104
2105 if (nr_to_pop) {
2106
2107 spin_unlock_irq(&pcpu_lock);
2108 chunk = pcpu_create_chunk(gfp);
2109 cond_resched();
2110 spin_lock_irq(&pcpu_lock);
2111 if (chunk) {
2112 pcpu_chunk_relocate(chunk, -1);
2113 goto retry_pop;
2114 }
2115 }
2116 }
2117

/**
 * pcpu_reclaim_populated - scan over to_depopulate chunks and free empty pages
 *
 * Scan over chunks in the depopulate list and try to release unused
 * populated pages back to the system.  Depopulated chunks are sidelined
 * to prevent repopulating these pages unless required.  Fully free chunks
 * are reintegrated and freed accordingly.  Reintegration also happens if
 * the global empty populated page count drops below the high watermark.
 *
 * CONTEXT:
 * pcpu_lock (can be dropped temporarily)
 */
2133 static void pcpu_reclaim_populated(void)
2134 {
2135 struct pcpu_chunk *chunk;
2136 struct pcpu_block_md *block;
2137 int freed_page_start, freed_page_end;
2138 int i, end;
2139 bool reintegrate;
2140
2141 lockdep_assert_held(&pcpu_lock);
2142
2143
2144
2145
2146
2147
2148
2149 while (!list_empty(&pcpu_chunk_lists[pcpu_to_depopulate_slot])) {
2150 chunk = list_first_entry(&pcpu_chunk_lists[pcpu_to_depopulate_slot],
2151 struct pcpu_chunk, list);
2152 WARN_ON(chunk->immutable);
2153
2154
2155
2156
2157
2158 freed_page_start = chunk->nr_pages;
2159 freed_page_end = 0;
2160 reintegrate = false;
2161 for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) {
2162
2163 if (chunk->nr_empty_pop_pages == 0)
2164 break;
2165
2166
2167 if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_HIGH) {
2168 reintegrate = true;
2169 goto end_chunk;
2170 }
2171
2172
2173
2174
2175
2176
2177
2178 block = chunk->md_blocks + i;
2179 if (block->contig_hint == PCPU_BITMAP_BLOCK_BITS &&
2180 test_bit(i, chunk->populated)) {
2181 if (end == -1)
2182 end = i;
2183 if (i > 0)
2184 continue;
2185 i--;
2186 }
2187
2188
2189 if (end == -1)
2190 continue;
2191
2192 spin_unlock_irq(&pcpu_lock);
2193 pcpu_depopulate_chunk(chunk, i + 1, end + 1);
2194 cond_resched();
2195 spin_lock_irq(&pcpu_lock);
2196
2197 pcpu_chunk_depopulated(chunk, i + 1, end + 1);
2198 freed_page_start = min(freed_page_start, i + 1);
2199 freed_page_end = max(freed_page_end, end + 1);
2200
2201
2202 end = -1;
2203 }
2204
2205 end_chunk:
2206
2207 if (freed_page_start < freed_page_end) {
2208 spin_unlock_irq(&pcpu_lock);
2209 pcpu_post_unmap_tlb_flush(chunk,
2210 freed_page_start,
2211 freed_page_end);
2212 cond_resched();
2213 spin_lock_irq(&pcpu_lock);
2214 }
2215
2216 if (reintegrate || chunk->free_bytes == pcpu_unit_size)
2217 pcpu_reintegrate_chunk(chunk);
2218 else
2219 list_move_tail(&chunk->list,
2220 &pcpu_chunk_lists[pcpu_sidelined_slot]);
2221 }
2222 }
2223

/**
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
 * For each chunk type, manage the number of fully free chunks and the
 * number of populated pages.  An important thing to consider is when
 * pages are freed and how they contribute to the global counts.
 */
2232 static void pcpu_balance_workfn(struct work_struct *work)
2233 {
2234
2235
2236
2237
2238
2239
2240
2241 mutex_lock(&pcpu_alloc_mutex);
2242 spin_lock_irq(&pcpu_lock);
2243
2244 pcpu_balance_free(false);
2245 pcpu_reclaim_populated();
2246 pcpu_balance_populated();
2247 pcpu_balance_free(true);
2248
2249 spin_unlock_irq(&pcpu_lock);
2250 mutex_unlock(&pcpu_alloc_mutex);
2251 }
2252

/**
 * free_percpu - free percpu area
 * @ptr: pointer to area to free
 *
 * Free percpu area @ptr.
 *
 * CONTEXT:
 * Can be called from atomic context.
 */
2262 void free_percpu(void __percpu *ptr)
2263 {
2264 void *addr;
2265 struct pcpu_chunk *chunk;
2266 unsigned long flags;
2267 int size, off;
2268 bool need_balance = false;
2269
2270 if (!ptr)
2271 return;
2272
2273 kmemleak_free_percpu(ptr);
2274
2275 addr = __pcpu_ptr_to_addr(ptr);
2276
2277 spin_lock_irqsave(&pcpu_lock, flags);
2278
2279 chunk = pcpu_chunk_addr_search(addr);
2280 off = addr - chunk->base_addr;
2281
2282 size = pcpu_free_area(chunk, off);
2283
2284 pcpu_memcg_free_hook(chunk, off, size);
2285
2286
2287
2288
2289
2290
2291 if (!chunk->isolated && chunk->free_bytes == pcpu_unit_size) {
2292 struct pcpu_chunk *pos;
2293
2294 list_for_each_entry(pos, &pcpu_chunk_lists[pcpu_free_slot], list)
2295 if (pos != chunk) {
2296 need_balance = true;
2297 break;
2298 }
2299 } else if (pcpu_should_reclaim_chunk(chunk)) {
2300 pcpu_isolate_chunk(chunk);
2301 need_balance = true;
2302 }
2303
2304 trace_percpu_free_percpu(chunk->base_addr, off, ptr);
2305
2306 spin_unlock_irqrestore(&pcpu_lock, flags);
2307
2308 if (need_balance)
2309 pcpu_schedule_balance_work();
2310 }
2311 EXPORT_SYMBOL_GPL(free_percpu);
2312
2313 bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
2314 {
2315 #ifdef CONFIG_SMP
2316 const size_t static_size = __per_cpu_end - __per_cpu_start;
2317 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
2318 unsigned int cpu;
2319
2320 for_each_possible_cpu(cpu) {
2321 void *start = per_cpu_ptr(base, cpu);
2322 void *va = (void *)addr;
2323
2324 if (va >= start && va < start + static_size) {
2325 if (can_addr) {
2326 *can_addr = (unsigned long) (va - start);
2327 *can_addr += (unsigned long)
2328 per_cpu_ptr(base, get_boot_cpu_id());
2329 }
2330 return true;
2331 }
2332 }
2333 #endif
2334
2335 return false;
2336 }
2337

/**
 * is_kernel_percpu_address - test whether address is from static percpu area
 * @addr: address to test
 *
 * Test whether @addr belongs to the in-kernel static percpu area.  Module
 * static percpu areas are not considered.  For those, use
 * is_module_percpu_address().
 *
 * RETURNS:
 * %true if @addr is from in-kernel static percpu area, %false otherwise.
 */
2349 bool is_kernel_percpu_address(unsigned long addr)
2350 {
2351 return __is_kernel_percpu_address(addr, NULL);
2352 }
2353

/**
 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
 * @addr: the address to be converted to physical address
 *
 * Given @addr which is dereferenceable address obtained via one of the
 * percpu access macros, this function translates it into its physical
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
 * The percpu allocator has special setup for the first chunk, which
 * currently supports either embedding in linear address space or vmalloc
 * mapping, and, from the second chunk on, the backing allocator (currently
 * either vm or km) provides translation.
 *
 * The addr could be translated without checking whether it falls into the
 * first chunk, but the current code reflects better how the percpu
 * allocator actually works, and the verification can discover both bugs in
 * the percpu allocator itself and in per_cpu_ptr_to_phys() callers, so the
 * check is kept.
 *
 * RETURNS:
 * The physical address for @addr.
 */
2377 phys_addr_t per_cpu_ptr_to_phys(void *addr)
2378 {
2379 void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
2380 bool in_first_chunk = false;
2381 unsigned long first_low, first_high;
2382 unsigned int cpu;
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394 first_low = (unsigned long)pcpu_base_addr +
2395 pcpu_unit_page_offset(pcpu_low_unit_cpu, 0);
2396 first_high = (unsigned long)pcpu_base_addr +
2397 pcpu_unit_page_offset(pcpu_high_unit_cpu, pcpu_unit_pages);
2398 if ((unsigned long)addr >= first_low &&
2399 (unsigned long)addr < first_high) {
2400 for_each_possible_cpu(cpu) {
2401 void *start = per_cpu_ptr(base, cpu);
2402
2403 if (addr >= start && addr < start + pcpu_unit_size) {
2404 in_first_chunk = true;
2405 break;
2406 }
2407 }
2408 }
2409
2410 if (in_first_chunk) {
2411 if (!is_vmalloc_addr(addr))
2412 return __pa(addr);
2413 else
2414 return page_to_phys(vmalloc_to_page(addr)) +
2415 offset_in_page(addr);
2416 } else
2417 return page_to_phys(pcpu_addr_to_page(addr)) +
2418 offset_in_page(addr);
2419 }
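
/*
 * Example (illustrative sketch): translating one CPU's instance of a
 * dynamically allocated percpu object into a physical address, e.g. for
 * handing it to a device or firmware interface:
 *
 *	u64 __percpu *val = alloc_percpu(u64);
 *	phys_addr_t pa = per_cpu_ptr_to_phys(per_cpu_ptr(val, cpu));
 */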
2420

/**
 * pcpu_alloc_alloc_info - allocate percpu allocation info
 * @nr_groups: the number of groups
 * @nr_units: the number of units
 *
 * Allocate ai which is large enough for @nr_groups groups containing
 * @nr_units units.  The returned ai's groups[0].cpu_map points to the
 * cpu_map array which is long enough for @nr_units and filled with
 * NR_CPUS.  It's the caller's responsibility to initialize the cpu_map
 * pointers of other groups.
 *
 * RETURNS:
 * Pointer to the allocated pcpu_alloc_info on success, NULL on failure.
 */
2436 struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
2437 int nr_units)
2438 {
2439 struct pcpu_alloc_info *ai;
2440 size_t base_size, ai_size;
2441 void *ptr;
2442 int unit;
2443
2444 base_size = ALIGN(struct_size(ai, groups, nr_groups),
2445 __alignof__(ai->groups[0].cpu_map[0]));
2446 ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
2447
2448 ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE);
2449 if (!ptr)
2450 return NULL;
2451 ai = ptr;
2452 ptr += base_size;
2453
2454 ai->groups[0].cpu_map = ptr;
2455
2456 for (unit = 0; unit < nr_units; unit++)
2457 ai->groups[0].cpu_map[unit] = NR_CPUS;
2458
2459 ai->nr_groups = nr_groups;
2460 ai->__ai_size = PFN_ALIGN(ai_size);
2461
2462 return ai;
2463 }
2464

/**
 * pcpu_free_alloc_info - free percpu allocation info
 * @ai: pcpu_alloc_info to free
 *
 * Free @ai which was allocated by pcpu_alloc_alloc_info().
 */
2471 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
2472 {
2473 memblock_free(ai, ai->__ai_size);
2474 }
2475

/**
 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
 * @lvl: loglevel
 * @ai: allocation info to dump
 *
 * Print out information about @ai using loglevel @lvl.
 */
2483 static void pcpu_dump_alloc_info(const char *lvl,
2484 const struct pcpu_alloc_info *ai)
2485 {
2486 int group_width = 1, cpu_width = 1, width;
2487 char empty_str[] = "--------";
2488 int alloc = 0, alloc_end = 0;
2489 int group, v;
2490 int upa, apl;
2491
2492 v = ai->nr_groups;
2493 while (v /= 10)
2494 group_width++;
2495
2496 v = num_possible_cpus();
2497 while (v /= 10)
2498 cpu_width++;
2499 empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0';
2500
2501 upa = ai->alloc_size / ai->unit_size;
2502 width = upa * (cpu_width + 1) + group_width + 3;
2503 apl = rounddown_pow_of_two(max(60 / width, 1));
2504
2505 printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu",
2506 lvl, ai->static_size, ai->reserved_size, ai->dyn_size,
2507 ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size);
2508
2509 for (group = 0; group < ai->nr_groups; group++) {
2510 const struct pcpu_group_info *gi = &ai->groups[group];
2511 int unit = 0, unit_end = 0;
2512
2513 BUG_ON(gi->nr_units % upa);
2514 for (alloc_end += gi->nr_units / upa;
2515 alloc < alloc_end; alloc++) {
2516 if (!(alloc % apl)) {
2517 pr_cont("\n");
2518 printk("%spcpu-alloc: ", lvl);
2519 }
2520 pr_cont("[%0*d] ", group_width, group);
2521
2522 for (unit_end += upa; unit < unit_end; unit++)
2523 if (gi->cpu_map[unit] != NR_CPUS)
2524 pr_cont("%0*d ",
2525 cpu_width, gi->cpu_map[unit]);
2526 else
2527 pr_cont("%s ", empty_str);
2528 }
2529 }
2530 pr_cont("\n");
2531 }
2532

/**
 * pcpu_setup_first_chunk - initialize the first percpu chunk
 * @ai: pcpu_alloc_info describing how the percpu area is shaped
 * @base_addr: mapped address
 *
 * Initialize the first percpu chunk which contains the kernel static
 * percpu area.  This function is to be called from arch percpu area
 * setup path.
 *
 * @ai contains all information necessary to initialize the first
 * chunk and prime the dynamic percpu allocator.
 *
 * @ai->static_size is the size of static percpu area.
 *
 * @ai->reserved_size, if non-zero, specifies the amount of bytes to
 * reserve after the static area in the first chunk.  This reserves
 * the first chunk such that it's available only through reserved
 * percpu allocation.  This is primarily used to serve module percpu
 * static areas on architectures where the addressing model has
 * limited offset range for symbol relocations to guarantee module
 * percpu symbols fall inside the relocatable range.
 *
 * @ai->dyn_size determines the number of bytes available for dynamic
 * allocation in the first chunk.  The area between @ai->static_size +
 * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused.
 *
 * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE
 * and equal to or larger than @ai->static_size + @ai->reserved_size +
 * @ai->dyn_size.
 *
 * @ai->atom_size is the allocation atom size and used as alignment
 * for vm areas.
 *
 * @ai->alloc_size is the allocation size and always multiple of
 * @ai->atom_size.  This is larger than @ai->atom_size if
 * @ai->unit_size is larger than @ai->atom_size.
 *
 * @ai->nr_groups and @ai->groups describe virtual memory layout of
 * percpu areas.  Units which should be colocated are put into the
 * same group.  Dynamic VM areas will be allocated according to these
 * groupings.
 *
 * The caller should have mapped the first chunk at @base_addr and
 * copied static data to each unit.
 *
 * The first chunk will always contain a static and a dynamic region.
 * However, the static region is not managed by any chunk.  If the first
 * chunk also contains a reserved region, it is served by two chunks -
 * one for the reserved region and one for the dynamic region.  They share
 * the same vm, but use offset regions in the area allocation map.  The
 * chunk serving the dynamic region is circulated in the chunk slots and
 * available for dynamic allocation like any other chunk.
 */
2587 void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
2588 void *base_addr)
2589 {
2590 size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
2591 size_t static_size, dyn_size;
2592 struct pcpu_chunk *chunk;
2593 unsigned long *group_offsets;
2594 size_t *group_sizes;
2595 unsigned long *unit_off;
2596 unsigned int cpu;
2597 int *unit_map;
2598 int group, unit, i;
2599 int map_size;
2600 unsigned long tmp_addr;
2601 size_t alloc_size;
2602
2603 #define PCPU_SETUP_BUG_ON(cond) do { \
2604 if (unlikely(cond)) { \
2605 pr_emerg("failed to initialize, %s\n", #cond); \
2606 pr_emerg("cpu_possible_mask=%*pb\n", \
2607 cpumask_pr_args(cpu_possible_mask)); \
2608 pcpu_dump_alloc_info(KERN_EMERG, ai); \
2609 BUG(); \
2610 } \
2611 } while (0)
2612
2613
2614 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
2615 #ifdef CONFIG_SMP
2616 PCPU_SETUP_BUG_ON(!ai->static_size);
2617 PCPU_SETUP_BUG_ON(offset_in_page(__per_cpu_start));
2618 #endif
2619 PCPU_SETUP_BUG_ON(!base_addr);
2620 PCPU_SETUP_BUG_ON(offset_in_page(base_addr));
2621 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
2622 PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size));
2623 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
2624 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE));
2625 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
2626 PCPU_SETUP_BUG_ON(!ai->dyn_size);
2627 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE));
2628 PCPU_SETUP_BUG_ON(!(IS_ALIGNED(PCPU_BITMAP_BLOCK_SIZE, PAGE_SIZE) ||
2629 IS_ALIGNED(PAGE_SIZE, PCPU_BITMAP_BLOCK_SIZE)));
2630 PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
2631
2632
2633 alloc_size = ai->nr_groups * sizeof(group_offsets[0]);
2634 group_offsets = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
2635 if (!group_offsets)
2636 panic("%s: Failed to allocate %zu bytes\n", __func__,
2637 alloc_size);
2638
2639 alloc_size = ai->nr_groups * sizeof(group_sizes[0]);
2640 group_sizes = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
2641 if (!group_sizes)
2642 panic("%s: Failed to allocate %zu bytes\n", __func__,
2643 alloc_size);
2644
2645 alloc_size = nr_cpu_ids * sizeof(unit_map[0]);
2646 unit_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
2647 if (!unit_map)
2648 panic("%s: Failed to allocate %zu bytes\n", __func__,
2649 alloc_size);
2650
2651 alloc_size = nr_cpu_ids * sizeof(unit_off[0]);
2652 unit_off = memblock_alloc(alloc_size, SMP_CACHE_BYTES);
2653 if (!unit_off)
2654 panic("%s: Failed to allocate %zu bytes\n", __func__,
2655 alloc_size);
2656
2657 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
2658 unit_map[cpu] = UINT_MAX;
2659
2660 pcpu_low_unit_cpu = NR_CPUS;
2661 pcpu_high_unit_cpu = NR_CPUS;
2662
2663 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
2664 const struct pcpu_group_info *gi = &ai->groups[group];
2665
2666 group_offsets[group] = gi->base_offset;
2667 group_sizes[group] = gi->nr_units * ai->unit_size;
2668
2669 for (i = 0; i < gi->nr_units; i++) {
2670 cpu = gi->cpu_map[i];
2671 if (cpu == NR_CPUS)
2672 continue;
2673
2674 PCPU_SETUP_BUG_ON(cpu >= nr_cpu_ids);
2675 PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
2676 PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);
2677
2678 unit_map[cpu] = unit + i;
2679 unit_off[cpu] = gi->base_offset + i * ai->unit_size;
2680
2681
2682 if (pcpu_low_unit_cpu == NR_CPUS ||
2683 unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
2684 pcpu_low_unit_cpu = cpu;
2685 if (pcpu_high_unit_cpu == NR_CPUS ||
2686 unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
2687 pcpu_high_unit_cpu = cpu;
2688 }
2689 }
2690 pcpu_nr_units = unit;
2691
2692 for_each_possible_cpu(cpu)
2693 PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
2694
2695
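/* we're done parsing the input, undefine BUG macro and dump config */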
2696 #undef PCPU_SETUP_BUG_ON
2697 pcpu_dump_alloc_info(KERN_DEBUG, ai);
2698
2699 pcpu_nr_groups = ai->nr_groups;
2700 pcpu_group_offsets = group_offsets;
2701 pcpu_group_sizes = group_sizes;
2702 pcpu_unit_map = unit_map;
2703 pcpu_unit_offsets = unit_off;
2704
2705
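/* determine basic parameters */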
2706 pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT;
2707 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
2708 pcpu_atom_size = ai->atom_size;
2709 pcpu_chunk_struct_size = struct_size(chunk, populated,
2710 BITS_TO_LONGS(pcpu_unit_pages));
2711
2712 pcpu_stats_save_ai(ai);
2713
2714
2715
2716
2717
2718
2719
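/*
 * Allocate chunk slots.  The slots after the active slots are:
 *   sidelined_slot     - isolated, depopulated chunks
 *   free_slot          - fully free chunks
 *   to_depopulate_slot - isolated chunks queued for depopulation
 */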
2720 pcpu_sidelined_slot = __pcpu_size_to_slot(pcpu_unit_size) + 1;
2721 pcpu_free_slot = pcpu_sidelined_slot + 1;
2722 pcpu_to_depopulate_slot = pcpu_free_slot + 1;
2723 pcpu_nr_slots = pcpu_to_depopulate_slot + 1;
2724 pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots *
2725 sizeof(pcpu_chunk_lists[0]),
2726 SMP_CACHE_BYTES);
2727 if (!pcpu_chunk_lists)
2728 panic("%s: Failed to allocate %zu bytes\n", __func__,
2729 pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]));
2730
2731 for (i = 0; i < pcpu_nr_slots; i++)
2732 INIT_LIST_HEAD(&pcpu_chunk_lists[i]);
2733
2734
2735
2736
2737
2738
2739
2740
2741
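/*
 * The end of the static region needs to be aligned with the minimum
 * allocation size as this offsets the reserved and dynamic regions.
 * The first chunk ends page aligned by expanding the dynamic region,
 * therefore the dynamic region can be shrunk to compensate while still
 * staying above the configured sizes.
 */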
2742 static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE);
2743 dyn_size = ai->dyn_size - (static_size - ai->static_size);
2744
2745
2746
2747
2748
2749
2750
2751
2752
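/*
 * Initialize the first chunk.  If reserved_size is non-zero, this first
 * call sets up the reserved chunk; otherwise it sets up the dynamic
 * region directly.  pcpu_first_chunk always ends up pointing at the
 * chunk serving the dynamic region.
 */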
2753 tmp_addr = (unsigned long)base_addr + static_size;
2754 map_size = ai->reserved_size ?: dyn_size;
2755 chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
2756
2757
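/* init dynamic chunk if necessary */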
2758 if (ai->reserved_size) {
2759 pcpu_reserved_chunk = chunk;
2760
2761 tmp_addr = (unsigned long)base_addr + static_size +
2762 ai->reserved_size;
2763 map_size = dyn_size;
2764 chunk = pcpu_alloc_first_chunk(tmp_addr, map_size);
2765 }
2766
2767
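/* link the first chunk in */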
2768 pcpu_first_chunk = chunk;
2769 pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
2770 pcpu_chunk_relocate(pcpu_first_chunk, -1);
2771
2772
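/* include all regions of the first chunk */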
2773 pcpu_nr_populated += PFN_DOWN(size_sum);
2774
2775 pcpu_stats_chunk_alloc();
2776 trace_percpu_create_chunk(base_addr);
2777
2778
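/* we're done */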
2779 pcpu_base_addr = base_addr;
2780 }
2781
2782 #ifdef CONFIG_SMP
2783
2784 const char * const pcpu_fc_names[PCPU_FC_NR] __initconst = {
2785 [PCPU_FC_AUTO] = "auto",
2786 [PCPU_FC_EMBED] = "embed",
2787 [PCPU_FC_PAGE] = "page",
2788 };
2789
2790 enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;
2791
2792 static int __init percpu_alloc_setup(char *str)
2793 {
2794 if (!str)
2795 return -EINVAL;
2796
2797 if (0)
2798 ;
2799 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
2800 else if (!strcmp(str, "embed"))
2801 pcpu_chosen_fc = PCPU_FC_EMBED;
2802 #endif
2803 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
2804 else if (!strcmp(str, "page"))
2805 pcpu_chosen_fc = PCPU_FC_PAGE;
2806 #endif
2807 else
2808 pr_warn("unknown allocator %s specified\n", str);
2809
2810 return 0;
2811 }
2812 early_param("percpu_alloc", percpu_alloc_setup);
2813
2814
2815
2816
2817
2818
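/*
 * pcpu_embed_first_chunk() is used by the generic percpu setup use case.
 * Build it if needed by the arch config or if the generic setup is going
 * to be used.
 */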
2819 #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
2820 !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
2821 #define BUILD_EMBED_FIRST_CHUNK
2822 #endif
2823
2824
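/* build pcpu_page_first_chunk() iff needed by the arch config */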
2825 #if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
2826 #define BUILD_PAGE_FIRST_CHUNK
2827 #endif
2828
2829
2830 #if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
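/**
 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 *
 * Determines the grouping of units and their mapping to CPUs from the
 * needed percpu size, the allocation atom size and the distances between
 * CPUs.  CPUs which are of LOCAL_DISTANCE both ways share a group, and
 * each group's allocation is a multiple of @atom_size.
 *
 * RETURNS:
 * Pointer to the new alloc_info on success, ERR_PTR value on failure.
 */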
2852 static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
2853 size_t reserved_size, size_t dyn_size,
2854 size_t atom_size,
2855 pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
2856 {
2857 static int group_map[NR_CPUS] __initdata;
2858 static int group_cnt[NR_CPUS] __initdata;
2859 static struct cpumask mask __initdata;
2860 const size_t static_size = __per_cpu_end - __per_cpu_start;
2861 int nr_groups = 1, nr_units = 0;
2862 size_t size_sum, min_unit_size, alloc_size;
2863 int upa, max_upa, best_upa;
2864 int last_allocs, group, unit;
2865 unsigned int cpu, tcpu;
2866 struct pcpu_alloc_info *ai;
2867 unsigned int *cpu_map;
2868
2869
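/* this function may be called multiple times */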
2870 memset(group_map, 0, sizeof(group_map));
2871 memset(group_cnt, 0, sizeof(group_cnt));
2872 cpumask_clear(&mask);
2873
2874
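/* calculate size_sum and ensure dyn_size is enough for early alloc */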
2875 size_sum = PFN_ALIGN(static_size + reserved_size +
2876 max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
2877 dyn_size = size_sum - static_size - reserved_size;
2878
2879
2880
2881
2882
2883
2884
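/*
 * Determine min_unit_size, alloc_size and max_upa such that alloc_size
 * is a multiple of atom_size and is the smallest which can accommodate
 * page-aligned units which are equal to or larger than min_unit_size.
 */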
2885 min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
2886
2887
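/* determine the maximum # of units that can fit in an allocation */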
2888 alloc_size = roundup(min_unit_size, atom_size);
2889 upa = alloc_size / min_unit_size;
2890 while (alloc_size % upa || (offset_in_page(alloc_size / upa)))
2891 upa--;
2892 max_upa = upa;
2893
2894 cpumask_copy(&mask, cpu_possible_mask);
2895
2896
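/* group cpus according to their proximity */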
2897 for (group = 0; !cpumask_empty(&mask); group++) {
2898
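/* pop the group's first cpu */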
2899 cpu = cpumask_first(&mask);
2900 group_map[cpu] = group;
2901 group_cnt[group]++;
2902 cpumask_clear_cpu(cpu, &mask);
2903
2904 for_each_cpu(tcpu, &mask) {
2905 if (!cpu_distance_fn ||
2906 (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE &&
2907 cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) {
2908 group_map[tcpu] = group;
2909 group_cnt[group]++;
2910 cpumask_clear_cpu(tcpu, &mask);
2911 }
2912 }
2913 }
2914 nr_groups = group;
2915
2916
2917
2918
2919
2920
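/*
 * Wasted space is caused by a ratio imbalance of upa to group_cnt.
 * Shrink upa until at least 2/3 of the allocated units are used, while
 * keeping the number of allocations as low as possible.
 */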
2921 last_allocs = INT_MAX;
2922 best_upa = 0;
2923 for (upa = max_upa; upa; upa--) {
2924 int allocs = 0, wasted = 0;
2925
2926 if (alloc_size % upa || (offset_in_page(alloc_size / upa)))
2927 continue;
2928
2929 for (group = 0; group < nr_groups; group++) {
2930 int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
2931 allocs += this_allocs;
2932 wasted += this_allocs * upa - group_cnt[group];
2933 }
2934
2935
2936
2937
2938
2939
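/*
 * Don't accept if wastage is over 1/3.  The greater-than comparison
 * ensures upa == 1 always passes the following check.
 */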
2940 if (wasted > num_possible_cpus() / 3)
2941 continue;
2942
2943
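/* and then don't consume more memory */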
2944 if (allocs > last_allocs)
2945 break;
2946 last_allocs = allocs;
2947 best_upa = upa;
2948 }
2949 BUG_ON(!best_upa);
2950 upa = best_upa;
2951
2952
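/* allocate and fill alloc_info */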
2953 for (group = 0; group < nr_groups; group++)
2954 nr_units += roundup(group_cnt[group], upa);
2955
2956 ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
2957 if (!ai)
2958 return ERR_PTR(-ENOMEM);
2959 cpu_map = ai->groups[0].cpu_map;
2960
2961 for (group = 0; group < nr_groups; group++) {
2962 ai->groups[group].cpu_map = cpu_map;
2963 cpu_map += roundup(group_cnt[group], upa);
2964 }
2965
2966 ai->static_size = static_size;
2967 ai->reserved_size = reserved_size;
2968 ai->dyn_size = dyn_size;
2969 ai->unit_size = alloc_size / upa;
2970 ai->atom_size = atom_size;
2971 ai->alloc_size = alloc_size;
2972
2973 for (group = 0, unit = 0; group < nr_groups; group++) {
2974 struct pcpu_group_info *gi = &ai->groups[group];
2975
2976
2977
2978
2979
2980
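/*
 * Initialize base_offset as if all groups are located back-to-back.
 * The caller should update this to reflect actual allocation.
 */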
2981 gi->base_offset = unit * ai->unit_size;
2982
2983 for_each_possible_cpu(cpu)
2984 if (group_map[cpu] == group)
2985 gi->cpu_map[gi->nr_units++] = cpu;
2986 gi->nr_units = roundup(gi->nr_units, upa);
2987 unit += gi->nr_units;
2988 }
2989 BUG_ON(unit != nr_units);
2990
2991 return ai;
2992 }
2993
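/*
 * Allocate bootmem for @cpu's unit, preferring memory local to its node
 * when NUMA information is available and falling back to any memory
 * above MAX_DMA_ADDRESS otherwise.
 */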
2994 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align,
2995 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
2996 {
2997 const unsigned long goal = __pa(MAX_DMA_ADDRESS);
2998 #ifdef CONFIG_NUMA
2999 int node = NUMA_NO_NODE;
3000 void *ptr;
3001
3002 if (cpu_to_nd_fn)
3003 node = cpu_to_nd_fn(cpu);
3004
3005 if (node == NUMA_NO_NODE || !node_online(node) || !NODE_DATA(node)) {
3006 ptr = memblock_alloc_from(size, align, goal);
3007 pr_info("cpu %d has no node %d or node-local memory\n",
3008 cpu, node);
3009 pr_debug("per cpu data for cpu%d %zu bytes at 0x%llx\n",
3010 cpu, size, (u64)__pa(ptr));
3011 } else {
3012 ptr = memblock_alloc_try_nid(size, align, goal,
3013 MEMBLOCK_ALLOC_ACCESSIBLE,
3014 node);
3015
3016 pr_debug("per cpu data for cpu%d %zu bytes on node%d at 0x%llx\n",
3017 cpu, size, node, (u64)__pa(ptr));
3018 }
3019 return ptr;
3020 #else
3021 return memblock_alloc_from(size, align, goal);
3022 #endif
3023 }
3024
3025 static void __init pcpu_fc_free(void *ptr, size_t size)
3026 {
3027 memblock_free(ptr, size);
3028 }
3029 #endif
3030
3031 #if defined(BUILD_EMBED_FIRST_CHUNK)
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
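/**
 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
 * @reserved_size: the size of reserved percpu area in bytes
 * @dyn_size: minimum free size for dynamic allocation in bytes
 * @atom_size: allocation atom size
 * @cpu_distance_fn: callback to determine distance between cpus, optional
 * @cpu_to_nd_fn: callback to convert cpu to its node, optional
 *
 * A helper to set up an embedded first percpu chunk where
 * pcpu_setup_first_chunk() is expected.  Each group's unit area is
 * allocated from bootmem in whole multiples of @atom_size and used
 * as-is without being mapped into the vmalloc area, so the cpu->unit
 * mapping may be sparse on NUMA machines.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */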
3063 int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
3064 size_t atom_size,
3065 pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
3066 pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
3067 {
3068 void *base = (void *)ULONG_MAX;
3069 void **areas = NULL;
3070 struct pcpu_alloc_info *ai;
3071 size_t size_sum, areas_size;
3072 unsigned long max_distance;
3073 int group, i, highest_group, rc = 0;
3074
3075 ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
3076 cpu_distance_fn);
3077 if (IS_ERR(ai))
3078 return PTR_ERR(ai);
3079
3080 size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
3081 areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
3082
3083 areas = memblock_alloc(areas_size, SMP_CACHE_BYTES);
3084 if (!areas) {
3085 rc = -ENOMEM;
3086 goto out_free;
3087 }
3088
3089
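/* allocate, copy and determine base address & max_distance */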
3090 highest_group = 0;
3091 for (group = 0; group < ai->nr_groups; group++) {
3092 struct pcpu_group_info *gi = &ai->groups[group];
3093 unsigned int cpu = NR_CPUS;
3094 void *ptr;
3095
3096 for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
3097 cpu = gi->cpu_map[i];
3098 BUG_ON(cpu == NR_CPUS);
3099
3100
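/* allocate space for the whole group */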
3101 ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn);
3102 if (!ptr) {
3103 rc = -ENOMEM;
3104 goto out_free_areas;
3105 }
3106
3107 kmemleak_ignore_phys(__pa(ptr));
3108 areas[group] = ptr;
3109
3110 base = min(ptr, base);
3111 if (ptr > areas[highest_group])
3112 highest_group = group;
3113 }
3114 max_distance = areas[highest_group] - base;
3115 max_distance += ai->unit_size * ai->groups[highest_group].nr_units;
3116
3117
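/* warn if maximum distance is further than 75% of vmalloc space */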
3118 if (max_distance > VMALLOC_TOTAL * 3 / 4) {
3119 pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
3120 max_distance, VMALLOC_TOTAL);
3121 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
3122
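/* and fail if the page-based first chunk is available as a fallback */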
3123 rc = -EINVAL;
3124 goto out_free_areas;
3125 #endif
3126 }
3127
3128
3129
3130
3131
3132
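/*
 * Copy data and free unused parts.  This should happen after all
 * allocations are complete; otherwise, we may end up with overlapping
 * groups.
 */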
3133 for (group = 0; group < ai->nr_groups; group++) {
3134 struct pcpu_group_info *gi = &ai->groups[group];
3135 void *ptr = areas[group];
3136
3137 for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
3138 if (gi->cpu_map[i] == NR_CPUS) {
3139
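/* unused unit, free whole */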
3140 pcpu_fc_free(ptr, ai->unit_size);
3141 continue;
3142 }
3143
3144 memcpy(ptr, __per_cpu_load, ai->static_size);
3145 pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum);
3146 }
3147 }
3148
3149
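/* base address is now known, determine group base offsets */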
3150 for (group = 0; group < ai->nr_groups; group++) {
3151 ai->groups[group].base_offset = areas[group] - base;
3152 }
3153
3154 pr_info("Embedded %zu pages/cpu s%zu r%zu d%zu u%zu\n",
3155 PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
3156 ai->dyn_size, ai->unit_size);
3157
3158 pcpu_setup_first_chunk(ai, base);
3159 goto out_free;
3160
3161 out_free_areas:
3162 for (group = 0; group < ai->nr_groups; group++)
3163 if (areas[group])
3164 pcpu_fc_free(areas[group],
3165 ai->groups[group].nr_units * ai->unit_size);
3166 out_free:
3167 pcpu_free_alloc_info(ai);
3168 if (areas)
3169 memblock_free(areas, areas_size);
3170 return rc;
3171 }
3172 #endif
3173
3174 #ifdef BUILD_PAGE_FIRST_CHUNK
3175 #include <asm/pgalloc.h>
3176
3177 #ifndef P4D_TABLE_SIZE
3178 #define P4D_TABLE_SIZE PAGE_SIZE
3179 #endif
3180
3181 #ifndef PUD_TABLE_SIZE
3182 #define PUD_TABLE_SIZE PAGE_SIZE
3183 #endif
3184
3185 #ifndef PMD_TABLE_SIZE
3186 #define PMD_TABLE_SIZE PAGE_SIZE
3187 #endif
3188
3189 #ifndef PTE_TABLE_SIZE
3190 #define PTE_TABLE_SIZE PAGE_SIZE
3191 #endif
3192 void __init __weak pcpu_populate_pte(unsigned long addr)
3193 {
3194 pgd_t *pgd = pgd_offset_k(addr);
3195 p4d_t *p4d;
3196 pud_t *pud;
3197 pmd_t *pmd;
3198
3199 if (pgd_none(*pgd)) {
3200 p4d_t *new;
3201
3202 new = memblock_alloc(P4D_TABLE_SIZE, P4D_TABLE_SIZE);
3203 if (!new)
3204 goto err_alloc;
3205 pgd_populate(&init_mm, pgd, new);
3206 }
3207
3208 p4d = p4d_offset(pgd, addr);
3209 if (p4d_none(*p4d)) {
3210 pud_t *new;
3211
3212 new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
3213 if (!new)
3214 goto err_alloc;
3215 p4d_populate(&init_mm, p4d, new);
3216 }
3217
3218 pud = pud_offset(p4d, addr);
3219 if (pud_none(*pud)) {
3220 pmd_t *new;
3221
3222 new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
3223 if (!new)
3224 goto err_alloc;
3225 pud_populate(&init_mm, pud, new);
3226 }
3227
3228 pmd = pmd_offset(pud, addr);
3229 if (!pmd_present(*pmd)) {
3230 pte_t *new;
3231
3232 new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
3233 if (!new)
3234 goto err_alloc;
3235 pmd_populate_kernel(&init_mm, pmd, new);
3236 }
3237
3238 return;
3239
3240 err_alloc:
3241 panic("%s: Failed to allocate memory\n", __func__);
3242 }
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
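/**
 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
 * @reserved_size: the size of reserved percpu area in bytes
 * @cpu_to_nd_fn: callback to convert cpu to its node, optional
 *
 * A helper to set up the first percpu chunk where
 * pcpu_setup_first_chunk() is expected.  The percpu area is allocated
 * page-by-page and mapped into a vmalloc area.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */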
3258 int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
3259 {
3260 static struct vm_struct vm;
3261 struct pcpu_alloc_info *ai;
3262 char psize_str[16];
3263 int unit_pages;
3264 size_t pages_size;
3265 struct page **pages;
3266 int unit, i, j, rc = 0;
3267 int upa;
3268 int nr_g0_units;
3269
3270 snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
3271
3272 ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
3273 if (IS_ERR(ai))
3274 return PTR_ERR(ai);
3275 BUG_ON(ai->nr_groups != 1);
3276 upa = ai->alloc_size/ai->unit_size;
3277 nr_g0_units = roundup(num_possible_cpus(), upa);
3278 if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) {
3279 pcpu_free_alloc_info(ai);
3280 return -EINVAL;
3281 }
3282
3283 unit_pages = ai->unit_size >> PAGE_SHIFT;
3284
3285
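/* unaligned allocations can't be freed, round up to page size */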
3286 pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
3287 sizeof(pages[0]));
3288 pages = memblock_alloc(pages_size, SMP_CACHE_BYTES);
3289 if (!pages)
3290 panic("%s: Failed to allocate %zu bytes\n", __func__,
3291 pages_size);
3292
3293
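/* allocate pages */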
3294 j = 0;
3295 for (unit = 0; unit < num_possible_cpus(); unit++) {
3296 unsigned int cpu = ai->groups[0].cpu_map[unit];
3297 for (i = 0; i < unit_pages; i++) {
3298 void *ptr;
3299
3300 ptr = pcpu_fc_alloc(cpu, PAGE_SIZE, PAGE_SIZE, cpu_to_nd_fn);
3301 if (!ptr) {
3302 pr_warn("failed to allocate %s page for cpu%u\n",
3303 psize_str, cpu);
3304 goto enomem;
3305 }
3306
3307 kmemleak_ignore_phys(__pa(ptr));
3308 pages[j++] = virt_to_page(ptr);
3309 }
3310 }
3311
3312
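/* allocate vm area, map the pages and copy static data */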
3313 vm.flags = VM_ALLOC;
3314 vm.size = num_possible_cpus() * ai->unit_size;
3315 vm_area_register_early(&vm, PAGE_SIZE);
3316
3317 for (unit = 0; unit < num_possible_cpus(); unit++) {
3318 unsigned long unit_addr =
3319 (unsigned long)vm.addr + unit * ai->unit_size;
3320
3321 for (i = 0; i < unit_pages; i++)
3322 pcpu_populate_pte(unit_addr + (i << PAGE_SHIFT));
3323
3324
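/* pte already populated, the following shouldn't fail */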
3325 rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages],
3326 unit_pages);
3327 if (rc < 0)
3328 panic("failed to map percpu area, err=%d\n", rc);
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
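/* copy static data */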
3339 memcpy((void *)unit_addr, __per_cpu_load, ai->static_size);
3340 }
3341
3342
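/* we're ready, commit */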
3343 pr_info("%d %s pages/cpu s%zu r%zu d%zu\n",
3344 unit_pages, psize_str, ai->static_size,
3345 ai->reserved_size, ai->dyn_size);
3346
3347 pcpu_setup_first_chunk(ai, vm.addr);
3348 goto out_free_ar;
3349
3350 enomem:
3351 while (--j >= 0)
3352 pcpu_fc_free(page_address(pages[j]), PAGE_SIZE);
3353 rc = -ENOMEM;
3354 out_free_ar:
3355 memblock_free(pages, pages_size);
3356 pcpu_free_alloc_info(ai);
3357 return rc;
3358 }
3359 #endif
3360
3361 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
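/*
 * Generic SMP percpu area setup.
 *
 * The embedding helper is used because its behavior closely resembles
 * the original non-dynamic generic percpu area setup.  Many archs have
 * addressing restrictions and might fail if the percpu area is located
 * far away from the previous location, and in non-NUMA cases embedding
 * lets the percpu area piggyback on the physical linear mapping which
 * uses large page mappings on applicable archs.
 */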
3374 unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
3375 EXPORT_SYMBOL(__per_cpu_offset);
3376
3377 void __init setup_per_cpu_areas(void)
3378 {
3379 unsigned long delta;
3380 unsigned int cpu;
3381 int rc;
3382
3383
3384
3385
3386
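/*
 * Always reserve area for module percpu variables.  That's what the
 * legacy allocator did.
 */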
3387 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE,
3388 PAGE_SIZE, NULL, NULL);
3389 if (rc < 0)
3390 panic("Failed to initialize percpu areas.");
3391
3392 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
3393 for_each_possible_cpu(cpu)
3394 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
3395 }
3396 #endif
3397
3398 #else
3399
3400
3401
3402
3403
3404
3405
3406
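/*
 * UP percpu area setup.
 *
 * UP uses a single identity-mapped unit: percpu access is cheap
 * regardless of where the area lives, so one bootmem allocation of
 * unit_size serves the whole dynamic region.
 */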
3407 void __init setup_per_cpu_areas(void)
3408 {
3409 const size_t unit_size =
3410 roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
3411 PERCPU_DYNAMIC_RESERVE));
3412 struct pcpu_alloc_info *ai;
3413 void *fc;
3414
3415 ai = pcpu_alloc_alloc_info(1, 1);
3416 fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
3417 if (!ai || !fc)
3418 panic("Failed to allocate memory for percpu areas.");
3419
3420 kmemleak_ignore_phys(__pa(fc));
3421
3422 ai->dyn_size = unit_size;
3423 ai->unit_size = unit_size;
3424 ai->atom_size = unit_size;
3425 ai->alloc_size = unit_size;
3426 ai->groups[0].nr_units = 1;
3427 ai->groups[0].cpu_map[0] = 0;
3428
3429 pcpu_setup_first_chunk(ai, fc);
3430 pcpu_free_alloc_info(ai);
3431 }
3432
3433 #endif
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
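/*
 * pcpu_nr_pages - calculate total number of populated backing pages
 *
 * This reflects the number of pages populated to back chunks.  Metadata
 * is excluded, as the number of backing pages scales with the number of
 * cpus and can quickly outweigh the memory used for metadata.
 */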
3446 unsigned long pcpu_nr_pages(void)
3447 {
3448 return pcpu_nr_populated * pcpu_nr_units;
3449 }
3450
3451
3452
3453
3454
3455
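/*
 * The percpu allocator is initialized early during boot when neither
 * slab nor workqueues are available.  Plug async management until
 * everything is up and running.
 */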
3456 static int __init percpu_enable_async(void)
3457 {
3458 pcpu_async_enabled = true;
3459 return 0;
3460 }
3461 subsys_initcall(percpu_enable_async);