#include <linux/init.h>

#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/sched/smt.h>
#include <linux/task_work.h>

#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/apic.h>
#include <asm/perf_event.h>

#include "mm_internal.h"

#ifdef CONFIG_PARAVIRT
# define STATIC_NOPV
#else
# define STATIC_NOPV			static
# define __flush_tlb_local		native_flush_tlb_local
# define __flush_tlb_global		native_flush_tlb_global
# define __flush_tlb_one_user(addr)	native_flush_tlb_one_user(addr)
# define __flush_tlb_multi(msk, info)	native_flush_tlb_multi(msk, info)
#endif

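/*
 * Bits used to mangle the per-task TIF_SPEC_* state into the mm pointer
 * that is cached in cpu_tlbstate.last_user_mm_spec, so that the expensive
 * IBPB / L1D-flush mitigations below only run when they are actually
 * needed on a context switch.
 */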
#define LAST_USER_MM_IBPB	0x1UL
#define LAST_USER_MM_L1D_FLUSH	0x2UL
#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

/* Bits to set when tlbstate and flush is (re)initialized */
#define LAST_USER_MM_INIT	LAST_USER_MM_IBPB

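/*
 * The x86 "ASID" used here is the PCID (Process Context IDentifier) field
 * in CR3.  Rather than giving every mm its own global ASID, each CPU keeps
 * a small array of TLB_NR_DYN_ASIDS recently used mms; switching back to
 * one of them can skip the TLB flush.  kern_pcid()/user_pcid() translate
 * these zero-based kernel ASIDs into the hardware PCID values written to
 * CR3 for the kernel and (with PTI) user page tables respectively.
 */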
/* There are 12 bits of space for ASIDs in CR3 */
#define CR3_HW_ASID_BITS		12

/*
 * When enabled, PAGE_TABLE_ISOLATION consumes a single extra bit for
 * user/kernel switches.
 */
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS	1
#else
# define PTI_CONSUMED_PCID_BITS	0
#endif

#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)

/*
 * ASIDs are zero-based: 0..MAX_ASID_AVAILABLE-1.  PCID 0 is reserved for
 * CPUs running without PCID, and kern_pcid() maps ASID -> ASID + 1; those
 * two together account for the "- 2" below.
 */
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)

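/*
 * Given a kernel-internal ASID, compute the kernel PCID (kPCID) that is
 * loaded into CR3 for the kernel half of the address space.
 */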
static inline u16 kern_pcid(u16 asid)
{
        VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);

#ifdef CONFIG_PAGE_TABLE_ISOLATION
        /*
         * Make sure that the dynamic ASID space does not conflict with the
         * bit we are using to switch between user and kernel ASIDs.
         */
        BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));

        /*
         * The ASID being passed in here should have respected
         * MAX_ASID_AVAILABLE and thus never have the user bit set.
         */
        VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
#endif
        /*
         * ASID-aware code paths put ASID+1 into the PCID bits.  This keeps
         * PCID 0 reserved, so PCID-unaware code that saves and restores CR3
         * with PCID == 0 cannot silently corrupt the TLB entries of ASID 0,
         * and bugs that load a PCID-enabled CR3 with CR4.PCIDE clear trip
         * deterministically.
         */
        return asid + 1;
}

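/*
 * Given a kernel ASID, compute the matching user PCID (uPCID): the kPCID
 * with the PTI user bit set when page table isolation is enabled.
 */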
static inline u16 user_pcid(u16 asid)
{
        u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
        ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
#endif
        return ret;
}

static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
        if (static_cpu_has(X86_FEATURE_PCID)) {
                return __sme_pa(pgd) | kern_pcid(asid);
        } else {
                VM_WARN_ON_ONCE(asid != 0);
                return __sme_pa(pgd);
        }
}

static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
{
        VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
        /*
         * Use boot_cpu_has() instead of this_cpu_has() as this function
         * might be called during early boot.  This works because all CPUs
         * are expected to report the same capabilities.
         */
        VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
        return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
}

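/*
 * We get here when we do something requiring a TLB invalidation
 * but could not invalidate all of the contexts.  We do the
 * necessary invalidation by clearing out the 'ctx_id' which
 * forces a TLB flush when the context is next loaded.
 */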
static void clear_asid_other(void)
{
        u16 asid;

        /*
         * invalidate_other should only ever be set when PTI is in use;
         * warn and bail out otherwise.
         */
        if (!static_cpu_has(X86_FEATURE_PTI)) {
                WARN_ON_ONCE(1);
                return;
        }

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                /* Do not need to flush the current asid */
                if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
                        continue;
                /*
                 * Make sure the next time we go to switch to
                 * this asid, we do a flush:
                 */
                this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
        }
        this_cpu_write(cpu_tlbstate.invalidate_other, false);
}

atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
                            u16 *new_asid, bool *need_flush)
{
        u16 asid;

        if (!static_cpu_has(X86_FEATURE_PCID)) {
                *new_asid = 0;
                *need_flush = true;
                return;
        }

        if (this_cpu_read(cpu_tlbstate.invalidate_other))
                clear_asid_other();

        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
                    next->context.ctx_id)
                        continue;

                *new_asid = asid;
                *need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
                               next_tlb_gen);
                return;
        }

        /*
         * We don't currently own an ASID slot on this CPU.
         * Allocate a slot.
         */
        *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
        if (*new_asid >= TLB_NR_DYN_ASIDS) {
                *new_asid = 0;
                this_cpu_write(cpu_tlbstate.next_asid, 1);
        }
        *need_flush = true;
}

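/*
 * Given an ASID, flush the corresponding user ASID.  We can delay this
 * until the next time we switch to it.
 *
 * See SWITCH_TO_USER_CR3.
 */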
static inline void invalidate_user_asid(u16 asid)
{
        /* There is no user ASID if address space separation is off */
        if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
                return;

        /*
         * Without PCID there is only a single ASID and the CR3 write
         * will flush it, so there is nothing to track here.
         */
        if (!cpu_feature_enabled(X86_FEATURE_PCID))
                return;

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        __set_bit(kern_pcid(asid),
                  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
        unsigned long new_mm_cr3;

        if (need_flush) {
                invalidate_user_asid(new_asid);
                new_mm_cr3 = build_cr3(pgdir, new_asid);
        } else {
                new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
        }

        /*
         * MOV to CR3 is architecturally serializing; callers rely on that
         * ordering.
         */
        write_cr3(new_mm_cr3);
}

void leave_mm(int cpu)
{
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

        /*
         * If the loaded mm is already init_mm there are no user TLB
         * entries to worry about, so there is nothing to do.
         */
        if (loaded_mm == &init_mm)
                return;

        /* Warn if this CPU is not in lazy TLB mode. */
        WARN_ON(!this_cpu_read(cpu_tlbstate_shared.is_lazy));

        switch_mm(NULL, &init_mm, NULL);
}
EXPORT_SYMBOL_GPL(leave_mm);

void switch_mm(struct mm_struct *prev, struct mm_struct *next,
               struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);
        switch_mm_irqs_off(prev, next, tsk);
        local_irq_restore(flags);
}

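/*
 * Invoked via task_work on return to user by a task that opted into L1D
 * flushing but ended up running on an SMT-enabled core (wrong affinity
 * settings or CPU hotplug).  The paranoid L1D flush contract is that the
 * task gets killed in that case.
 */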
static void l1d_flush_force_sigbus(struct callback_head *ch)
{
        force_sig(SIGBUS);
}

static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
                               struct task_struct *next)
{
        /* Flush L1D if the outgoing task requested it */
        if (prev_mm & LAST_USER_MM_L1D_FLUSH)
                wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);

        /* Check whether the incoming task opted in for L1D flush */
        if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
                return;

        /*
         * Validate that the task is not running on an SMT sibling, as the
         * siblings share L1D and the flush would be pointless.  If it is,
         * notify it with SIGBUS on return to user space.
         */
        if (this_cpu_read(cpu_info.smt_active)) {
                clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
                next->l1d_flush_kill.func = l1d_flush_force_sigbus;
                task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
        }
}

static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
        unsigned long next_tif = read_task_thread_flags(next);
        unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

        /*
         * Ensure that the bit shift above works as expected and the two
         * mitigation flags end up in bits 0 and 1.
         */
        BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);

        return (unsigned long)next->mm | spec_bits;
}


static void cond_mitigation(struct task_struct *next)
{
        unsigned long prev_mm, next_mm;

        if (!next || !next->mm)
                return;

        next_mm = mm_mangle_tif_spec_bits(next);
        prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);

        /*
         * Avoid user->user BTB poisoning by flushing the branch predictor
         * when switching between processes.  This stops one process from
         * doing Spectre-v2 attacks on another.
         */
        if (static_branch_likely(&switch_mm_cond_ibpb)) {
                /*
                 * Issue IBPB only when switching to a different user mm and
                 * either the outgoing or the incoming task asked for IBPB
                 * protection (prctl/seccomp sets TIF_SPEC_IB, which shows up
                 * here as LAST_USER_MM_IBPB).
                 */
                if (next_mm != prev_mm &&
                    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
                        indirect_branch_prediction_barrier();
        }

        if (static_branch_unlikely(&switch_mm_always_ibpb)) {
                /*
                 * Only flush when switching to a user space task with a
                 * different context than the user space task which ran
                 * last on this CPU.
                 */
                if ((prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
                                        (unsigned long)next->mm)
                        indirect_branch_prediction_barrier();
        }

        if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
                /*
                 * Flush L1D when the outgoing task requested it and/or
                 * check whether the incoming task requested L1D flushing
                 * and ended up on an SMT sibling.
                 */
                if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
                        l1d_flush_evaluate(prev_mm, next_mm, next);
        }

        this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
}


#ifdef CONFIG_PERF_EVENTS
static inline void cr4_update_pce_mm(struct mm_struct *mm)
{
        if (static_branch_unlikely(&rdpmc_always_available_key) ||
            (!static_branch_unlikely(&rdpmc_never_available_key) &&
             atomic_read(&mm->context.perf_rdpmc_allowed))) {
                /*
                 * Clear the existing dirty counters to
                 * prevent the leak for an RDPMC task.
                 */
                perf_clear_dirty_counters();
                cr4_set_bits_irqsoff(X86_CR4_PCE);
        } else
                cr4_clear_bits_irqsoff(X86_CR4_PCE);
}

void cr4_update_pce(void *ignored)
{
        cr4_update_pce_mm(this_cpu_read(cpu_tlbstate.loaded_mm));
}

#else
static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
#endif


void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        struct task_struct *tsk)
{
        struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
        unsigned cpu = smp_processor_id();
        u64 next_tlb_gen;
        bool need_flush;
        u16 new_asid;

        /*
         * NB: The scheduler will call us with prev == next when switching
         * from lazy TLB mode to normal mode if active_mm isn't changing.
         * When this happens, we don't assume that CR3 (and hence
         * cpu_tlbstate.loaded_mm) matches next.
         *
         * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
         */

        /* We don't want flush_tlb_func() to run concurrently with us. */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                WARN_ON_ONCE(!irqs_disabled());

        /*
         * Verify that CR3 is what we think it is.  This will catch
         * hypothetical buggy code that directly switches to swapper_pg_dir
         * without going through leave_mm() / switch_mm_irqs_off() or that
         * does something like write_cr3(read_cr3_pa()).
         *
         * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
         * isn't free.
         */
#ifdef CONFIG_DEBUG_VM
        if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * A BUG here would likely take the system down so hard that
                 * the call trace is never seen.  Try to recover by ignoring
                 * the error and doing a global flush to minimize the chance
                 * of corruption.
                 */
                __flush_tlb_all();
        }
#endif
        if (was_lazy)
                this_cpu_write(cpu_tlbstate_shared.is_lazy, false);

        if (real_prev == next) {
                /* Switching back to the same mm, e.g. lazy -> non-lazy. */
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                           next->context.ctx_id);

                /*
                 * Even in lazy TLB mode, the CPU should stay set in the
                 * mm_cpumask.  The TLB shootdown code can figure out from
                 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
                 */
                if (WARN_ON_ONCE(real_prev != &init_mm &&
                                 !cpumask_test_cpu(cpu, mm_cpumask(next))))
                        cpumask_set_cpu(cpu, mm_cpumask(next));

                /*
                 * If the CPU is not in lazy TLB mode, we are just switching
                 * from one thread in a process to another thread in the same
                 * process.  No TLB flush required.
                 */
                if (!was_lazy)
                        return;

                /*
                 * Read the tlb_gen to check whether a flush is needed.
                 * If the TLB is up to date, just use it.  The barrier
                 * synchronizes with the tlb_gen increment in the TLB
                 * shootdown code.
                 */
                smp_mb();
                next_tlb_gen = atomic64_read(&next->context.tlb_gen);
                if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
                    next_tlb_gen)
                        return;

                /*
                 * TLB contents went out of date while we were in lazy
                 * mode.  Fall through to the TLB switching code below.
                 */
                new_asid = prev_asid;
                need_flush = true;
        } else {
                /*
                 * Apply process to process speculation vulnerability
                 * mitigations if applicable.
                 */
                cond_mitigation(tsk);

                /*
                 * Stop remote flushes for the previous mm.
                 * Skip kernel threads; we never send init_mm TLB flushing
                 * IPIs, but the bitmap manipulation can cause cache line
                 * contention.
                 */
                if (real_prev != &init_mm) {
                        VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
                                                          mm_cpumask(real_prev)));
                        cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
                }

                /* Start remote flushes and then read tlb_gen. */
                if (next != &init_mm)
                        cpumask_set_cpu(cpu, mm_cpumask(next));
                next_tlb_gen = atomic64_read(&next->context.tlb_gen);

                choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);

                /* Let nmi_uaccess_okay() know that we're changing CR3. */
                this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
                barrier();
        }

        if (need_flush) {
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
                load_new_mm_cr3(next->pgd, new_asid, true);

                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        } else {
                /* The new ASID is already up to date. */
                load_new_mm_cr3(next->pgd, new_asid, false);

                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
        }

        /* Make sure we write CR3 before loaded_mm. */
        barrier();

        this_cpu_write(cpu_tlbstate.loaded_mm, next);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

        if (next != real_prev) {
                cr4_update_pce_mm(next);
                switch_ldt(real_prev, next);
        }
}

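/*
 * enter_lazy_tlb() is a hint from the scheduler that we are entering a
 * kernel thread or other context without an mm.  Acceptable implementations
 * range from doing nothing at all to switching to init_mm; here we simply
 * mark the CPU as lazy so TLB shootdowns can be skipped for it.
 *
 * The scheduler may call enter_lazy_tlb() several times in a row and will
 * notify us that we're going back to a real mm by calling
 * switch_mm_irqs_off().
 */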
void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
        if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
                return;

        this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
}

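/*
 * Call this when reinitializing a CPU.  It fixes the following potential
 * problems:
 *
 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
 *   because the CPU was taken down and came back up with CR3's PCID
 *   bits clear.  CPU hotplug can do this).
 *
 * - The TLB contains junk in slots corresponding to inactive ASIDs.
 *
 * - The CPU went so far out to lunch that it may have missed a TLB flush.
 */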
void initialize_tlbstate_and_flush(void)
{
        int i;
        struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
        unsigned long cr3 = __read_cr3();

        /* Assert that CR3 already references the right mm. */
        WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

        /*
         * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
         * doesn't work like other CR4 bits because it can only be set from
         * long mode.)
         */
        WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
                !(cr4_read_shadow() & X86_CR4_PCIDE));

        /* Force ASID 0 and force a TLB flush. */
        write_cr3(build_cr3(mm->pgd, 0));

        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
        this_cpu_write(cpu_tlbstate.next_asid, 1);
        this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
        this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);

        for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
                this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
}

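/*
 * flush_tlb_func() is the handler that actually performs a TLB flush
 * described by a struct flush_tlb_info.  It runs with interrupts disabled,
 * either locally on the initiating CPU or remotely as an IPI handler.
 */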
static void flush_tlb_func(void *info)
{
        /*
         * We have three different tlb_gen values in here.  They are:
         *
         * - mm_tlb_gen:     the latest generation.
         * - local_tlb_gen:  the generation that this CPU has already caught
         *                   up to.
         * - f->new_tlb_gen: the generation that the requester of the flush
         *                   wants us to catch up to.
         */
        const struct flush_tlb_info *f = info;
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
        bool local = smp_processor_id() == f->initiating_cpu;
        unsigned long nr_invalidate = 0;
        u64 mm_tlb_gen;

        /* This code cannot presently handle being reentered. */
        VM_WARN_ON(!irqs_disabled());

        if (!local) {
                inc_irq_stat(irq_tlb_count);
                count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);

                /* Can only happen on remote CPUs */
                if (f->mm && f->mm != loaded_mm)
                        return;
        }

        if (unlikely(loaded_mm == &init_mm))
                return;

        VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
                   loaded_mm->context.ctx_id);

        if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) {
                /*
                 * We're in lazy mode.  We need to at least flush our
                 * paging-structure cache to avoid speculatively reading
                 * garbage into our TLB.  Since switching to init_mm is barely
                 * slower than a minimal flush, just switch to init_mm.
                 *
                 * This should be rare, with native_flush_tlb_multi() skipping
                 * IPIs to lazy TLB mode CPUs.
                 */
                switch_mm_irqs_off(NULL, &init_mm, NULL);
                return;
        }

        if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
                     f->new_tlb_gen <= local_tlb_gen)) {
                /*
                 * The TLB is already up to date with respect to
                 * f->new_tlb_gen.  The core might still be behind mm_tlb_gen,
                 * but reading mm_tlb_gen unnecessarily would have negative
                 * caching effects, so avoid it.
                 */
                return;
        }

        /*
         * Defer reading mm_tlb_gen as long as possible to avoid cache
         * contention.
         */
        mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);

        if (unlikely(local_tlb_gen == mm_tlb_gen)) {
                /*
                 * There's nothing to do: we're already up to date.  This can
                 * happen if two concurrent flushes happen -- the first flush
                 * to be handled can catch us all the way up, leaving no work
                 * for the second flush.
                 */
                goto done;
        }

        WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
        WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);

        /*
         * Our TLB is out of date.  Decide between a full and a partial
         * (ranged) flush.  A partial flush is only done when the request is
         * ranged and two extra conditions hold:
         *
         * 1. f->new_tlb_gen == local_tlb_gen + 1: this CPU has already done
         *    every flush needed to reach local_tlb_gen, so this one request
         *    is exactly the work that is missing.
         *
         * 2. f->new_tlb_gen == mm_tlb_gen: purely an optimization.  If more
         *    generations are already pending, a full flush now avoids
         *    another flush in the near future.
         */
        if (f->end != TLB_FLUSH_ALL &&
            f->new_tlb_gen == local_tlb_gen + 1 &&
            f->new_tlb_gen == mm_tlb_gen) {
                /* Partial flush */
                unsigned long addr = f->start;

                /* Partial flush cannot have invalid generations */
                VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);

                /* Partial flush must have valid mm */
                VM_WARN_ON(f->mm == NULL);

                nr_invalidate = (f->end - f->start) >> f->stride_shift;

                while (addr < f->end) {
                        flush_tlb_one_user(addr);
                        addr += 1UL << f->stride_shift;
                }
                if (local)
                        count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
        } else {
                /* Full flush. */
                nr_invalidate = TLB_FLUSH_ALL;

                flush_tlb_local();
                if (local)
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
        }

        /* Both paths above have now brought this CPU up to mm_tlb_gen. */
        this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);

        /* Tracing is done in a unified manner to reduce the code size */
done:
        trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
                        (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
                                          TLB_LOCAL_MM_SHOOTDOWN,
                        nr_invalidate);
}

static bool tlb_is_not_lazy(int cpu, void *data)
{
        return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}

DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);

STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
                                        const struct flush_tlb_info *info)
{
        /*
         * Do accounting and tracing.  Note that there are (and have always
         * been) cases in which a remote TLB flush will be traced, but
         * eventually would not happen.
         */
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (info->end == TLB_FLUSH_ALL)
                trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
        else
                trace_tlb_flush(TLB_REMOTE_SEND_IPI,
                                (info->end - info->start) >> PAGE_SHIFT);

        /*
         * If no page tables were freed, we can skip sending IPIs to
         * CPUs in lazy TLB mode.  They will flush the CPU themselves
         * at the next context switch.
         *
         * However, if page tables are getting freed, we need to send the
         * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
         * up on the new contents of what used to be page tables, while
         * doing a speculative memory access.
         */
        if (info->freed_tables)
                on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
        else
                on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
                                      (void *)info, 1, cpumask);
}

void flush_tlb_multi(const struct cpumask *cpumask,
                     const struct flush_tlb_info *info)
{
        __flush_tlb_multi(cpumask, info);
}

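/*
 * Flushing one page at a time with INVLPG only beats a full flush up to a
 * point; beyond tlb_single_page_flush_ceiling pages the code below falls
 * back to flushing everything.  The default of 33 is a heuristic and can be
 * tuned at runtime through the debugfs file created at the bottom of this
 * file (see Documentation/x86/tlb.rst).
 */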
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;

static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
#endif

static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
                        unsigned long start, unsigned long end,
                        unsigned int stride_shift, bool freed_tables,
                        u64 new_tlb_gen)
{
        struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);

#ifdef CONFIG_DEBUG_VM
        /*
         * Ensure that the following code is non-reentrant and flush_tlb_info
         * is not overwritten.  This means no TLB flushing is initiated by
         * interrupt handlers and machine-check exception handlers.
         */
        BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
#endif

        info->start = start;
        info->end = end;
        info->mm = mm;
        info->stride_shift = stride_shift;
        info->freed_tables = freed_tables;
        info->new_tlb_gen = new_tlb_gen;
        info->initiating_cpu = smp_processor_id();

        return info;
}

static void put_flush_tlb_info(void)
{
#ifdef CONFIG_DEBUG_VM
        /* Complete reentrancy prevention checks */
        barrier();
        this_cpu_dec(flush_tlb_info_idx);
#endif
}


void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end, unsigned int stride_shift,
                        bool freed_tables)
{
        struct flush_tlb_info *info;
        u64 new_tlb_gen;
        int cpu;

        cpu = get_cpu();

        /* Should we flush just the requested range? */
        if ((end == TLB_FLUSH_ALL) ||
            ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
                start = 0;
                end = TLB_FLUSH_ALL;
        }

        /* This is also a barrier that synchronizes with switch_mm(). */
        new_tlb_gen = inc_mm_tlb_gen(mm);

        info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
                                  new_tlb_gen);

        /*
         * flush_tlb_multi() is not optimized for the common case in which
         * only a local TLB flush is needed.  Optimize this use-case by
         * calling flush_tlb_func() directly in this case.
         */
        if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
                flush_tlb_multi(mm_cpumask(mm), info);
        } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
                flush_tlb_func(info);
                local_irq_enable();
        }

        put_flush_tlb_info();
        put_cpu();
}

static void do_flush_tlb_all(void *info)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
}

void flush_tlb_all(void)
{
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
}

static void do_kernel_range_flush(void *info)
{
        struct flush_tlb_info *f = info;
        unsigned long addr;

        /* Flush the range one page at a time with INVLPG. */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
                flush_tlb_one_kernel(addr);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
        /* Balance as user space task's flush, a bit conservative */
        if (end == TLB_FLUSH_ALL ||
            (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
                on_each_cpu(do_flush_tlb_all, NULL, 1);
        } else {
                struct flush_tlb_info *info;

                preempt_disable();
                info = get_flush_tlb_info(NULL, start, end, 0, false,
                                          TLB_GENERATION_INVALID);

                on_each_cpu(do_kernel_range_flush, info, 1);

                put_flush_tlb_info();
                preempt_enable();
        }
}

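/*
 * This can be used from process context to figure out what the value of
 * CR3 is without needing to do a (slow) __read_cr3().
 *
 * It's intended to be used for code like KVM that sneakily changes CR3
 * and needs to restore it.  It needs to be used very carefully.
 */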
unsigned long __get_current_cr3_fast(void)
{
        unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                                      this_cpu_read(cpu_tlbstate.loaded_mm_asid));

        /* For now, be very restrictive about when this can be called. */
        VM_WARN_ON(in_nmi() || preemptible());

        VM_BUG_ON(cr3 != __read_cr3());
        return cr3;
}
EXPORT_SYMBOL_GPL(__get_current_cr3_fast);

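/*
 * Flush one page in the kernel mapping
 */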
void flush_tlb_one_kernel(unsigned long addr)
{
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);

        /*
         * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
         * paravirt equivalent.  Even with PCID, this is sufficient: we only
         * use PCID if we also use global PTEs for the kernel mapping, and
         * INVLPG flushes global translations across all address spaces.
         *
         * If PTI is on, then the kernel is mapped with non-global PTEs, and
         * __flush_tlb_one_user() will flush the given address for the current
         * kernel address space and for its usermode counterpart, but it does
         * not flush it for other address spaces.
         */
        flush_tlb_one_user(addr);

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * See above.  We need to propagate the flush to all other address
         * spaces.  In principle, we only need to propagate it to kernelmode
         * address spaces, but the extra bookkeeping we would need is not
         * worth it.
         */
        this_cpu_write(cpu_tlbstate.invalidate_other, true);
}

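/*
 * Flush one page in the user mapping
 */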
STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr)
{
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);

        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

        if (!static_cpu_has(X86_FEATURE_PTI))
                return;

        /*
         * Some platforms #GP if we call invpcid(type=1/2) before
         * CR4.PCIDE=1.  Just use invalidate_user_asid() in case we are
         * called early.
         */
        if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
                invalidate_user_asid(loaded_mm_asid);
        else
                invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}

void flush_tlb_one_user(unsigned long addr)
{
        __flush_tlb_one_user(addr);
}

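/*
 * Flush everything
 */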
STATIC_NOPV void native_flush_tlb_global(void)
{
        unsigned long flags;

        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
                 * Using INVPCID is considerably faster than a pair of writes
                 * to CR4 sandwiched inside an IRQ flag save/restore.
                 *
                 * Note, this works with CR4.PCIDE=0 or 1.
                 */
                invpcid_flush_all();
                return;
        }

        /*
         * Read-modify-write to CR4 - protect it from preemption and
         * from interrupts.  (Use the raw variant because this code can
         * be called from deep inside debugging code.)
         */
        raw_local_irq_save(flags);

        __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));

        raw_local_irq_restore(flags);
}

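/*
 * Flush the entire current user mapping
 */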
STATIC_NOPV void native_flush_tlb_local(void)
{
        /*
         * Preemption or interrupts must be disabled to protect the
         * consistency of the loaded_mm_asid read with the CR3 write.
         */
        WARN_ON_ONCE(preemptible());

        invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));

        /* If current->mm == NULL then the read_cr3() "borrows" an mm */
        native_write_cr3(__native_read_cr3());
}

void flush_tlb_local(void)
{
        __flush_tlb_local();
}

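/*
 * Flush everything
 */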
void __flush_tlb_all(void)
{
        /*
         * This is to catch users with enabled preemption and the PGE feature
         * and don't trigger the warning in __native_flush_tlb().
         */
        VM_WARN_ON_ONCE(preemptible());

        if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
        } else {
                /*
                 * !PGE -> !PCID (setup_pcid()), thus every flush is total.
                 */
                flush_tlb_local();
        }
}
EXPORT_SYMBOL_GPL(__flush_tlb_all);

void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
        struct flush_tlb_info *info;

        int cpu = get_cpu();

        info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
                                  TLB_GENERATION_INVALID);

        /*
         * flush_tlb_multi() is not optimized for the common case in which
         * only a local TLB flush is needed.  Optimize this use-case by
         * calling flush_tlb_func() directly in this case.
         */
        if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
                flush_tlb_multi(&batch->cpumask, info);
        } else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
                flush_tlb_func(info);
                local_irq_enable();
        }

        cpumask_clear(&batch->cpumask);

        put_flush_tlb_info();
        put_cpu();
}

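/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or
 * switching the loaded mm.  It can also be dangerous if we
 * interrupted some kernel code that was temporarily using a
 * different mm.
 */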
bool nmi_uaccess_okay(void)
{
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        struct mm_struct *current_mm = current->mm;

        VM_WARN_ON_ONCE(!loaded_mm);

        /*
         * The condition we want to check is
         * current_mm->pgd == __va(read_cr3_pa()).  This may be slow, though,
         * if we're running in a VM with shadow paging, and nmi_uaccess_okay()
         * is supposed to be reasonably fast.
         *
         * Instead, we check the almost equivalent but somewhat conservative
         * condition below, and we rely on the fact that switch_mm_irqs_off()
         * sets loaded_mm to LOADED_MM_SWITCHING before writing to CR3.
         */
        if (loaded_mm != current_mm)
                return false;

        VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa()));

        return true;
}

static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
                                  size_t count, loff_t *ppos)
{
        char buf[32];
        unsigned int len;

        len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
        return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}

static ssize_t tlbflush_write_file(struct file *file,
                 const char __user *user_buf, size_t count, loff_t *ppos)
{
        char buf[32];
        ssize_t len;
        int ceiling;

        len = min(count, sizeof(buf) - 1);
        if (copy_from_user(buf, user_buf, len))
                return -EFAULT;

        buf[len] = '\0';
        if (kstrtoint(buf, 0, &ceiling))
                return -EINVAL;

        if (ceiling < 0)
                return -EINVAL;

        tlb_single_page_flush_ceiling = ceiling;
        return count;
}

static const struct file_operations fops_tlbflush = {
        .read = tlbflush_read_file,
        .write = tlbflush_write_file,
        .llseek = default_llseek,
};

static int __init create_tlb_single_page_flush_ceiling(void)
{
        debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
                            arch_debugfs_dir, NULL, &fops_tlbflush);
        return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);