0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * TLB flush routines for radix kernels.
0004  *
0005  * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
0006  */
0007 
0008 #include <linux/mm.h>
0009 #include <linux/hugetlb.h>
0010 #include <linux/memblock.h>
0011 #include <linux/mmu_context.h>
0012 #include <linux/sched/mm.h>
0013 #include <linux/debugfs.h>
0014 
0015 #include <asm/ppc-opcode.h>
0016 #include <asm/tlb.h>
0017 #include <asm/tlbflush.h>
0018 #include <asm/trace.h>
0019 #include <asm/cputhreads.h>
0020 #include <asm/plpar_wrappers.h>
0021 
0022 #include "internal.h"
0023 
0024 /*
0025  * tlbiel instruction for radix, set invalidation
0026  * i.e., r=1 and is=01 or is=10 or is=11
0027  */
0028 static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
0029                     unsigned int pid,
0030                     unsigned int ric, unsigned int prs)
0031 {
0032     unsigned long rb;
0033     unsigned long rs;
0034 
0035     rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
0036     rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
0037 
0038     asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
0039              : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
0040              : "memory");
0041 }
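     /*
      * A worked example of the field placement above, assuming the usual
      * PPC_BITLSHIFT(be) == 63 - be definition: each field is shifted so it
      * ends at the quoted IBM (big-endian) bit number. For set = 2, is = 3
      * and pid = 7 this gives
      *
      *     rb = (2UL << 12) | (3UL << 10);    i.e. 0x2c00
      *     rs = (7UL << 32);                  i.e. 0x0000000700000000
      *
      * with the set field ending at bit 51, IS at bit 53 and the PID at bit 31.
      */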
0042 
0043 static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
0044 {
0045     unsigned int set;
0046 
0047     asm volatile("ptesync": : :"memory");
0048 
0049     /*
0050      * Flush the first set of the TLB, and the entire Page Walk Cache
0051      * and partition table entries. Then flush the remaining sets of the
0052      * TLB.
0053      */
0054 
0055     if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
0056         /* MSR[HV] is set: flush partition scoped translations first. */
0057         tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
0058 
0059         if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
0060             for (set = 1; set < num_sets; set++)
0061                 tlbiel_radix_set_isa300(set, is, 0,
0062                             RIC_FLUSH_TLB, 0);
0063         }
0064     }
0065 
0066     /* Flush process scoped entries. */
0067     tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
0068 
0069     if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
0070         for (set = 1; set < num_sets; set++)
0071             tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
0072     }
0073 
0074     ppc_after_tlbiel_barrier();
0075 }
0076 
0077 void radix__tlbiel_all(unsigned int action)
0078 {
0079     unsigned int is;
0080 
0081     switch (action) {
0082     case TLB_INVAL_SCOPE_GLOBAL:
0083         is = 3;
0084         break;
0085     case TLB_INVAL_SCOPE_LPID:
0086         is = 2;
0087         break;
0088     default:
0089         BUG();
0090     }
0091 
0092     if (early_cpu_has_feature(CPU_FTR_ARCH_300))
0093         tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
0094     else
0095         WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
0096 
0097     asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
0098 }
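     /*
      * A minimal usage sketch, assuming the tlbiel_all()/tlbiel_all_lpid()
      * wrappers in asm/book3s/64/tlbflush.h: early boot and kexec paths call
      *
      *     radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
      *
      * to clear everything held by the local CPU, while guest teardown passes
      * TLB_INVAL_SCOPE_LPID so the IS field only targets LPID-matching entries.
      */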
0099 
0100 static __always_inline void __tlbiel_pid(unsigned long pid, int set,
0101                 unsigned long ric)
0102 {
0103     unsigned long rb,rs,prs,r;
0104 
0105     rb = PPC_BIT(53); /* IS = 1 */
0106     rb |= set << PPC_BITLSHIFT(51);
0107     rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
0108     prs = 1; /* process scoped */
0109     r = 1;   /* radix format */
0110 
0111     asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
0112              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0113     trace_tlbie(0, 1, rb, rs, ric, prs, r);
0114 }
0115 
0116 static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
0117 {
0118     unsigned long rb,rs,prs,r;
0119 
0120     rb = PPC_BIT(53); /* IS = 1 */
0121     rs = pid << PPC_BITLSHIFT(31);
0122     prs = 1; /* process scoped */
0123     r = 1;   /* radix format */
0124 
0125     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0126              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0127     trace_tlbie(0, 0, rb, rs, ric, prs, r);
0128 }
0129 
0130 static __always_inline void __tlbie_pid_lpid(unsigned long pid,
0131                          unsigned long lpid,
0132                          unsigned long ric)
0133 {
0134     unsigned long rb, rs, prs, r;
0135 
0136     rb = PPC_BIT(53); /* IS = 1 */
0137     rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
0138     prs = 1; /* process scoped */
0139     r = 1;   /* radix format */
0140 
0141     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0142              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0143     trace_tlbie(0, 0, rb, rs, ric, prs, r);
0144 }
0145 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
0146 {
0147     unsigned long rb,rs,prs,r;
0148 
0149     rb = PPC_BIT(52); /* IS = 2 */
0150     rs = lpid;
0151     prs = 0; /* partition scoped */
0152     r = 1;   /* radix format */
0153 
0154     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0155              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0156     trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
0157 }
0158 
0159 static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
0160 {
0161     unsigned long rb,rs,prs,r;
0162 
0163     rb = PPC_BIT(52); /* IS = 2 */
0164     rs = lpid;
0165     prs = 1; /* process scoped */
0166     r = 1;   /* radix format */
0167 
0168     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0169              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0170     trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
0171 }
0172 
0173 static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
0174                     unsigned long ap, unsigned long ric)
0175 {
0176     unsigned long rb,rs,prs,r;
0177 
0178     rb = va & ~(PPC_BITMASK(52, 63));
0179     rb |= ap << PPC_BITLSHIFT(58);
0180     rs = pid << PPC_BITLSHIFT(31);
0181     prs = 1; /* process scoped */
0182     r = 1;   /* radix format */
0183 
0184     asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
0185              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0186     trace_tlbie(0, 1, rb, rs, ric, prs, r);
0187 }
0188 
0189 static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
0190                        unsigned long ap, unsigned long ric)
0191 {
0192     unsigned long rb,rs,prs,r;
0193 
0194     rb = va & ~(PPC_BITMASK(52, 63));
0195     rb |= ap << PPC_BITLSHIFT(58);
0196     rs = pid << PPC_BITLSHIFT(31);
0197     prs = 1; /* process scoped */
0198     r = 1;   /* radix format */
0199 
0200     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0201              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0202     trace_tlbie(0, 0, rb, rs, ric, prs, r);
0203 }
0204 
0205 static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
0206                         unsigned long lpid,
0207                         unsigned long ap, unsigned long ric)
0208 {
0209     unsigned long rb, rs, prs, r;
0210 
0211     rb = va & ~(PPC_BITMASK(52, 63));
0212     rb |= ap << PPC_BITLSHIFT(58);
0213     rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
0214     prs = 1; /* process scoped */
0215     r = 1;   /* radix format */
0216 
0217     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0218              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0219     trace_tlbie(0, 0, rb, rs, ric, prs, r);
0220 }
0221 
0222 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
0223                         unsigned long ap, unsigned long ric)
0224 {
0225     unsigned long rb,rs,prs,r;
0226 
0227     rb = va & ~(PPC_BITMASK(52, 63));
0228     rb |= ap << PPC_BITLSHIFT(58);
0229     rs = lpid;
0230     prs = 0; /* partition scoped */
0231     r = 1;   /* radix format */
0232 
0233     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
0234              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
0235     trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
0236 }
0237 
0238 
0239 static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
0240                   unsigned long ap)
0241 {
0242     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0243         asm volatile("ptesync": : :"memory");
0244         __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
0245     }
0246 
0247     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0248         asm volatile("ptesync": : :"memory");
0249         __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
0250     }
0251 }
0252 
0253 static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
0254                     unsigned long ap)
0255 {
0256     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0257         asm volatile("ptesync": : :"memory");
0258         __tlbie_pid(0, RIC_FLUSH_TLB);
0259     }
0260 
0261     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0262         asm volatile("ptesync": : :"memory");
0263         __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
0264     }
0265 }
0266 
0267 static inline void fixup_tlbie_va_range_lpid(unsigned long va,
0268                          unsigned long pid,
0269                          unsigned long lpid,
0270                          unsigned long ap)
0271 {
0272     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0273         asm volatile("ptesync" : : : "memory");
0274         __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
0275     }
0276 
0277     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0278         asm volatile("ptesync" : : : "memory");
0279         __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
0280     }
0281 }
0282 
0283 static inline void fixup_tlbie_pid(unsigned long pid)
0284 {
0285     /*
0286      * We can use any address for the invalidation, pick one which is
0287      * probably unused as an optimisation.
0288      */
0289     unsigned long va = ((1UL << 52) - 1);
0290 
0291     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0292         asm volatile("ptesync": : :"memory");
0293         __tlbie_pid(0, RIC_FLUSH_TLB);
0294     }
0295 
0296     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0297         asm volatile("ptesync": : :"memory");
0298         __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
0299     }
0300 }
0301 
0302 static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
0303 {
0304     /*
0305      * We can use any address for the invalidation, pick one which is
0306      * probably unused as an optimisation.
0307      */
0308     unsigned long va = ((1UL << 52) - 1);
0309 
0310     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0311         asm volatile("ptesync" : : : "memory");
0312         __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
0313     }
0314 
0315     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0316         asm volatile("ptesync" : : : "memory");
0317         __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
0318                 RIC_FLUSH_TLB);
0319     }
0320 }
0321 
0322 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
0323                        unsigned long ap)
0324 {
0325     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0326         asm volatile("ptesync": : :"memory");
0327         __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
0328     }
0329 
0330     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0331         asm volatile("ptesync": : :"memory");
0332         __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
0333     }
0334 }
0335 
0336 static inline void fixup_tlbie_lpid(unsigned long lpid)
0337 {
0338     /*
0339      * We can use any address for the invalidation, pick one which is
0340      * probably unused as an optimisation.
0341      */
0342     unsigned long va = ((1UL << 52) - 1);
0343 
0344     if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
0345         asm volatile("ptesync": : :"memory");
0346         __tlbie_lpid(0, RIC_FLUSH_TLB);
0347     }
0348 
0349     if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
0350         asm volatile("ptesync": : :"memory");
0351         __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
0352     }
0353 }
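     /*
      * All of the fixup_tlbie_*() helpers above follow the same shape for the
      * two POWER9 tlbie errata handled here: the ERAT-bug case issues an extra
      * invalidation against PID/LPID 0, and the STQ-bug case re-issues the
      * previous tlbie, each preceded by its own ptesync.
      */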
0354 
0355 /*
0356  * We use 128 sets in radix mode and 256 sets in hpt mode.
0357  */
0358 static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
0359 {
0360     int set;
0361 
0362     asm volatile("ptesync": : :"memory");
0363 
0364     switch (ric) {
0365     case RIC_FLUSH_PWC:
0366 
0367         /* For PWC, only one flush is needed */
0368         __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
0369         ppc_after_tlbiel_barrier();
0370         return;
0371     case RIC_FLUSH_TLB:
0372         __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
0373         break;
0374     case RIC_FLUSH_ALL:
0375     default:
0376         /*
0377          * Flush the first set of the TLB, and if
0378          * we're doing a RIC_FLUSH_ALL, also flush
0379          * the entire Page Walk Cache.
0380          */
0381         __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
0382     }
0383 
0384     if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
0385         /* For the remaining sets, just flush the TLB */
0386         for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
0387             __tlbiel_pid(pid, set, RIC_FLUSH_TLB);
0388     }
0389 
0390     ppc_after_tlbiel_barrier();
0391     asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
0392 }
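     /*
      * Worked example of the set walk above: with POWER9_TLB_SETS_RADIX == 128,
      * RIC_FLUSH_ALL issues one tlbiel against set 0 (which also takes out the
      * PWC) followed by 127 RIC_FLUSH_TLB tlbiels for sets 1..127, while
      * RIC_FLUSH_PWC is always a single operation. On ISA v3.1
      * (CPU_FTR_ARCH_31) the per-set loop is skipped because one tlbiel
      * covers every set.
      */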
0393 
0394 static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
0395 {
0396     asm volatile("ptesync": : :"memory");
0397 
0398     /*
0399      * Work around the fact that the "ric" argument to __tlbie_pid
0400      * must be a compile-time constant to match the "i" constraint
0401      * in the asm statement.
0402      */
0403     switch (ric) {
0404     case RIC_FLUSH_TLB:
0405         __tlbie_pid(pid, RIC_FLUSH_TLB);
0406         fixup_tlbie_pid(pid);
0407         break;
0408     case RIC_FLUSH_PWC:
0409         __tlbie_pid(pid, RIC_FLUSH_PWC);
0410         break;
0411     case RIC_FLUSH_ALL:
0412     default:
0413         __tlbie_pid(pid, RIC_FLUSH_ALL);
0414         fixup_tlbie_pid(pid);
0415     }
0416     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0417 }
0418 
0419 static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
0420                    unsigned long ric)
0421 {
0422     asm volatile("ptesync" : : : "memory");
0423 
0424     /*
0425      * Work around the fact that the "ric" argument to __tlbie_pid_lpid
0426      * must be a compile-time constant to match the "i" constraint
0427      * in the asm statement.
0428      */
0429     switch (ric) {
0430     case RIC_FLUSH_TLB:
0431         __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
0432         fixup_tlbie_pid_lpid(pid, lpid);
0433         break;
0434     case RIC_FLUSH_PWC:
0435         __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
0436         break;
0437     case RIC_FLUSH_ALL:
0438     default:
0439         __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
0440         fixup_tlbie_pid_lpid(pid, lpid);
0441     }
0442     asm volatile("eieio; tlbsync; ptesync" : : : "memory");
0443 }
0444 struct tlbiel_pid {
0445     unsigned long pid;
0446     unsigned long ric;
0447 };
0448 
0449 static void do_tlbiel_pid(void *info)
0450 {
0451     struct tlbiel_pid *t = info;
0452 
0453     if (t->ric == RIC_FLUSH_TLB)
0454         _tlbiel_pid(t->pid, RIC_FLUSH_TLB);
0455     else if (t->ric == RIC_FLUSH_PWC)
0456         _tlbiel_pid(t->pid, RIC_FLUSH_PWC);
0457     else
0458         _tlbiel_pid(t->pid, RIC_FLUSH_ALL);
0459 }
0460 
0461 static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
0462                 unsigned long pid, unsigned long ric)
0463 {
0464     struct cpumask *cpus = mm_cpumask(mm);
0465     struct tlbiel_pid t = { .pid = pid, .ric = ric };
0466 
0467     on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
0468     /*
0469      * Always want the CPU translations to be invalidated with tlbiel in
0470      * these paths, so while coprocessors must use tlbie, we can not
0471      * optimise away the tlbiel component.
0472      */
0473     if (atomic_read(&mm->context.copros) > 0)
0474         _tlbie_pid(pid, RIC_FLUSH_ALL);
0475 }
0476 
0477 static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
0478 {
0479     asm volatile("ptesync": : :"memory");
0480 
0481     /*
0482      * Work around the fact that the "ric" argument to __tlbie_lpid
0483      * must be a compile-time constant to match the "i" constraint
0484      * in the asm statement.
0485      */
0486     switch (ric) {
0487     case RIC_FLUSH_TLB:
0488         __tlbie_lpid(lpid, RIC_FLUSH_TLB);
0489         fixup_tlbie_lpid(lpid);
0490         break;
0491     case RIC_FLUSH_PWC:
0492         __tlbie_lpid(lpid, RIC_FLUSH_PWC);
0493         break;
0494     case RIC_FLUSH_ALL:
0495     default:
0496         __tlbie_lpid(lpid, RIC_FLUSH_ALL);
0497         fixup_tlbie_lpid(lpid);
0498     }
0499     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0500 }
0501 
0502 static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
0503 {
0504     /*
0505      * Work around the fact that the "ric" argument to __tlbie_lpid_guest
0506      * must be a compile-time constant to match the "i" constraint
0507      * in the asm statement.
0508      */
0509     switch (ric) {
0510     case RIC_FLUSH_TLB:
0511         __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
0512         break;
0513     case RIC_FLUSH_PWC:
0514         __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
0515         break;
0516     case RIC_FLUSH_ALL:
0517     default:
0518         __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
0519     }
0520     fixup_tlbie_lpid(lpid);
0521     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0522 }
0523 
0524 static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
0525                     unsigned long pid, unsigned long page_size,
0526                     unsigned long psize)
0527 {
0528     unsigned long addr;
0529     unsigned long ap = mmu_get_ap(psize);
0530 
0531     for (addr = start; addr < end; addr += page_size)
0532         __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
0533 }
0534 
0535 static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
0536                        unsigned long psize, unsigned long ric)
0537 {
0538     unsigned long ap = mmu_get_ap(psize);
0539 
0540     asm volatile("ptesync": : :"memory");
0541     __tlbiel_va(va, pid, ap, ric);
0542     ppc_after_tlbiel_barrier();
0543 }
0544 
0545 static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
0546                     unsigned long pid, unsigned long page_size,
0547                     unsigned long psize, bool also_pwc)
0548 {
0549     asm volatile("ptesync": : :"memory");
0550     if (also_pwc)
0551         __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
0552     __tlbiel_va_range(start, end, pid, page_size, psize);
0553     ppc_after_tlbiel_barrier();
0554 }
0555 
0556 static inline void __tlbie_va_range(unsigned long start, unsigned long end,
0557                     unsigned long pid, unsigned long page_size,
0558                     unsigned long psize)
0559 {
0560     unsigned long addr;
0561     unsigned long ap = mmu_get_ap(psize);
0562 
0563     for (addr = start; addr < end; addr += page_size)
0564         __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
0565 
0566     fixup_tlbie_va_range(addr - page_size, pid, ap);
0567 }
0568 
0569 static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
0570                      unsigned long pid, unsigned long lpid,
0571                      unsigned long page_size,
0572                      unsigned long psize)
0573 {
0574     unsigned long addr;
0575     unsigned long ap = mmu_get_ap(psize);
0576 
0577     for (addr = start; addr < end; addr += page_size)
0578         __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
0579 
0580     fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
0581 }
0582 
0583 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
0584                       unsigned long psize, unsigned long ric)
0585 {
0586     unsigned long ap = mmu_get_ap(psize);
0587 
0588     asm volatile("ptesync": : :"memory");
0589     __tlbie_va(va, pid, ap, ric);
0590     fixup_tlbie_va(va, pid, ap);
0591     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0592 }
0593 
0594 struct tlbiel_va {
0595     unsigned long pid;
0596     unsigned long va;
0597     unsigned long psize;
0598     unsigned long ric;
0599 };
0600 
0601 static void do_tlbiel_va(void *info)
0602 {
0603     struct tlbiel_va *t = info;
0604 
0605     if (t->ric == RIC_FLUSH_TLB)
0606         _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
0607     else if (t->ric == RIC_FLUSH_PWC)
0608         _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
0609     else
0610         _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
0611 }
0612 
0613 static inline void _tlbiel_va_multicast(struct mm_struct *mm,
0614                 unsigned long va, unsigned long pid,
0615                 unsigned long psize, unsigned long ric)
0616 {
0617     struct cpumask *cpus = mm_cpumask(mm);
0618     struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
0619     on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
0620     if (atomic_read(&mm->context.copros) > 0)
0621         _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
0622 }
0623 
0624 struct tlbiel_va_range {
0625     unsigned long pid;
0626     unsigned long start;
0627     unsigned long end;
0628     unsigned long page_size;
0629     unsigned long psize;
0630     bool also_pwc;
0631 };
0632 
0633 static void do_tlbiel_va_range(void *info)
0634 {
0635     struct tlbiel_va_range *t = info;
0636 
0637     _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
0638                     t->psize, t->also_pwc);
0639 }
0640 
0641 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
0642                   unsigned long psize, unsigned long ric)
0643 {
0644     unsigned long ap = mmu_get_ap(psize);
0645 
0646     asm volatile("ptesync": : :"memory");
0647     __tlbie_lpid_va(va, lpid, ap, ric);
0648     fixup_tlbie_lpid_va(va, lpid, ap);
0649     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0650 }
0651 
0652 static inline void _tlbie_va_range(unsigned long start, unsigned long end,
0653                     unsigned long pid, unsigned long page_size,
0654                     unsigned long psize, bool also_pwc)
0655 {
0656     asm volatile("ptesync": : :"memory");
0657     if (also_pwc)
0658         __tlbie_pid(pid, RIC_FLUSH_PWC);
0659     __tlbie_va_range(start, end, pid, page_size, psize);
0660     asm volatile("eieio; tlbsync; ptesync": : :"memory");
0661 }
0662 
0663 static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
0664                     unsigned long pid, unsigned long lpid,
0665                     unsigned long page_size,
0666                     unsigned long psize, bool also_pwc)
0667 {
0668     asm volatile("ptesync" : : : "memory");
0669     if (also_pwc)
0670         __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
0671     __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
0672     asm volatile("eieio; tlbsync; ptesync" : : : "memory");
0673 }
0674 
0675 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
0676                 unsigned long start, unsigned long end,
0677                 unsigned long pid, unsigned long page_size,
0678                 unsigned long psize, bool also_pwc)
0679 {
0680     struct cpumask *cpus = mm_cpumask(mm);
0681     struct tlbiel_va_range t = { .start = start, .end = end,
0682                 .pid = pid, .page_size = page_size,
0683                 .psize = psize, .also_pwc = also_pwc };
0684 
0685     on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
0686     if (atomic_read(&mm->context.copros) > 0)
0687         _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
0688 }
0689 
0690 /*
0691  * Base TLB flushing operations:
0692  *
0693  *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
0694  *  - flush_tlb_page(vma, vmaddr) flushes one page
0695  *  - flush_tlb_range(vma, start, end) flushes a range of pages
0696  *  - flush_tlb_kernel_range(start, end) flushes kernel pages
0697  *
0698  *  - local_* variants of page and mm only apply to the current
0699  *    processor
0700  */
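     /*
      * A sketch of how the generic entry points above reach this file,
      * assuming the radix_enabled() dispatch in asm/book3s/64/tlbflush.h:
      *
      *     flush_tlb_mm(mm)            -> radix__flush_tlb_mm(mm)
      *     flush_tlb_page(vma, addr)   -> radix__flush_tlb_page(vma, addr)
      *     flush_tlb_range(vma, s, e)  -> radix__flush_tlb_range(vma, s, e)
      *     local_flush_tlb_mm(mm)      -> radix__local_flush_tlb_mm(mm)
      *
      * with the hash__* equivalents taken when the MMU is not in radix mode.
      */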
0701 void radix__local_flush_tlb_mm(struct mm_struct *mm)
0702 {
0703     unsigned long pid;
0704 
0705     preempt_disable();
0706     pid = mm->context.id;
0707     if (pid != MMU_NO_CONTEXT)
0708         _tlbiel_pid(pid, RIC_FLUSH_TLB);
0709     preempt_enable();
0710 }
0711 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
0712 
0713 #ifndef CONFIG_SMP
0714 void radix__local_flush_all_mm(struct mm_struct *mm)
0715 {
0716     unsigned long pid;
0717 
0718     preempt_disable();
0719     pid = mm->context.id;
0720     if (pid != MMU_NO_CONTEXT)
0721         _tlbiel_pid(pid, RIC_FLUSH_ALL);
0722     preempt_enable();
0723 }
0724 EXPORT_SYMBOL(radix__local_flush_all_mm);
0725 
0726 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
0727 {
0728     radix__local_flush_all_mm(mm);
0729 }
0730 #endif /* CONFIG_SMP */
0731 
0732 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
0733                        int psize)
0734 {
0735     unsigned long pid;
0736 
0737     preempt_disable();
0738     pid = mm->context.id;
0739     if (pid != MMU_NO_CONTEXT)
0740         _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
0741     preempt_enable();
0742 }
0743 
0744 void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
0745 {
0746 #ifdef CONFIG_HUGETLB_PAGE
0747     /* need the return fix for nohash.c */
0748     if (is_vm_hugetlb_page(vma))
0749         return radix__local_flush_hugetlb_page(vma, vmaddr);
0750 #endif
0751     radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
0752 }
0753 EXPORT_SYMBOL(radix__local_flush_tlb_page);
0754 
0755 static bool mm_needs_flush_escalation(struct mm_struct *mm)
0756 {
0757     /*
0758      * The P9 nest MMU has issues with the page walk cache caching PTEs
0759      * and not flushing them when RIC = 0 for a PID/LPID invalidate.
0760      *
0761      * This may have been fixed in shipping firmware (by disabling PWC
0762      * or preventing it from caching PTEs), but until that is confirmed,
0763      * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
0764      * to RIC=2.
0765      *
0766      * POWER10 (and P9P) does not have this problem.
0767      */
0768     if (cpu_has_feature(CPU_FTR_ARCH_31))
0769         return false;
0770     if (atomic_read(&mm->context.copros) > 0)
0771         return true;
0772     return false;
0773 }
0774 
0775 /*
0776  * If always_flush is true, then flush even if this CPU can't be removed
0777  * from mm_cpumask.
0778  */
0779 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
0780 {
0781     unsigned long pid = mm->context.id;
0782     int cpu = smp_processor_id();
0783 
0784     /*
0785      * A kthread could have done a mmget_not_zero() after the flushing CPU
0786      * checked mm_cpumask, and be in the process of kthread_use_mm when
0787      * interrupted here. In that case, current->mm will be set to mm,
0788      * because kthread_use_mm() setting ->mm and switching to the mm is
0789      * done with interrupts off.
0790      */
0791     if (current->mm == mm)
0792         goto out;
0793 
0794     if (current->active_mm == mm) {
0795         WARN_ON_ONCE(current->mm != NULL);
0796         /* Is a kernel thread and is using mm as the lazy tlb */
0797         mmgrab(&init_mm);
0798         current->active_mm = &init_mm;
0799         switch_mm_irqs_off(mm, &init_mm, current);
0800         mmdrop(mm);
0801     }
0802 
0803     /*
0804      * This IPI may be initiated from any source including those not
0805      * running the mm, so there may be a racing IPI that comes after
0806      * this one which finds the cpumask already clear. Check and avoid
0807      * underflowing the active_cpus count in that case. The race should
0808      * not otherwise be a problem, but the TLB must be flushed because
0809      * that's what the caller expects.
0810      */
0811     if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
0812         atomic_dec(&mm->context.active_cpus);
0813         cpumask_clear_cpu(cpu, mm_cpumask(mm));
0814         always_flush = true;
0815     }
0816 
0817 out:
0818     if (always_flush)
0819         _tlbiel_pid(pid, RIC_FLUSH_ALL);
0820 }
0821 
0822 #ifdef CONFIG_SMP
0823 static void do_exit_flush_lazy_tlb(void *arg)
0824 {
0825     struct mm_struct *mm = arg;
0826     exit_lazy_flush_tlb(mm, true);
0827 }
0828 
0829 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
0830 {
0831     /*
0832      * Would be nice if this was async so it could be run in
0833      * parallel with our local flush, but generic code does not
0834      * give a good API for it. Could extend the generic code or
0835      * make a special powerpc IPI for flushing TLBs.
0836      * For now it's not too performance critical.
0837      */
0838     smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
0839                 (void *)mm, 1);
0840 }
0841 
0842 #else /* CONFIG_SMP */
0843 static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
0844 #endif /* CONFIG_SMP */
0845 
0846 static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
0847 
0848 /*
0849  * Interval between flushes at which we send out IPIs to check whether the
0850  * mm_cpumask can be trimmed for the case where it's not a single-threaded
0851  * process flushing its own mm. The intent is to reduce the cost of later
0852  * flushes. Don't want this to be so low that it adds noticeable cost to TLB
0853  * flushing, or so high that it doesn't help reduce global TLBIEs.
0854  */
0855 static unsigned long tlb_mm_cpumask_trim_timer = 1073;
0856 
0857 static bool tick_and_test_trim_clock(void)
0858 {
0859     if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
0860             tlb_mm_cpumask_trim_timer) {
0861         __this_cpu_write(mm_cpumask_trim_clock, 0);
0862         return true;
0863     }
0864     return false;
0865 }
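     /*
      * With the default tlb_mm_cpumask_trim_timer of 1073, the per-CPU counter
      * above hits the threshold (and resets) once every 1073 calls on a given
      * CPU, so roughly one flush in a thousand pays for a trim attempt.
      */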
0866 
0867 enum tlb_flush_type {
0868     FLUSH_TYPE_NONE,
0869     FLUSH_TYPE_LOCAL,
0870     FLUSH_TYPE_GLOBAL,
0871 };
0872 
0873 static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
0874 {
0875     int active_cpus = atomic_read(&mm->context.active_cpus);
0876     int cpu = smp_processor_id();
0877 
0878     if (active_cpus == 0)
0879         return FLUSH_TYPE_NONE;
0880     if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
0881         if (current->mm != mm) {
0882             /*
0883              * Asynchronous flush sources may trim down to nothing
0884              * if the process is not running, so occasionally try
0885              * to trim.
0886              */
0887             if (tick_and_test_trim_clock()) {
0888                 exit_lazy_flush_tlb(mm, true);
0889                 return FLUSH_TYPE_NONE;
0890             }
0891         }
0892         return FLUSH_TYPE_LOCAL;
0893     }
0894 
0895     /* Coprocessors require TLBIE to invalidate nMMU. */
0896     if (atomic_read(&mm->context.copros) > 0)
0897         return FLUSH_TYPE_GLOBAL;
0898 
0899     /*
0900      * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
0901      * because the mm is being taken down anyway, and a TLBIE tends to
0902      * be faster than an IPI+TLBIEL.
0903      */
0904     if (fullmm)
0905         return FLUSH_TYPE_GLOBAL;
0906 
0907     /*
0908      * If we are running the only thread of a single-threaded process,
0909      * then we should almost always be able to trim off the rest of the
0910      * CPU mask (except in the case of use_mm() races), so always try
0911      * trimming the mask.
0912      */
0913     if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
0914         exit_flush_lazy_tlbs(mm);
0915         /*
0916          * use_mm() race could prevent IPIs from being able to clear
0917          * the cpumask here, however those users are established
0918          * after our first check (and so after the PTEs are removed),
0919          * and the TLB still gets flushed by the IPI, so this CPU
0920          * will only require a local flush.
0921          */
0922         return FLUSH_TYPE_LOCAL;
0923     }
0924 
0925     /*
0926      * Occasionally try to trim down the cpumask. It's possible this can
0927      * bring the mask to zero, which results in no flush.
0928      */
0929     if (tick_and_test_trim_clock()) {
0930         exit_flush_lazy_tlbs(mm);
0931         if (current->mm == mm)
0932             return FLUSH_TYPE_LOCAL;
0933         if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
0934             exit_lazy_flush_tlb(mm, true);
0935         return FLUSH_TYPE_NONE;
0936     }
0937 
0938     return FLUSH_TYPE_GLOBAL;
0939 }
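     /*
      * Summarising the decision above: no active CPUs -> FLUSH_TYPE_NONE; only
      * this CPU in the mask -> FLUSH_TYPE_LOCAL (with an occasional lazy-mm
      * trim that can downgrade to NONE); coprocessors attached or a fullmm
      * teardown -> FLUSH_TYPE_GLOBAL; a single-threaded process flushing its
      * own mm trims the other CPUs and stays LOCAL; everything else is GLOBAL,
      * apart from an occasional trim attempt that may drop to LOCAL or NONE.
      */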
0940 
0941 #ifdef CONFIG_SMP
0942 void radix__flush_tlb_mm(struct mm_struct *mm)
0943 {
0944     unsigned long pid;
0945     enum tlb_flush_type type;
0946 
0947     pid = mm->context.id;
0948     if (unlikely(pid == MMU_NO_CONTEXT))
0949         return;
0950 
0951     preempt_disable();
0952     /*
0953      * Order loads of mm_cpumask (in flush_type_needed) vs previous
0954      * stores to clear ptes before the invalidate. See barrier in
0955      * switch_mm_irqs_off
0956      */
0957     smp_mb();
0958     type = flush_type_needed(mm, false);
0959     if (type == FLUSH_TYPE_LOCAL) {
0960         _tlbiel_pid(pid, RIC_FLUSH_TLB);
0961     } else if (type == FLUSH_TYPE_GLOBAL) {
0962         if (!mmu_has_feature(MMU_FTR_GTSE)) {
0963             unsigned long tgt = H_RPTI_TARGET_CMMU;
0964 
0965             if (atomic_read(&mm->context.copros) > 0)
0966                 tgt |= H_RPTI_TARGET_NMMU;
0967             pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
0968                            H_RPTI_PAGE_ALL, 0, -1UL);
0969         } else if (cputlb_use_tlbie()) {
0970             if (mm_needs_flush_escalation(mm))
0971                 _tlbie_pid(pid, RIC_FLUSH_ALL);
0972             else
0973                 _tlbie_pid(pid, RIC_FLUSH_TLB);
0974         } else {
0975             _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
0976         }
0977     }
0978     preempt_enable();
0979 }
0980 EXPORT_SYMBOL(radix__flush_tlb_mm);
0981 
0982 static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
0983 {
0984     unsigned long pid;
0985     enum tlb_flush_type type;
0986 
0987     pid = mm->context.id;
0988     if (unlikely(pid == MMU_NO_CONTEXT))
0989         return;
0990 
0991     preempt_disable();
0992     smp_mb(); /* see radix__flush_tlb_mm */
0993     type = flush_type_needed(mm, fullmm);
0994     if (type == FLUSH_TYPE_LOCAL) {
0995         _tlbiel_pid(pid, RIC_FLUSH_ALL);
0996     } else if (type == FLUSH_TYPE_GLOBAL) {
0997         if (!mmu_has_feature(MMU_FTR_GTSE)) {
0998             unsigned long tgt = H_RPTI_TARGET_CMMU;
0999             unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1000                          H_RPTI_TYPE_PRT;
1001 
1002             if (atomic_read(&mm->context.copros) > 0)
1003                 tgt |= H_RPTI_TARGET_NMMU;
1004             pseries_rpt_invalidate(pid, tgt, type,
1005                            H_RPTI_PAGE_ALL, 0, -1UL);
1006         } else if (cputlb_use_tlbie())
1007             _tlbie_pid(pid, RIC_FLUSH_ALL);
1008         else
1009             _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
1010     }
1011     preempt_enable();
1012 }
1013 
1014 void radix__flush_all_mm(struct mm_struct *mm)
1015 {
1016     __flush_all_mm(mm, false);
1017 }
1018 EXPORT_SYMBOL(radix__flush_all_mm);
1019 
1020 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
1021                  int psize)
1022 {
1023     unsigned long pid;
1024     enum tlb_flush_type type;
1025 
1026     pid = mm->context.id;
1027     if (unlikely(pid == MMU_NO_CONTEXT))
1028         return;
1029 
1030     preempt_disable();
1031     smp_mb(); /* see radix__flush_tlb_mm */
1032     type = flush_type_needed(mm, false);
1033     if (type == FLUSH_TYPE_LOCAL) {
1034         _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1035     } else if (type == FLUSH_TYPE_GLOBAL) {
1036         if (!mmu_has_feature(MMU_FTR_GTSE)) {
1037             unsigned long tgt, pg_sizes, size;
1038 
1039             tgt = H_RPTI_TARGET_CMMU;
1040             pg_sizes = psize_to_rpti_pgsize(psize);
1041             size = 1UL << mmu_psize_to_shift(psize);
1042 
1043             if (atomic_read(&mm->context.copros) > 0)
1044                 tgt |= H_RPTI_TARGET_NMMU;
1045             pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
1046                            pg_sizes, vmaddr,
1047                            vmaddr + size);
1048         } else if (cputlb_use_tlbie())
1049             _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
1050         else
1051             _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
1052     }
1053     preempt_enable();
1054 }
1055 
1056 void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
1057 {
1058 #ifdef CONFIG_HUGETLB_PAGE
1059     if (is_vm_hugetlb_page(vma))
1060         return radix__flush_hugetlb_page(vma, vmaddr);
1061 #endif
1062     radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
1063 }
1064 EXPORT_SYMBOL(radix__flush_tlb_page);
1065 
1066 #endif /* CONFIG_SMP */
1067 
1068 static void do_tlbiel_kernel(void *info)
1069 {
1070     _tlbiel_pid(0, RIC_FLUSH_ALL);
1071 }
1072 
1073 static inline void _tlbiel_kernel_broadcast(void)
1074 {
1075     on_each_cpu(do_tlbiel_kernel, NULL, 1);
1076     if (tlbie_capable) {
1077         /*
1078          * Coherent accelerators don't refcount kernel memory mappings,
1079          * so have to always issue a tlbie for them. This is quite a
1080          * slow path anyway.
1081          */
1082         _tlbie_pid(0, RIC_FLUSH_ALL);
1083     }
1084 }
1085 
1086 /*
1087  * If kernel TLBIs ever become local rather than global, then
1088  * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
1089  * assumes kernel TLBIs are global.
1090  */
1091 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
1092 {
1093     if (!mmu_has_feature(MMU_FTR_GTSE)) {
1094         unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
1095         unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1096                      H_RPTI_TYPE_PRT;
1097 
1098         pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
1099                        start, end);
1100     } else if (cputlb_use_tlbie())
1101         _tlbie_pid(0, RIC_FLUSH_ALL);
1102     else
1103         _tlbiel_kernel_broadcast();
1104 }
1105 EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
1106 
1107 #define TLB_FLUSH_ALL -1UL
1108 
1109 /*
1110  * Number of pages above which we invalidate the entire PID rather than
1111  * flush individual pages, for local and global flushes respectively.
1112  *
1113  * tlbie goes out to the interconnect and individual ops are more costly.
1114  * It also does not iterate over sets like the local tlbiel variant when
1115  * invalidating a full PID, so it has a far lower threshold to change from
1116  * individual page flushes to full-pid flushes.
1117  */
1118 static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
1119 static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
1120 
1121 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
1122                         unsigned long start, unsigned long end)
1123 {
1124     unsigned long pid;
1125     unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
1126     unsigned long page_size = 1UL << page_shift;
1127     unsigned long nr_pages = (end - start) >> page_shift;
1128     bool fullmm = (end == TLB_FLUSH_ALL);
1129     bool flush_pid, flush_pwc = false;
1130     enum tlb_flush_type type;
1131 
1132     pid = mm->context.id;
1133     if (unlikely(pid == MMU_NO_CONTEXT))
1134         return;
1135 
1136     preempt_disable();
1137     smp_mb(); /* see radix__flush_tlb_mm */
1138     type = flush_type_needed(mm, fullmm);
1139     if (type == FLUSH_TYPE_NONE)
1140         goto out;
1141 
1142     if (fullmm)
1143         flush_pid = true;
1144     else if (type == FLUSH_TYPE_GLOBAL)
1145         flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1146     else
1147         flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1148     /*
1149      * full pid flush already does the PWC flush. if it is not full pid
1150      * flush check the range is more than PMD and force a pwc flush
1151      * mremap() depends on this behaviour.
1152      */
1153     if (!flush_pid && (end - start) >= PMD_SIZE)
1154         flush_pwc = true;
1155 
1156     if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1157         unsigned long type = H_RPTI_TYPE_TLB;
1158         unsigned long tgt = H_RPTI_TARGET_CMMU;
1159         unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1160 
1161         if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
1162             pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
1163         if (atomic_read(&mm->context.copros) > 0)
1164             tgt |= H_RPTI_TARGET_NMMU;
1165         if (flush_pwc)
1166             type |= H_RPTI_TYPE_PWC;
1167         pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1168     } else if (flush_pid) {
1169         /*
1170          * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL
1171          */
1172         if (type == FLUSH_TYPE_LOCAL) {
1173             _tlbiel_pid(pid, RIC_FLUSH_ALL);
1174         } else {
1175             if (cputlb_use_tlbie()) {
1176                 _tlbie_pid(pid, RIC_FLUSH_ALL);
1177             } else {
1178                 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
1179             }
1180         }
1181     } else {
1182         bool hflush = false;
1183         unsigned long hstart, hend;
1184 
1185         if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
1186             hstart = (start + PMD_SIZE - 1) & PMD_MASK;
1187             hend = end & PMD_MASK;
1188             if (hstart < hend)
1189                 hflush = true;
1190         }
1191 
1192         if (type == FLUSH_TYPE_LOCAL) {
1193             asm volatile("ptesync": : :"memory");
1194             if (flush_pwc)
1195                 /* For PWC, only one flush is needed */
1196                 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
1197             __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
1198             if (hflush)
1199                 __tlbiel_va_range(hstart, hend, pid,
1200                         PMD_SIZE, MMU_PAGE_2M);
1201             ppc_after_tlbiel_barrier();
1202         } else if (cputlb_use_tlbie()) {
1203             asm volatile("ptesync": : :"memory");
1204             if (flush_pwc)
1205                 __tlbie_pid(pid, RIC_FLUSH_PWC);
1206             __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
1207             if (hflush)
1208                 __tlbie_va_range(hstart, hend, pid,
1209                         PMD_SIZE, MMU_PAGE_2M);
1210             asm volatile("eieio; tlbsync; ptesync": : :"memory");
1211         } else {
1212             _tlbiel_va_range_multicast(mm,
1213                     start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
1214             if (hflush)
1215                 _tlbiel_va_range_multicast(mm,
1216                     hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
1217         }
1218     }
1219 out:
1220     preempt_enable();
1221 }
1222 
1223 void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
1224              unsigned long end)
1225 
1226 {
1227 #ifdef CONFIG_HUGETLB_PAGE
1228     if (is_vm_hugetlb_page(vma))
1229         return radix__flush_hugetlb_tlb_range(vma, start, end);
1230 #endif
1231 
1232     __radix__flush_tlb_range(vma->vm_mm, start, end);
1233 }
1234 EXPORT_SYMBOL(radix__flush_tlb_range);
1235 
1236 static int radix_get_mmu_psize(int page_size)
1237 {
1238     int psize;
1239 
1240     if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
1241         psize = mmu_virtual_psize;
1242     else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
1243         psize = MMU_PAGE_2M;
1244     else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
1245         psize = MMU_PAGE_1G;
1246     else
1247         return -1;
1248     return psize;
1249 }
1250 
1251 /*
1252  * Flush partition scoped LPID address translation for all CPUs.
1253  */
1254 void radix__flush_tlb_lpid_page(unsigned int lpid,
1255                     unsigned long addr,
1256                     unsigned long page_size)
1257 {
1258     int psize = radix_get_mmu_psize(page_size);
1259 
1260     _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
1261 }
1262 EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
1263 
1264 /*
1265  * Flush partition scoped PWC from LPID for all CPUs.
1266  */
1267 void radix__flush_pwc_lpid(unsigned int lpid)
1268 {
1269     _tlbie_lpid(lpid, RIC_FLUSH_PWC);
1270 }
1271 EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
1272 
1273 /*
1274  * Flush partition scoped translations from LPID (=LPIDR)
1275  */
1276 void radix__flush_all_lpid(unsigned int lpid)
1277 {
1278     _tlbie_lpid(lpid, RIC_FLUSH_ALL);
1279 }
1280 EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
1281 
1282 /*
1283  * Flush process scoped translations from LPID (=LPIDR)
1284  */
1285 void radix__flush_all_lpid_guest(unsigned int lpid)
1286 {
1287     _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
1288 }
1289 
1290 void radix__tlb_flush(struct mmu_gather *tlb)
1291 {
1292     int psize = 0;
1293     struct mm_struct *mm = tlb->mm;
1294     int page_size = tlb->page_size;
1295     unsigned long start = tlb->start;
1296     unsigned long end = tlb->end;
1297 
1298     /*
1299      * if page size is not something we understand, do a full mm flush
1300      *
1301      * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
1302      * that flushes the process table entry cache upon process teardown.
1303      * See the comment for radix in arch_exit_mmap().
1304      */
1305     if (tlb->fullmm || tlb->need_flush_all) {
1306         __flush_all_mm(mm, true);
1307     } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
1308         if (!tlb->freed_tables)
1309             radix__flush_tlb_mm(mm);
1310         else
1311             radix__flush_all_mm(mm);
1312     } else {
1313         if (!tlb->freed_tables)
1314             radix__flush_tlb_range_psize(mm, start, end, psize);
1315         else
1316             radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
1317     }
1318 }
1319 
1320 static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
1321                 unsigned long start, unsigned long end,
1322                 int psize, bool also_pwc)
1323 {
1324     unsigned long pid;
1325     unsigned int page_shift = mmu_psize_defs[psize].shift;
1326     unsigned long page_size = 1UL << page_shift;
1327     unsigned long nr_pages = (end - start) >> page_shift;
1328     bool fullmm = (end == TLB_FLUSH_ALL);
1329     bool flush_pid;
1330     enum tlb_flush_type type;
1331 
1332     pid = mm->context.id;
1333     if (unlikely(pid == MMU_NO_CONTEXT))
1334         return;
1335 
1336     fullmm = (end == TLB_FLUSH_ALL);
1337 
1338     preempt_disable();
1339     smp_mb(); /* see radix__flush_tlb_mm */
1340     type = flush_type_needed(mm, fullmm);
1341     if (type == FLUSH_TYPE_NONE)
1342         goto out;
1343 
1344     if (fullmm)
1345         flush_pid = true;
1346     else if (type == FLUSH_TYPE_GLOBAL)
1347         flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1348     else
1349         flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
1350 
1351     if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
1352         unsigned long tgt = H_RPTI_TARGET_CMMU;
1353         unsigned long type = H_RPTI_TYPE_TLB;
1354         unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
1355 
1356         if (also_pwc)
1357             type |= H_RPTI_TYPE_PWC;
1358         if (atomic_read(&mm->context.copros) > 0)
1359             tgt |= H_RPTI_TARGET_NMMU;
1360         pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
1361     } else if (flush_pid) {
1362         if (type == FLUSH_TYPE_LOCAL) {
1363             _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1364         } else {
1365             if (cputlb_use_tlbie()) {
1366                 if (mm_needs_flush_escalation(mm))
1367                     also_pwc = true;
1368 
1369                 _tlbie_pid(pid,
1370                     also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1371             } else {
1372                 _tlbiel_pid_multicast(mm, pid,
1373                     also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
1374             }
1375 
1376         }
1377     } else {
1378         if (type == FLUSH_TYPE_LOCAL)
1379             _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
1380         else if (cputlb_use_tlbie())
1381             _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
1382         else
1383             _tlbiel_va_range_multicast(mm,
1384                     start, end, pid, page_size, psize, also_pwc);
1385     }
1386 out:
1387     preempt_enable();
1388 }
1389 
1390 void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
1391                   unsigned long end, int psize)
1392 {
1393     return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
1394 }
1395 
1396 void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
1397                       unsigned long end, int psize)
1398 {
1399     __radix__flush_tlb_range_psize(mm, start, end, psize, true);
1400 }
1401 
1402 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1403 void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
1404 {
1405     unsigned long pid, end;
1406     enum tlb_flush_type type;
1407 
1408     pid = mm->context.id;
1409     if (unlikely(pid == MMU_NO_CONTEXT))
1410         return;
1411 
1412     /* 4k page size, just blow the world */
1413     if (PAGE_SIZE == 0x1000) {
1414         radix__flush_all_mm(mm);
1415         return;
1416     }
1417 
1418     end = addr + HPAGE_PMD_SIZE;
1419 
1420     /* Otherwise first do the PWC, then iterate the pages. */
1421     preempt_disable();
1422     smp_mb(); /* see radix__flush_tlb_mm */
1423     type = flush_type_needed(mm, false);
1424     if (type == FLUSH_TYPE_LOCAL) {
1425         _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1426     } else if (type == FLUSH_TYPE_GLOBAL) {
1427         if (!mmu_has_feature(MMU_FTR_GTSE)) {
1428             unsigned long tgt, type, pg_sizes;
1429 
1430             tgt = H_RPTI_TARGET_CMMU;
1431             type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
1432                    H_RPTI_TYPE_PRT;
1433             pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
1434 
1435             if (atomic_read(&mm->context.copros) > 0)
1436                 tgt |= H_RPTI_TARGET_NMMU;
1437             pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
1438                            addr, end);
1439         } else if (cputlb_use_tlbie())
1440             _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1441         else
1442             _tlbiel_va_range_multicast(mm,
1443                     addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
1444     }
1445 
1446     preempt_enable();
1447 }
1448 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1449 
1450 void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
1451                 unsigned long start, unsigned long end)
1452 {
1453     radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
1454 }
1455 EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
1456 
1457 void radix__flush_tlb_all(void)
1458 {
1459     unsigned long rb,prs,r,rs;
1460     unsigned long ric = RIC_FLUSH_ALL;
1461 
1462     rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
1463     prs = 0; /* partition scoped */
1464     r = 1;   /* radix format */
1465     rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
1466 
1467     asm volatile("ptesync": : :"memory");
1468     /*
1469      * now flush guest entries by passing PRS = 1 and LPID != 0
1470      */
1471     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1472              : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
1473     /*
1474      * now flush host entires by passing PRS = 0 and LPID == 0
1475      */
1476     asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
1477              : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
1478     asm volatile("eieio; tlbsync; ptesync": : :"memory");
1479 }
1480 
1481 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1482 /*
1483  * Performs process-scoped invalidations for a given LPID
1484  * as part of H_RPT_INVALIDATE hcall.
1485  */
1486 void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
1487                  unsigned long type, unsigned long pg_sizes,
1488                  unsigned long start, unsigned long end)
1489 {
1490     unsigned long psize, nr_pages;
1491     struct mmu_psize_def *def;
1492     bool flush_pid;
1493 
1494     /*
1495      * A H_RPTI_TYPE_ALL request implies RIC=3, hence
1496      * do a single IS=1 based flush.
1497      */
1498     if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
1499         _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
1500         return;
1501     }
1502 
1503     if (type & H_RPTI_TYPE_PWC)
1504         _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
1505 
1506     /* Full PID flush */
1507     if (start == 0 && end == -1)
1508         return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1509 
1510     /* Do range invalidation for all the valid page sizes */
1511     for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
1512         def = &mmu_psize_defs[psize];
1513         if (!(pg_sizes & def->h_rpt_pgsize))
1514             continue;
1515 
1516         nr_pages = (end - start) >> def->shift;
1517         flush_pid = nr_pages > tlb_single_page_flush_ceiling;
1518 
1519         /*
1520          * If the number of pages spanning the range is above
1521          * the ceiling, convert the request into a full PID flush.
1522          * And since PID flush takes out all the page sizes, there
1523          * is no need to consider remaining page sizes.
1524          */
1525         if (flush_pid) {
1526             _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
1527             return;
1528         }
1529         _tlbie_va_range_lpid(start, end, pid, lpid,
1530                      (1UL << def->shift), psize, false);
1531     }
1532 }
1533 EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
1534 
1535 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1536 
1537 static int __init create_tlb_single_page_flush_ceiling(void)
1538 {
1539     debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
1540                arch_debugfs_dir, &tlb_single_page_flush_ceiling);
1541     debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
1542                arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
1543     return 0;
1544 }
1545 late_initcall(create_tlb_single_page_flush_ceiling);
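     /*
      * Assuming arch_debugfs_dir is the "powerpc" directory of a debugfs
      * mounted at /sys/kernel/debug, the two ceilings can be read and tuned
      * at runtime, e.g.:
      *
      *     cat /sys/kernel/debug/powerpc/tlb_single_page_flush_ceiling
      *     echo 64 > /sys/kernel/debug/powerpc/tlb_single_page_flush_ceiling
      */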
1546