/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _SPARC64_TSB_H
#define _SPARC64_TSB_H

/* The sparc64 TSB is similar to the powerpc hashtables.  It's a
 * power-of-2 sized table of TAG/PTE pairs.  The cpu precomputes
 * pointers into this table for 8K and 64K page sizes, and also a
 * comparison TAG based upon the virtual address and context which
 * faults.
 *
 * TLB miss trap handler software does the actual lookup via something
 * of the form:
 *
 *  ldxa        [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
 *  ldxa        [%g0] ASI_{D,I}MMU, %g6
 *  sllx        %g6, 22, %g6
 *  srlx        %g6, 22, %g6
 *  ldda        [%g1] ASI_NUCLEUS_QUAD_LDD, %g4
 *  cmp     %g4, %g6
 *  bne,pn  %xcc, tsb_miss_{d,i}tlb
 *   mov        FAULT_CODE_{D,I}TLB, %g3
 *  stxa        %g5, [%g0] ASI_{D,I}TLB_DATA_IN
 *  retry
 *
 * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
 * PTE.  The TAG is of the same layout as the TLB TAG TARGET mmu
 * register which is:
 *
 * -------------------------------------------------
 * |  -  |  CONTEXT |  -  |    VADDR bits 63:22    |
 * -------------------------------------------------
 *  63 61 60      48 47 42 41                     0
 *
 * But actually, since we use per-mm TSBs, we zero out the CONTEXT
 * field.
 *
 * Like the powerpc hashtables we need to use locking in order to
 * synchronize while we update the entries.  PTE updates need locking
 * as well.
 *
 * We need to choose the lock bit for the TSB entry carefully.  We
 * choose to use bit 47 in the tag.  Also, since we never map anything
 * at page zero in context zero, we use zero as an invalid tag entry.
 * When the lock bit is set, this forces a tag comparison failure.
 */

#define TSB_TAG_LOCK_BIT    47
#define TSB_TAG_LOCK_HIGH   (1 << (TSB_TAG_LOCK_BIT - 32))

#define TSB_TAG_INVALID_BIT 46
#define TSB_TAG_INVALID_HIGH    (1 << (TSB_TAG_INVALID_BIT - 32))
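
/* As a rough C model of the tag encoding described above (an
 * illustrative sketch, not code the kernel uses), the tag for a
 * per-mm TSB is just the upper virtual address bits, with the
 * CONTEXT field left zero:
 *
 *	unsigned long tsb_tag(unsigned long vaddr)
 *	{
 *		return vaddr >> 22;
 *	}
 *
 * TSB_TAG_LOCK_HIGH and TSB_TAG_INVALID_HIGH are expressed relative
 * to the upper 32-bit word of the tag because the lock and invalid
 * manipulations below (lduwa/casa) operate on that word: bit 47 of
 * the 64-bit tag is bit 15 (47 - 32) of the high word, and bit 46
 * is bit 14.
 */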

/* Some cpus support physical address quad loads.  We want to use
 * those if possible so we don't need to hard-lock the TSB mapping
 * into the TLB.  We encode some instruction patching in order to
 * support this.
 *
 * The kernel TSB is locked into the TLB by virtue of being in the
 * kernel image, so we don't play these games for swapper_tsb access.
 */
#ifndef __ASSEMBLY__
struct tsb_ldquad_phys_patch_entry {
    unsigned int    addr;
    unsigned int    sun4u_insn;
    unsigned int    sun4v_insn;
};
extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch,
    __tsb_ldquad_phys_patch_end;

struct tsb_phys_patch_entry {
    unsigned int    addr;
    unsigned int    insn;
};
extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
#endif
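
/* A minimal C sketch of how a patch table like the above can be
 * applied at boot (illustrative only; the function and helper names
 * here are hypothetical, and the kernel's real patch loop lives in
 * the boot code, not in this header):
 *
 *	void apply_ldquad_patches(int is_sun4v)
 *	{
 *		struct tsb_ldquad_phys_patch_entry *p;
 *
 *		for (p = &__tsb_ldquad_phys_patch;
 *		     p < &__tsb_ldquad_phys_patch_end; p++) {
 *			unsigned int insn = is_sun4v ? p->sun4v_insn
 *						     : p->sun4u_insn;
 *			*(unsigned int *)(unsigned long)p->addr = insn;
 *			flush_icache_insn(p->addr);	// hypothetical helper
 *		}
 *	}
 *
 * Each 661: label below records its own address plus the replacement
 * instruction(s) in the patch section, which is what makes this
 * table-driven instruction rewriting possible.
 */
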
#define TSB_LOAD_QUAD(TSB, REG) \
661:    ldda        [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
    .section    .tsb_ldquad_phys_patch, "ax"; \
    .word       661b; \
    ldda        [TSB] ASI_QUAD_LDD_PHYS, REG; \
    ldda        [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \
    .previous

#define TSB_LOAD_TAG_HIGH(TSB, REG) \
661:    lduwa       [TSB] ASI_N, REG; \
    .section    .tsb_phys_patch, "ax"; \
    .word       661b; \
    lduwa       [TSB] ASI_PHYS_USE_EC, REG; \
    .previous

#define TSB_LOAD_TAG(TSB, REG) \
661:    ldxa        [TSB] ASI_N, REG; \
    .section    .tsb_phys_patch, "ax"; \
    .word       661b; \
    ldxa        [TSB] ASI_PHYS_USE_EC, REG; \
    .previous

#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
661:    casa        [TSB] ASI_N, REG1, REG2; \
    .section    .tsb_phys_patch, "ax"; \
    .word       661b; \
    casa        [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
    .previous

#define TSB_CAS_TAG(TSB, REG1, REG2) \
661:    casxa       [TSB] ASI_N, REG1, REG2; \
    .section    .tsb_phys_patch, "ax"; \
    .word       661b; \
    casxa       [TSB] ASI_PHYS_USE_EC, REG1, REG2; \
    .previous

#define TSB_STORE(ADDR, VAL) \
661:    stxa        VAL, [ADDR] ASI_N; \
    .section    .tsb_phys_patch, "ax"; \
    .word       661b; \
    stxa        VAL, [ADDR] ASI_PHYS_USE_EC; \
    .previous

#define TSB_LOCK_TAG(TSB, REG1, REG2)   \
99: TSB_LOAD_TAG_HIGH(TSB, REG1);   \
    sethi   %hi(TSB_TAG_LOCK_HIGH), REG2;\
    andcc   REG1, REG2, %g0;    \
    bne,pn  %icc, 99b;      \
     nop;               \
    TSB_CAS_TAG_HIGH(TSB, REG1, REG2);  \
    cmp REG1, REG2;     \
    bne,pn  %icc, 99b;      \
     nop;               \

#define TSB_WRITE(TSB, TTE, TAG) \
    add TSB, 0x8, TSB;   \
    TSB_STORE(TSB, TTE);     \
    sub TSB, 0x8, TSB;   \
    TSB_STORE(TSB, TAG);

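/* Taken together, TSB_LOCK_TAG and TSB_WRITE implement the entry
 * update protocol described at the top of this file.  In rough C
 * pseudocode (illustrative only; tag_hi is the upper 32-bit word of
 * the tag, and cas() returns the old memory value):
 *
 *	do {
 *		while (ent->tag_hi & TSB_TAG_LOCK_HIGH)
 *			cpu_relax();		// spin while locked
 *		old = ent->tag_hi;
 *	} while (cas(&ent->tag_hi, old, TSB_TAG_LOCK_HIGH) != old);
 *	ent->pte = tte;		// PTE first ...
 *	ent->tag = tag;		// ... then the tag, which also
 *				// clears the lock bit
 *
 * Writing the tag last is what makes the update safe against a
 * concurrent lookup: while the lock bit is set, the tag comparison
 * is guaranteed to fail, so a half-written entry is never used.
 */
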
    /* Do a kernel page table walk.  Leaves valid PTE value in
     * REG1.  Jumps to FAIL_LABEL on early page table walk
     * termination.  VADDR will not be clobbered, but REG2 will.
     *
     * There are two masks we must apply to propagate bits from
     * the virtual address into the PTE physical address field
     * when dealing with huge pages.  This is because the page
     * table boundaries do not match the huge page size(s) the
     * hardware supports.
     *
     * In these cases we propagate the bits that are below the
     * page table level where we saw the huge page mapping, but
     * are still within the relevant physical bits for the huge
     * page size in question.  So for PMD mappings (which fall on
     * bit 23, for 8MB per PMD) we must propagate bit 22 for a
     * 4MB huge page.  For huge PUDs (which fall on bit 33, for
     * 8GB per PUD), we have to accommodate 256MB and 2GB huge
     * pages.  So for those we propagate bits 32 to 28.
     */
#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)    \
    sethi       %hi(swapper_pg_dir), REG1; \
    or      REG1, %lo(swapper_pg_dir), REG1; \
    sllx        VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldx     [REG1 + REG2], REG1; \
    brz,pn      REG1, FAIL_LABEL; \
     sllx       VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
    brz,pn      REG1, FAIL_LABEL; \
    sethi       %uhi(_PAGE_PUD_HUGE), REG2; \
    brz,pn      REG1, FAIL_LABEL; \
     sllx       REG2, 32, REG2; \
    andcc       REG1, REG2, %g0; \
    sethi       %hi(0xf8000000), REG2; \
    bne,pt      %xcc, 697f; \
     sllx       REG2, 1, REG2; \
    sllx        VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
    sethi       %uhi(_PAGE_PMD_HUGE), REG2; \
    brz,pn      REG1, FAIL_LABEL; \
     sllx       REG2, 32, REG2; \
    andcc       REG1, REG2, %g0; \
    be,pn       %xcc, 698f; \
     sethi      %hi(0x400000), REG2; \
697:    brgez,pn    REG1, FAIL_LABEL; \
     andn       REG1, REG2, REG1; \
    and     VADDR, REG2, REG2; \
    ba,pt       %xcc, 699f; \
     or     REG1, REG2, REG1; \
698:    sllx        VADDR, 64 - PMD_SHIFT, REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
    brgez,pn    REG1, FAIL_LABEL; \
     nop; \
699:
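
/* A worked example of the two masks, in C (illustrative only):
 *
 *	// PMD level: REG2 ends up holding 0x400000 (bit 22), so an
 *	// 8MB PMD region built from 4MB hw pages resolves as:
 *	pte = (pmd & ~0x400000UL) | (vaddr & 0x400000UL);
 *
 *	// PUD level: REG2 holds 0xf8000000 << 1 == 0x1f0000000
 *	// (bits 32:28), accommodating both 256MB and 2GB hw pages
 *	// within the 8GB PUD region:
 *	pte = (pud & ~0x1f0000000UL) | (vaddr & 0x1f0000000UL);
 */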

    /* The PUD has been loaded into REG1; interpret the value to see
     * whether it is a HUGE PUD or a normal one.  If it is not valid
     * then jump to FAIL_LABEL.  If it is a HUGE PUD, and it
     * translates to a valid PTE, branch to PTE_LABEL.
     *
     * We have to propagate bits [32:22] from the virtual address
     * to resolve at 4M granularity.
     */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
700:    ba 700f;                    \
     nop;                       \
    .section    .pud_huge_patch, "ax";      \
    .word       700b;               \
    nop;                        \
    .previous;                  \
    brz,pn      REG1, FAIL_LABEL;       \
     sethi      %uhi(_PAGE_PUD_HUGE), REG2; \
    sllx        REG2, 32, REG2;         \
    andcc       REG1, REG2, %g0;        \
    be,pt       %xcc, 700f;         \
     sethi      %hi(0xffe00000), REG2;      \
    sllx        REG2, 1, REG2;          \
    brgez,pn    REG1, FAIL_LABEL;       \
     andn       REG1, REG2, REG1;       \
    and     VADDR, REG2, REG2;      \
    brlz,pt     REG1, PTE_LABEL;        \
     or     REG1, REG2, REG1;       \
700:
#else
#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
    brz,pn      REG1, FAIL_LABEL; \
     nop;
#endif
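
/* The propagation mask computed above is 0xffe00000 << 1 ==
 * 0x1ffc00000, i.e. virtual address bits [32:22].  In C terms
 * (illustrative only):
 *
 *	pte = (pud & ~0x1ffc00000UL) | (vaddr & 0x1ffc00000UL);
 *
 * which resolves the 8GB PUD region at 4MB granularity.  The
 * "ba 700f" at the top normally skips this check entirely; its
 * address is recorded in .pud_huge_patch together with a nop, so
 * the boot code can overwrite the branch and enable the check on
 * cpus that support huge PUD pages.
 */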

    /* The PMD has been loaded into REG1; interpret the value to see
     * whether it is a HUGE PMD or a normal one.  If it is not valid
     * then jump to FAIL_LABEL.  If it is a HUGE PMD, and it
     * translates to a valid PTE, branch to PTE_LABEL.
     *
     * We have to propagate the 4MB bit of the virtual address
     * because we are fabricating 8MB pages using 4MB hw pages.
     */
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
    brz,pn      REG1, FAIL_LABEL;       \
     sethi      %uhi(_PAGE_PMD_HUGE), REG2; \
    sllx        REG2, 32, REG2;         \
    andcc       REG1, REG2, %g0;        \
    be,pt       %xcc, 700f;         \
     sethi      %hi(4 * 1024 * 1024), REG2; \
    brgez,pn    REG1, FAIL_LABEL;       \
     andn       REG1, REG2, REG1;       \
    and     VADDR, REG2, REG2;      \
    brlz,pt     REG1, PTE_LABEL;        \
     or     REG1, REG2, REG1;       \
700:
#else
#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
    brz,pn      REG1, FAIL_LABEL; \
     nop;
#endif
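
/* Note the sign tests: a valid sparc64 PTE has bit 63 (_PAGE_VALID)
 * set, so brgez (branch if non-negative) rejects an invalid entry
 * and brlz accepts a valid one.  The huge PTE is assembled as
 * (illustrative C):
 *
 *	pte = (pmd & ~0x400000UL) | (vaddr & 0x400000UL);
 *
 * propagating the 4MB bit exactly as the comment above describes.
 */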

    /* Do a user page table walk in MMU globals.  Leaves the final,
     * valid PTE value in REG1.  Jumps to FAIL_LABEL on early
     * page table walk termination or if the PTE is not valid.
     *
     * Physical base of the page tables is in PHYS_PGD, which will not
     * be modified.
     *
     * VADDR will not be clobbered, but REG1 and REG2 will.
     */
#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL)  \
    sllx        VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
    brz,pn      REG1, FAIL_LABEL; \
     sllx       VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
    USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
    brz,pn      REG1, FAIL_LABEL; \
     sllx       VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    ldxa        [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
    USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
    sllx        VADDR, 64 - PMD_SHIFT, REG2; \
    srlx        REG2, 64 - PAGE_SHIFT, REG2; \
    andn        REG2, 0x7, REG2; \
    add     REG1, REG2, REG1; \
    ldxa        [REG1] ASI_PHYS_USE_EC, REG1; \
    brgez,pn    REG1, FAIL_LABEL; \
     nop; \
800:
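
/* The complete user walk, modeled in C (an illustrative sketch;
 * ldphys() and the *_index() helpers are hypothetical stand-ins for
 * the ASI_PHYS_USE_EC loads and sllx/srlx index extraction above):
 *
 *	pgd = ldphys(phys_pgd + pgd_index(vaddr) * 8);
 *	if (!pgd) goto fail;
 *	pud = ldphys(pgd + pud_index(vaddr) * 8);
 *	// USER_PGTABLE_CHECK_PUD_HUGE may finish the walk here
 *	if (!pud) goto fail;
 *	pmd = ldphys(pud + pmd_index(vaddr) * 8);
 *	// USER_PGTABLE_CHECK_PMD_HUGE may finish the walk here
 *	pte = ldphys(pmd + pte_index(vaddr) * 8);
 *	if (!(pte & _PAGE_VALID)) goto fail;
 *
 * Each sllx/srlx pair isolates one table index, already scaled by 8;
 * the andn with 0x7 clears the stray low bits left by the shifts.
 */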

/* Look up an OBP mapping on VADDR in the prom_trans[] table at TL>0.
 * If no entry is found, FAIL_LABEL will be branched to.  On success
 * the resulting PTE value will be left in REG1.  VADDR is preserved
 * by this routine.
 */
#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
    sethi       %hi(prom_trans), REG1; \
    or      REG1, %lo(prom_trans), REG1; \
97: ldx     [REG1 + 0x00], REG2; \
    brz,pn      REG2, FAIL_LABEL; \
     nop; \
    ldx     [REG1 + 0x08], REG3; \
    add     REG2, REG3, REG3; \
    cmp     REG2, VADDR; \
    bgu,pt      %xcc, 98f; \
     cmp        VADDR, REG3; \
    bgeu,pt     %xcc, 98f; \
     ldx        [REG1 + 0x10], REG3; \
    sub     VADDR, REG2, REG2; \
    ba,pt       %xcc, 99f; \
     add        REG3, REG2, REG1; \
98: ba,pt       %xcc, 97b; \
     add        REG1, (3 * 8), REG1; \
99:
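
/* prom_trans[] is scanned as an array of 24-byte records (hence the
 * (3 * 8) stride), matching struct linux_prom_translation's three
 * 64-bit words: virt, size, data.  The loop above is, in C
 * (illustrative only):
 *
 *	for (p = prom_trans; p->virt; p++) {
 *		if (vaddr >= p->virt && vaddr < p->virt + p->size)
 *			return p->data + (vaddr - p->virt);
 *	}
 *	goto fail;	// a zero virt field terminates the table
 */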

    /* We use a 32K TSB for the whole kernel, which allows us to
     * handle about 16MB of modules and vmalloc mappings without
     * incurring many hash conflicts.
     */
#define KERNEL_TSB_SIZE_BYTES   (32 * 1024)
#define KERNEL_TSB_NENTRIES \
    (KERNEL_TSB_SIZE_BYTES / 16)
#define KERNEL_TSB4M_NENTRIES   4096
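
/* Coverage arithmetic: 32K / 16 bytes per entry = 2048 entries, and
 * with one 8K page per entry a fully populated TSB spans
 * 2048 * 8K = 16MB, which is where the "about 16MB" figure in the
 * comment above comes from.  The 4M TSB's 4096 entries likewise
 * span 4096 * 4M = 16GB of large-page kernel mappings.
 */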

    /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
     * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
     * and the found TTE will be left in REG1.  REG3 and REG4 must
     * be an even/odd pair of registers.
     *
     * VADDR and TAG will be preserved and not clobbered by this macro.
     */
#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
661:    sethi       %uhi(swapper_tsb), REG1; \
    sethi       %hi(swapper_tsb), REG2; \
    or      REG1, %ulo(swapper_tsb), REG1; \
    or      REG2, %lo(swapper_tsb), REG2; \
    .section    .swapper_tsb_phys_patch, "ax"; \
    .word       661b; \
    .previous; \
    sllx        REG1, 32, REG1; \
    or      REG1, REG2, REG1; \
    srlx        VADDR, PAGE_SHIFT, REG2; \
    and     REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
    sllx        REG2, 4, REG2; \
    add     REG1, REG2, REG2; \
    TSB_LOAD_QUAD(REG2, REG3); \
    cmp     REG3, TAG; \
    be,a,pt     %xcc, OK_LABEL; \
     mov        REG4, REG1;
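
/* The index computation above, in C (illustrative only):
 *
 *	ent = tsb_base +
 *	      ((vaddr >> PAGE_SHIFT) & (KERNEL_TSB_NENTRIES - 1)) * 16;
 *
 * i.e. a direct-mapped hash on the virtual page number, each slot
 * being a 16-byte tag/PTE pair (hence the sllx by 4).  TSB_LOAD_QUAD
 * fetches the tag into REG3 and the PTE into REG4 in one access,
 * and the annulled be,a copies the PTE into REG1 only on a hit.
 */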

#ifndef CONFIG_DEBUG_PAGEALLOC
    /* This version uses a trick: the TAG is already (VADDR >> 22), so
     * we can use it directly for the index computation.
     */
#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
661:    sethi       %uhi(swapper_4m_tsb), REG1; \
    sethi       %hi(swapper_4m_tsb), REG2; \
    or      REG1, %ulo(swapper_4m_tsb), REG1; \
    or      REG2, %lo(swapper_4m_tsb), REG2; \
    .section    .swapper_4m_tsb_phys_patch, "ax"; \
    .word       661b; \
    .previous; \
    sllx        REG1, 32, REG1; \
    or      REG1, REG2, REG1; \
    and     TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
    sllx        REG2, 4, REG2; \
    add     REG1, REG2, REG2; \
    TSB_LOAD_QUAD(REG2, REG3); \
    cmp     REG3, TAG; \
    be,a,pt     %xcc, OK_LABEL; \
     mov        REG4, REG1;
#endif

#endif /* !(_SPARC64_TSB_H) */