// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_lmem.h"

#include "gen8_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_pvinfo.h"
#include "i915_vgpu.h"
#include "intel_gt.h"
#include "intel_gtt.h"

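/*
 * Note: a gen8 PDE/PTE is simply the page-aligned DMA address of the target
 * page (or next-level table) OR'd with present/read-write control bits and a
 * PPAT cacheability selection, as encoded by the two helpers below.
 */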
static u64 gen8_pde_encode(const dma_addr_t addr,
               const enum i915_cache_level level)
{
    u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

    if (level != I915_CACHE_NONE)
        pde |= PPAT_CACHED_PDE;
    else
        pde |= PPAT_UNCACHED;

    return pde;
}

static u64 gen8_pte_encode(dma_addr_t addr,
               enum i915_cache_level level,
               u32 flags)
{
    gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

    if (unlikely(flags & PTE_READ_ONLY))
        pte &= ~GEN8_PAGE_RW;

    if (flags & PTE_LM)
        pte |= GEN12_PPGTT_PTE_LM;

    switch (level) {
    case I915_CACHE_NONE:
        pte |= PPAT_UNCACHED;
        break;
    case I915_CACHE_WT:
        pte |= PPAT_DISPLAY_ELLC;
        break;
    default:
        pte |= PPAT_CACHED;
        break;
    }

    return pte;
}

static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
    struct drm_i915_private *i915 = ppgtt->vm.i915;
    struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
    enum vgt_g2v_type msg;
    int i;

    if (create)
        atomic_inc(px_used(ppgtt->pd)); /* never remove */
    else
        atomic_dec(px_used(ppgtt->pd));

    mutex_lock(&i915->vgpu.lock);

    if (i915_vm_is_4lvl(&ppgtt->vm)) {
        const u64 daddr = px_dma(ppgtt->pd);

        intel_uncore_write(uncore,
                   vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
        intel_uncore_write(uncore,
                   vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

        msg = create ?
            VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
            VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
    } else {
        for (i = 0; i < GEN8_3LVL_PDPES; i++) {
            const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

            intel_uncore_write(uncore,
                       vgtif_reg(pdp[i].lo),
                       lower_32_bits(daddr));
            intel_uncore_write(uncore,
                       vgtif_reg(pdp[i].hi),
                       upper_32_bits(daddr));
        }

        msg = create ?
            VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
            VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
    }

    /* g2v_notify atomically (via hv trap) consumes the message packet. */
    intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);

    mutex_unlock(&i915->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))

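/*
 * Worked example for the helpers above: with 4K pages and GEN8_PDES == 512
 * (2^9) entries per level, each level consumes 9 bits of the address above
 * the 12-bit page offset, so __gen8_pte_index() selects bits [20:12] at
 * lvl 0, [29:21] at lvl 1, [38:30] at lvl 2 and [47:39] at lvl 3. For
 * instance, an address of SZ_2M gives __gen8_pte_index(SZ_2M, 1) == 1 and
 * __gen8_pte_index(SZ_2M, 0) == 0, i.e. the first PTE of the second page
 * table.
 */
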
#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

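/*
 * Return the number of entries at level @lvl needed to span [start, end),
 * clamped to the directory containing @start, and set *idx to the first
 * entry. A partially covered final entry is counted as used.
 */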
static unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
    const int shift = gen8_pd_shift(lvl);
    const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

    GEM_BUG_ON(start >= end);
    end += ~mask >> gen8_pd_shift(1);

    *idx = i915_pde_index(start, shift);
    if ((start ^ end) & mask)
        return GEN8_PDES - *idx;
    else
        return i915_pde_index(end, shift) - *idx;
}

static bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
    const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

    GEM_BUG_ON(start >= end);
    return (start ^ end) & mask && (start & ~mask) == 0;
}

static unsigned int gen8_pt_count(u64 start, u64 end)
{
    GEM_BUG_ON(start >= end);
    if ((start ^ end) >> gen8_pd_shift(1))
        return GEN8_PDES - (start & (GEN8_PDES - 1));
    else
        return end - start;
}

static unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
    unsigned int shift = __gen8_pte_shift(vm->top);

    return (vm->total + (1ull << shift) - 1) >> shift;
}

static struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
    struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

    if (vm->top == 2)
        return ppgtt->pd;
    else
        return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
    return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
                 struct i915_page_directory *pd,
                 int count, int lvl)
{
    if (lvl) {
        void **pde = pd->entry;

        do {
            if (!*pde)
                continue;

            __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
        } while (pde++, --count);
    }

    free_px(vm, &pd->pt, lvl);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
    struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

    if (intel_vgpu_active(vm->i915))
        gen8_ppgtt_notify_vgt(ppgtt, false);

    __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
    free_scratch(vm);
}

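/*
 * Recursively clear [start, end) (already in units of GTT pages). Page
 * directories wholly contained in the range are unhooked and freed; for a
 * partially covered page table only the affected PTEs are rewritten with
 * the scratch encoding. Returns @start advanced past whatever was handled
 * at this level.
 */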
static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
                  struct i915_page_directory * const pd,
                  u64 start, const u64 end, int lvl)
{
    const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
    unsigned int idx, len;

    GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

    len = gen8_pd_range(start, end, lvl--, &idx);
    DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
        __func__, vm, lvl + 1, start, end,
        idx, len, atomic_read(px_used(pd)));
    GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

    do {
        struct i915_page_table *pt = pd->entry[idx];

        if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
            gen8_pd_contains(start, end, lvl)) {
            DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
                __func__, vm, lvl + 1, idx, start, end);
            clear_pd_entry(pd, idx, scratch);
            __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
            start += (u64)I915_PDES << gen8_pd_shift(lvl);
            continue;
        }

        if (lvl) {
            start = __gen8_ppgtt_clear(vm, as_pd(pt),
                           start, end, lvl);
        } else {
            unsigned int count;
            unsigned int pte = gen8_pd_index(start, 0);
            unsigned int num_ptes;
            u64 *vaddr;

            count = gen8_pt_count(start, end);
            DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
                __func__, vm, lvl, start, end,
                gen8_pd_index(start, 0), count,
                atomic_read(&pt->used));
            GEM_BUG_ON(!count || count >= atomic_read(&pt->used));

            num_ptes = count;
            if (pt->is_compact) {
                GEM_BUG_ON(num_ptes % 16);
                GEM_BUG_ON(pte % 16);
                num_ptes /= 16;
                pte /= 16;
            }

            vaddr = px_vaddr(pt);
            memset64(vaddr + pte,
                 vm->scratch[0]->encode,
                 num_ptes);

            atomic_sub(count, &pt->used);
            start += count;
        }

        if (release_pd_entry(pd, idx, pt, scratch))
            free_px(vm, pt, lvl);
    } while (idx++, --len);

    return start;
}

static void gen8_ppgtt_clear(struct i915_address_space *vm,
                 u64 start, u64 length)
{
    GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
    GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
    GEM_BUG_ON(range_overflows(start, length, vm->total));

    start >>= GEN8_PTE_SHIFT;
    length >>= GEN8_PTE_SHIFT;
    GEM_BUG_ON(length == 0);

    __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
               start, start + length, vm->top);
}

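/*
 * Recursively populate the page-table tree for [start, end) using page
 * tables preallocated in @stash. Newly hooked-in tables are first filled
 * with the scratch encoding for their level, so the walk always sees a
 * valid (if empty) structure.
 */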
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
                   struct i915_vm_pt_stash *stash,
                   struct i915_page_directory * const pd,
                   u64 * const start, const u64 end, int lvl)
{
    unsigned int idx, len;

    GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

    len = gen8_pd_range(*start, end, lvl--, &idx);
    DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
        __func__, vm, lvl + 1, *start, end,
        idx, len, atomic_read(px_used(pd)));
    GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));

    spin_lock(&pd->lock);
    GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
    do {
        struct i915_page_table *pt = pd->entry[idx];

        if (!pt) {
            spin_unlock(&pd->lock);

            DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
                __func__, vm, lvl + 1, idx);

            pt = stash->pt[!!lvl];
            __i915_gem_object_pin_pages(pt->base);

            fill_px(pt, vm->scratch[lvl]->encode);

            spin_lock(&pd->lock);
            if (likely(!pd->entry[idx])) {
                stash->pt[!!lvl] = pt->stash;
                atomic_set(&pt->used, 0);
                set_pd_entry(pd, idx, pt);
            } else {
                pt = pd->entry[idx];
            }
        }

        if (lvl) {
            atomic_inc(&pt->used);
            spin_unlock(&pd->lock);

            __gen8_ppgtt_alloc(vm, stash,
                       as_pd(pt), start, end, lvl);

            spin_lock(&pd->lock);
            atomic_dec(&pt->used);
            GEM_BUG_ON(!atomic_read(&pt->used));
        } else {
            unsigned int count = gen8_pt_count(*start, end);

            DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
                __func__, vm, lvl, *start, end,
                gen8_pd_index(*start, 0), count,
                atomic_read(&pt->used));

            atomic_add(count, &pt->used);
            /* All other pdes may be simultaneously removed */
            GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
            *start += count;
        }
    } while (idx++, --len);
    spin_unlock(&pd->lock);
}

static void gen8_ppgtt_alloc(struct i915_address_space *vm,
                 struct i915_vm_pt_stash *stash,
                 u64 start, u64 length)
{
    GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
    GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
    GEM_BUG_ON(range_overflows(start, length, vm->total));

    start >>= GEN8_PTE_SHIFT;
    length >>= GEN8_PTE_SHIFT;
    GEM_BUG_ON(length == 0);

    __gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
               &start, start + length, vm->top);
}

static void __gen8_ppgtt_foreach(struct i915_address_space *vm,
                 struct i915_page_directory *pd,
                 u64 *start, u64 end, int lvl,
                 void (*fn)(struct i915_address_space *vm,
                        struct i915_page_table *pt,
                        void *data),
                 void *data)
{
    unsigned int idx, len;

    len = gen8_pd_range(*start, end, lvl--, &idx);

    spin_lock(&pd->lock);
    do {
        struct i915_page_table *pt = pd->entry[idx];

        atomic_inc(&pt->used);
        spin_unlock(&pd->lock);

        if (lvl) {
            __gen8_ppgtt_foreach(vm, as_pd(pt), start, end, lvl,
                         fn, data);
        } else {
            fn(vm, pt, data);
            *start += gen8_pt_count(*start, end);
        }

        spin_lock(&pd->lock);
        atomic_dec(&pt->used);
    } while (idx++, --len);
    spin_unlock(&pd->lock);
}

static void gen8_ppgtt_foreach(struct i915_address_space *vm,
                   u64 start, u64 length,
                   void (*fn)(struct i915_address_space *vm,
                      struct i915_page_table *pt,
                      void *data),
                   void *data)
{
    start >>= GEN8_PTE_SHIFT;
    length >>= GEN8_PTE_SHIFT;

    __gen8_ppgtt_foreach(vm, i915_vm_to_ppgtt(vm)->pd,
                 &start, start + length, vm->top,
                 fn, data);
}

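/*
 * Write PTEs for @iter starting at page index @idx within a single pdp.
 * Returns 0 once the scatterlist is exhausted, or the next index when the
 * walk crosses into the following pdp (so the caller can look it up and
 * continue).
 */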
static __always_inline u64
gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
              struct i915_page_directory *pdp,
              struct sgt_dma *iter,
              u64 idx,
              enum i915_cache_level cache_level,
              u32 flags)
{
    struct i915_page_directory *pd;
    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
    gen8_pte_t *vaddr;

    pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
    vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
    do {
        GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
        vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;

        iter->dma += I915_GTT_PAGE_SIZE;
        if (iter->dma >= iter->max) {
            iter->sg = __sg_next(iter->sg);
            if (!iter->sg || sg_dma_len(iter->sg) == 0) {
                idx = 0;
                break;
            }

            iter->dma = sg_dma_address(iter->sg);
            iter->max = iter->dma + sg_dma_len(iter->sg);
        }

        if (gen8_pd_index(++idx, 0) == 0) {
            if (gen8_pd_index(idx, 1) == 0) {
                /* Limited by sg length for 3lvl */
                if (gen8_pd_index(idx, 2) == 0)
                    break;

                pd = pdp->entry[gen8_pd_index(idx, 2)];
            }

            drm_clflush_virt_range(vaddr, PAGE_SIZE);
            vaddr = px_vaddr(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
        }
    } while (1);
    drm_clflush_virt_range(vaddr, PAGE_SIZE);

    return idx;
}

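/*
 * On platforms without 4K GTT pages for LMEM, local-memory mappings are
 * written either as 2M PDEs or into a "compact" 64K page table (one entry
 * per 64K page, selected by pte index / 16, with GEN12_PDE_64K set on the
 * PDE); system memory still uses regular 4K PTEs.
 */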
static void
xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
              struct i915_vma_resource *vma_res,
              struct sgt_dma *iter,
              enum i915_cache_level cache_level,
              u32 flags)
{
    const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
    unsigned int rem = sg_dma_len(iter->sg);
    u64 start = vma_res->start;

    GEM_BUG_ON(!i915_vm_is_4lvl(vm));

    do {
        struct i915_page_directory * const pdp =
            gen8_pdp_for_page_address(vm, start);
        struct i915_page_directory * const pd =
            i915_pd_entry(pdp, __gen8_pte_index(start, 2));
        struct i915_page_table *pt =
            i915_pt_entry(pd, __gen8_pte_index(start, 1));
        gen8_pte_t encode = pte_encode;
        unsigned int page_size;
        gen8_pte_t *vaddr;
        u16 index, max;

        max = I915_PDES;

        if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
            rem >= I915_GTT_PAGE_SIZE_2M &&
            !__gen8_pte_index(start, 0)) {
            index = __gen8_pte_index(start, 1);
            encode |= GEN8_PDE_PS_2M;
            page_size = I915_GTT_PAGE_SIZE_2M;

            vaddr = px_vaddr(pd);
        } else {
            if (encode & GEN12_PPGTT_PTE_LM) {
                GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
                GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
                GEM_BUG_ON(!IS_ALIGNED(iter->dma,
                               I915_GTT_PAGE_SIZE_64K));

                index = __gen8_pte_index(start, 0) / 16;
                page_size = I915_GTT_PAGE_SIZE_64K;

                max /= 16;

                vaddr = px_vaddr(pd);
                vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;

                pt->is_compact = true;
            } else {
                GEM_BUG_ON(pt->is_compact);
                index = __gen8_pte_index(start, 0);
                page_size = I915_GTT_PAGE_SIZE;
            }

            vaddr = px_vaddr(pt);
        }

        do {
            GEM_BUG_ON(rem < page_size);
            vaddr[index++] = encode | iter->dma;

            start += page_size;
            iter->dma += page_size;
            rem -= page_size;
            if (iter->dma >= iter->max) {
                iter->sg = __sg_next(iter->sg);
                if (!iter->sg)
                    break;

                rem = sg_dma_len(iter->sg);
                if (!rem)
                    break;

                iter->dma = sg_dma_address(iter->sg);
                iter->max = iter->dma + rem;

                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                    break;
            }
        } while (rem >= page_size && index < max);

        vma_res->page_sizes_gtt |= page_size;
    } while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
                   struct i915_vma_resource *vma_res,
                   struct sgt_dma *iter,
                   enum i915_cache_level cache_level,
                   u32 flags)
{
    const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
    unsigned int rem = sg_dma_len(iter->sg);
    u64 start = vma_res->start;

    GEM_BUG_ON(!i915_vm_is_4lvl(vm));

    do {
        struct i915_page_directory * const pdp =
            gen8_pdp_for_page_address(vm, start);
        struct i915_page_directory * const pd =
            i915_pd_entry(pdp, __gen8_pte_index(start, 2));
        gen8_pte_t encode = pte_encode;
        unsigned int maybe_64K = -1;
        unsigned int page_size;
        gen8_pte_t *vaddr;
        u16 index;

        if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
            IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
            rem >= I915_GTT_PAGE_SIZE_2M &&
            !__gen8_pte_index(start, 0)) {
            index = __gen8_pte_index(start, 1);
            encode |= GEN8_PDE_PS_2M;
            page_size = I915_GTT_PAGE_SIZE_2M;

            vaddr = px_vaddr(pd);
        } else {
            struct i915_page_table *pt =
                i915_pt_entry(pd, __gen8_pte_index(start, 1));

            index = __gen8_pte_index(start, 0);
            page_size = I915_GTT_PAGE_SIZE;

            if (!index &&
                vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
                IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                 rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
                maybe_64K = __gen8_pte_index(start, 1);

            vaddr = px_vaddr(pt);
        }

        do {
            GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
            vaddr[index++] = encode | iter->dma;

            start += page_size;
            iter->dma += page_size;
            rem -= page_size;
            if (iter->dma >= iter->max) {
                iter->sg = __sg_next(iter->sg);
                if (!iter->sg)
                    break;

                rem = sg_dma_len(iter->sg);
                if (!rem)
                    break;

                iter->dma = sg_dma_address(iter->sg);
                iter->max = iter->dma + rem;

                if (maybe_64K != -1 && index < I915_PDES &&
                    !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
                      (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
                       rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
                    maybe_64K = -1;

                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                    break;
            }
        } while (rem >= page_size && index < I915_PDES);

        drm_clflush_virt_range(vaddr, PAGE_SIZE);

        /*
         * Is it safe to mark the 2M block as 64K? -- Either we have
         * filled whole page-table with 64K entries, or filled part of
         * it and have reached the end of the sg table and we have
         * enough padding.
         */
        if (maybe_64K != -1 &&
            (index == I915_PDES ||
             (i915_vm_has_scratch_64K(vm) &&
              !iter->sg && IS_ALIGNED(vma_res->start +
                          vma_res->node_size,
                          I915_GTT_PAGE_SIZE_2M)))) {
            vaddr = px_vaddr(pd);
            vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
            drm_clflush_virt_range(vaddr, PAGE_SIZE);
            page_size = I915_GTT_PAGE_SIZE_64K;

            /*
             * We write all 4K page entries, even when using 64K
             * pages. In order to verify that the HW isn't cheating
             * by using the 4K PTE instead of the 64K PTE, we want
             * to remove all the surplus entries. If the HW skipped
             * the 64K PTE, it will read/write into the scratch page
             * instead - which we detect as missing results during
             * selftests.
             */
            if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
                u16 i;

                encode = vm->scratch[0]->encode;
                vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));

                for (i = 1; i < index; i += 16)
                    memset64(vaddr + i, encode, 15);

                drm_clflush_virt_range(vaddr, PAGE_SIZE);
            }
        }

        vma_res->page_sizes_gtt |= page_size;
    } while (iter->sg && sg_dma_len(iter->sg));
}

static void gen8_ppgtt_insert(struct i915_address_space *vm,
                  struct i915_vma_resource *vma_res,
                  enum i915_cache_level cache_level,
                  u32 flags)
{
    struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
    struct sgt_dma iter = sgt_dma(vma_res);

    if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
        if (HAS_64K_PAGES(vm->i915))
            xehpsdv_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
        else
            gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
    } else {
        u64 idx = vma_res->start >> GEN8_PTE_SHIFT;

        do {
            struct i915_page_directory * const pdp =
                gen8_pdp_for_page_index(vm, idx);

            idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
                            cache_level, flags);
        } while (idx);

        vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
    }
}

static void gen8_ppgtt_insert_entry(struct i915_address_space *vm,
                    dma_addr_t addr,
                    u64 offset,
                    enum i915_cache_level level,
                    u32 flags)
{
    u64 idx = offset >> GEN8_PTE_SHIFT;
    struct i915_page_directory * const pdp =
        gen8_pdp_for_page_index(vm, idx);
    struct i915_page_directory *pd =
        i915_pd_entry(pdp, gen8_pd_index(idx, 2));
    struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
    gen8_pte_t *vaddr;

    GEM_BUG_ON(pt->is_compact);

    vaddr = px_vaddr(pt);
    vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
    drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
}

static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
                        dma_addr_t addr,
                        u64 offset,
                        enum i915_cache_level level,
                        u32 flags)
{
    u64 idx = offset >> GEN8_PTE_SHIFT;
    struct i915_page_directory * const pdp =
        gen8_pdp_for_page_index(vm, idx);
    struct i915_page_directory *pd =
        i915_pd_entry(pdp, gen8_pd_index(idx, 2));
    struct i915_page_table *pt = i915_pt_entry(pd, gen8_pd_index(idx, 1));
    gen8_pte_t *vaddr;

    GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
    GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));

    if (!pt->is_compact) {
        vaddr = px_vaddr(pd);
        vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
        pt->is_compact = true;
    }

    vaddr = px_vaddr(pt);
    vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
}

static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
                       dma_addr_t addr,
                       u64 offset,
                       enum i915_cache_level level,
                       u32 flags)
{
    if (flags & PTE_LM)
        return __xehpsdv_ppgtt_insert_entry_lm(vm, addr, offset,
                               level, flags);

    return gen8_ppgtt_insert_entry(vm, addr, offset, level, flags);
}

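/*
 * Set up the per-level scratch: scratch[0] is a data page, and each
 * scratch[i] above it is a table whose entries all point at scratch[i - 1],
 * so unpopulated parts of the address space resolve harmlessly. When the HW
 * supports read-only PTEs, the scratch objects are shared with the gt's
 * kernel vm instead of being reallocated.
 */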
static int gen8_init_scratch(struct i915_address_space *vm)
{
    u32 pte_flags;
    int ret;
    int i;

    /*
     * If everybody agrees not to write into the scratch page,
     * we can reuse it for all VMs, keeping contexts and processes separate.
     */
    if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
        struct i915_address_space *clone = vm->gt->vm;

        GEM_BUG_ON(!clone->has_read_only);

        vm->scratch_order = clone->scratch_order;
        for (i = 0; i <= vm->top; i++)
            vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);

        return 0;
    }

    ret = setup_scratch_page(vm);
    if (ret)
        return ret;

    pte_flags = vm->has_read_only;
    if (i915_gem_object_is_lmem(vm->scratch[0]))
        pte_flags |= PTE_LM;

    vm->scratch[0]->encode =
        gen8_pte_encode(px_dma(vm->scratch[0]),
                I915_CACHE_NONE, pte_flags);

    for (i = 1; i <= vm->top; i++) {
        struct drm_i915_gem_object *obj;

        obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
        if (IS_ERR(obj))
            goto free_scratch;

        ret = map_pt_dma(vm, obj);
        if (ret) {
            i915_gem_object_put(obj);
            goto free_scratch;
        }

        fill_px(obj, vm->scratch[i - 1]->encode);
        obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);

        vm->scratch[i] = obj;
    }

    return 0;

free_scratch:
    while (i--)
        i915_gem_object_put(vm->scratch[i]);
    return -ENOMEM;
}

static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
    struct i915_address_space *vm = &ppgtt->vm;
    struct i915_page_directory *pd = ppgtt->pd;
    unsigned int idx;

    GEM_BUG_ON(vm->top != 2);
    GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);

    for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
        struct i915_page_directory *pde;
        int err;

        pde = alloc_pd(vm);
        if (IS_ERR(pde))
            return PTR_ERR(pde);

        err = map_pt_dma(vm, pde->pt.base);
        if (err) {
            free_pd(vm, pde);
            return err;
        }

        fill_px(pde, vm->scratch[1]->encode);
        set_pd_entry(pd, idx, pde);
        atomic_inc(px_used(pde)); /* keep pinned */
    }
    wmb();

    return 0;
}

static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
    const unsigned int count = gen8_pd_top_count(vm);
    struct i915_page_directory *pd;
    int err;

    GEM_BUG_ON(count > I915_PDES);

    pd = __alloc_pd(count);
    if (unlikely(!pd))
        return ERR_PTR(-ENOMEM);

    pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
    if (IS_ERR(pd->pt.base)) {
        err = PTR_ERR(pd->pt.base);
        pd->pt.base = NULL;
        goto err_pd;
    }

    err = map_pt_dma(vm, pd->pt.base);
    if (err)
        goto err_pd;

    fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
    atomic_inc(px_used(pd)); /* mark as pinned */
    return pd;

err_pd:
    free_pd(vm, pd);
    return ERR_PTR(err);
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
 * registers, with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 = 4GB of
 * legacy 32b address space.
 */
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
                     unsigned long lmem_pt_obj_flags)
{
    struct i915_ppgtt *ppgtt;
    int err;

    ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
    if (!ppgtt)
        return ERR_PTR(-ENOMEM);

    ppgtt_init(ppgtt, gt, lmem_pt_obj_flags);
    ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
    ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));

    /*
     * From bdw, there is hw support for read-only pages in the PPGTT.
     *
     * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
     * for now.
     *
     * Gen12 has inherited the same read-only fault issue from gen11.
     */
    ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);

    if (HAS_LMEM(gt->i915)) {
        ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;

        /*
         * On some platforms the hw has dropped support for 4K GTT pages
         * when dealing with LMEM, and due to the design of 64K GTT
         * pages in the hw, we can only mark the *entire* page-table as
         * operating in 64K GTT mode, since the enable bit is still on
         * the pde, and not the pte. And since we still need to allow
         * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
         * page-table with scratch pointing to LMEM, since that's
         * undefined from the hw pov. The simplest solution is to just
         * move the 64K scratch page to SMEM on such platforms and call
         * it a day, since that should work for all configurations.
         */
        if (HAS_64K_PAGES(gt->i915))
            ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
        else
            ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
    } else {
        ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
        ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
    }

    err = gen8_init_scratch(&ppgtt->vm);
    if (err)
        goto err_free;

    ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
    if (IS_ERR(ppgtt->pd)) {
        err = PTR_ERR(ppgtt->pd);
        goto err_free_scratch;
    }

    if (!i915_vm_is_4lvl(&ppgtt->vm)) {
        err = gen8_preallocate_top_level_pdp(ppgtt);
        if (err)
            goto err_free_pd;
    }

    ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
    ppgtt->vm.insert_entries = gen8_ppgtt_insert;
    if (HAS_64K_PAGES(gt->i915))
        ppgtt->vm.insert_page = xehpsdv_ppgtt_insert_entry;
    else
        ppgtt->vm.insert_page = gen8_ppgtt_insert_entry;
    ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
    ppgtt->vm.clear_range = gen8_ppgtt_clear;
    ppgtt->vm.foreach = gen8_ppgtt_foreach;

    ppgtt->vm.pte_encode = gen8_pte_encode;

    if (intel_vgpu_active(gt->i915))
        gen8_ppgtt_notify_vgt(ppgtt, true);

    ppgtt->vm.cleanup = gen8_ppgtt_cleanup;

    return ppgtt;

err_free_pd:
    __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
                 gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
err_free_scratch:
    free_scratch(&ppgtt->vm);
err_free:
    kfree(ppgtt);
    return ERR_PTR(err);
}