// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/log2.h>

#include "gem/i915_gem_internal.h"

#include "gen6_ppgtt.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"

/* Write the page directory entry @pde to point at the page table @pt
 * (or at the scratch page table when @pt is NULL).
 */
static void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
               const unsigned int pde,
               const struct i915_page_table *pt)
{
    dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);

    /* Caller needs to make sure the write completes if necessary */
    iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
          ppgtt->pd_addr + pde);
}

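/*
 * Enable PPGTT page-walk caching for gen7 via the GAC_ECO_BITS and
 * GAM_ECOCHK registers (write-back on Haswell, LLC otherwise).
 */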
void gen7_ppgtt_enable(struct intel_gt *gt)
{
    struct drm_i915_private *i915 = gt->i915;
    struct intel_uncore *uncore = gt->uncore;
    u32 ecochk;

    intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);

    ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
    if (IS_HASWELL(i915)) {
        ecochk |= ECOCHK_PPGTT_WB_HSW;
    } else {
        ecochk |= ECOCHK_PPGTT_LLC_IVB;
        ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
    }
    intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
}

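/*
 * Enable PPGTT for gen6: set the ECO/ECOCHK cacheability bits, let the
 * GAB continue after a pagefault, and turn on GFX_PPGTT_ENABLE (unless
 * PPGTT has been disabled, e.g. for VT-d).
 */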
void gen6_ppgtt_enable(struct intel_gt *gt)
{
    struct intel_uncore *uncore = gt->uncore;

    intel_uncore_rmw(uncore,
             GAC_ECO_BITS,
             0,
             ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);

    intel_uncore_rmw(uncore,
             GAB_CTL,
             0,
             GAB_CTL_CONT_AFTER_PAGEFAULT);

    intel_uncore_rmw(uncore,
             GAM_ECOCHK,
             0,
             ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

    if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
        intel_uncore_write(uncore,
                   GFX_MODE,
                   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
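/* Reset every PTE in the range back to the scratch page. */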
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
                   u64 start, u64 length)
{
    struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
    const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
    const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
    unsigned int pde = first_entry / GEN6_PTES;
    unsigned int pte = first_entry % GEN6_PTES;
    unsigned int num_entries = length / I915_GTT_PAGE_SIZE;

    while (num_entries) {
        struct i915_page_table * const pt =
            i915_pt_entry(ppgtt->base.pd, pde++);
        const unsigned int count = min(num_entries, GEN6_PTES - pte);
        gen6_pte_t *vaddr;

        num_entries -= count;

        GEM_BUG_ON(count > atomic_read(&pt->used));
        if (!atomic_sub_return(count, &pt->used))
            ppgtt->scan_for_unused_pt = true;

        /*
         * Note that the hw doesn't support removing PDEs on the fly
         * (they are cached inside the context with no means to
         * invalidate the cache), so we can only reset the PTE
         * entries back to scratch.
         */

        vaddr = px_vaddr(pt);
        memset32(vaddr + pte, scratch_pte, count);

        pte = 0;
    }
}

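/*
 * Write a PTE for each page of @vma_res, walking the scatterlist and
 * filling the page tables in sequence. The caller must have allocated
 * the backing page tables beforehand (see gen6_alloc_va_range()).
 */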
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
                      struct i915_vma_resource *vma_res,
                      enum i915_cache_level cache_level,
                      u32 flags)
{
    struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
    struct i915_page_directory * const pd = ppgtt->pd;
    unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
    unsigned int act_pt = first_entry / GEN6_PTES;
    unsigned int act_pte = first_entry % GEN6_PTES;
    const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
    struct sgt_dma iter = sgt_dma(vma_res);
    gen6_pte_t *vaddr;

    GEM_BUG_ON(!pd->entry[act_pt]);

    vaddr = px_vaddr(i915_pt_entry(pd, act_pt));
    do {
        GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
        vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);

        iter.dma += I915_GTT_PAGE_SIZE;
        if (iter.dma == iter.max) {
            iter.sg = __sg_next(iter.sg);
            if (!iter.sg || sg_dma_len(iter.sg) == 0)
                break;

            iter.dma = sg_dma_address(iter.sg);
            iter.max = iter.dma + sg_dma_len(iter.sg);
        }

        if (++act_pte == GEN6_PTES) {
            vaddr = px_vaddr(i915_pt_entry(pd, ++act_pt));
            act_pte = 0;
        }
    } while (1);

    vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
}

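/*
 * Rewrite the PDEs covering [start, end) into the GGTT-mapped page
 * directory and invalidate the GGTT so the hardware picks them up.
 */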
static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
{
    struct i915_page_directory * const pd = ppgtt->base.pd;
    struct i915_page_table *pt;
    unsigned int pde;

    start = round_down(start, SZ_64K);
    end = round_up(end, SZ_64K) - start;

    mutex_lock(&ppgtt->flush);

    gen6_for_each_pde(pt, pd, start, end, pde)
        gen6_write_pde(ppgtt, pde, pt);

    mb();
    ioread32(ppgtt->pd_addr + pde - 1);
    gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
    mb();

    mutex_unlock(&ppgtt->flush);
}

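/*
 * Ensure page tables exist for every PDE covering [start, start + length),
 * taking preallocated tables from @stash as needed. Newly installed PDEs
 * are flushed to the hardware if the page directory is already bound in
 * the GGTT.
 */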
static void gen6_alloc_va_range(struct i915_address_space *vm,
                struct i915_vm_pt_stash *stash,
                u64 start, u64 length)
{
    struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
    struct i915_page_directory * const pd = ppgtt->base.pd;
    struct i915_page_table *pt;
    bool flush = false;
    u64 from = start;
    unsigned int pde;

    spin_lock(&pd->lock);
    gen6_for_each_pde(pt, pd, start, length, pde) {
        const unsigned int count = gen6_pte_count(start, length);

        if (!pt) {
            spin_unlock(&pd->lock);

            pt = stash->pt[0];
            __i915_gem_object_pin_pages(pt->base);

            fill32_px(pt, vm->scratch[0]->encode);

            spin_lock(&pd->lock);
            if (!pd->entry[pde]) {
                stash->pt[0] = pt->stash;
                atomic_set(&pt->used, 0);
                pd->entry[pde] = pt;
            } else {
                pt = pd->entry[pde];
            }

            flush = true;
        }

        atomic_add(count, &pt->used);
    }
    spin_unlock(&pd->lock);

    if (flush && i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) {
        intel_wakeref_t wakeref;

        with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
            gen6_flush_pd(ppgtt, from, start);
    }
}

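/*
 * Set up the scratch page (scratch[0]) and a scratch page table
 * (scratch[1]) whose entries all point at it; unused PDEs and PTEs are
 * redirected here.
 */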
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
    struct i915_address_space * const vm = &ppgtt->base.vm;
    int ret;

    ret = setup_scratch_page(vm);
    if (ret)
        return ret;

    vm->scratch[0]->encode =
        vm->pte_encode(px_dma(vm->scratch[0]),
                   I915_CACHE_NONE, PTE_READ_ONLY);

    vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
    if (IS_ERR(vm->scratch[1])) {
        ret = PTR_ERR(vm->scratch[1]);
        goto err_scratch0;
    }

    ret = map_pt_dma(vm, vm->scratch[1]);
    if (ret)
        goto err_scratch1;

    fill32_px(vm->scratch[1], vm->scratch[0]->encode);

    return 0;

err_scratch1:
    i915_gem_object_put(vm->scratch[1]);
err_scratch0:
    i915_gem_object_put(vm->scratch[0]);
    return ret;
}

static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
{
    struct i915_page_directory * const pd = ppgtt->base.pd;
    struct i915_page_table *pt;
    u32 pde;

    gen6_for_all_pdes(pt, pd, pde)
        if (pt)
            free_pt(&ppgtt->base.vm, pt);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
    struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));

    gen6_ppgtt_free_pd(ppgtt);
    free_scratch(vm);

    mutex_destroy(&ppgtt->flush);

    free_pd(&ppgtt->base.vm, ppgtt->base.pd);
}

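/*
 * Called when the page directory's GGTT vma is bound: record where the
 * PDEs now live in the GGTT and write all of them out.
 */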
static void pd_vma_bind(struct i915_address_space *vm,
            struct i915_vm_pt_stash *stash,
            struct i915_vma_resource *vma_res,
            enum i915_cache_level cache_level,
            u32 unused)
{
    struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
    struct gen6_ppgtt *ppgtt = vma_res->private;
    u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE;

    ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
    ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;

    gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
}

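/*
 * On unbind, opportunistically free any page tables whose entries have
 * all been returned to scratch (tracked via scan_for_unused_pt).
 */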
static void pd_vma_unbind(struct i915_address_space *vm,
              struct i915_vma_resource *vma_res)
{
    struct gen6_ppgtt *ppgtt = vma_res->private;
    struct i915_page_directory * const pd = ppgtt->base.pd;
    struct i915_page_table *pt;
    unsigned int pde;

    if (!ppgtt->scan_for_unused_pt)
        return;

    /* Free all page tables that are no longer in use */
    gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
        if (!pt || atomic_read(&pt->used))
            continue;

        free_pt(&ppgtt->base.vm, pt);
        pd->entry[pde] = NULL;
    }

    ppgtt->scan_for_unused_pt = false;
}

static const struct i915_vma_ops pd_vma_ops = {
    .bind_vma = pd_vma_bind,
    .unbind_vma = pd_vma_unbind,
};

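/*
 * Pin the page directory into the GGTT so the hardware can reach the
 * PDEs. Pinning is refcounted, so only the first caller actually binds
 * the vma.
 */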
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
{
    struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
    int err;

    GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref));

    /*
     * Work around the limited maximum vma->pin_count and the aliasing_ppgtt
     * which will be pinned into every active context.
     * (When vma->pin_count becomes atomic, I expect we will naturally
     * need a larger, unpacked, type and kill this redundancy.)
     */
    if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
        return 0;

    /* grab the ppgtt resv to pin the object */
    err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
    if (err)
        return err;

    /*
     * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
     * allocator works in address space sizes, so it's multiplied by page
     * size. We allocate at the top of the GTT to avoid fragmentation.
     */
    if (!atomic_read(&ppgtt->pin_count)) {
        err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);

        GEM_BUG_ON(ppgtt->vma->fence);
        clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
    }
    if (!err)
        atomic_inc(&ppgtt->pin_count);

    return err;
}

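/*
 * The top-level page directory has no backing pages of its own; the
 * dummy object below only reserves a GGTT range for the PDEs so it can
 * be bound and unbound via pd_vma_ops.
 */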
static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
{
    obj->mm.pages = ZERO_SIZE_PTR;
    return 0;
}

static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
                   struct sg_table *pages)
{
}

static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
    .name = "pd_dummy_obj",
    .get_pages = pd_dummy_obj_get_pages,
    .put_pages = pd_dummy_obj_put_pages,
};

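/*
 * Allocate the single top-level page directory and the GGTT vma that
 * will hold its PDEs.
 */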
static struct i915_page_directory *
gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
{
    struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
    struct i915_page_directory *pd;
    int err;

    pd = __alloc_pd(I915_PDES);
    if (unlikely(!pd))
        return ERR_PTR(-ENOMEM);

    pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
                            &pd_dummy_obj_ops,
                            I915_PDES * SZ_4K);
    if (IS_ERR(pd->pt.base)) {
        err = PTR_ERR(pd->pt.base);
        pd->pt.base = NULL;
        goto err_pd;
    }

    pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
    pd->pt.base->shares_resv_from = &ppgtt->base.vm;

    ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
    if (IS_ERR(ppgtt->vma)) {
        err = PTR_ERR(ppgtt->vma);
        ppgtt->vma = NULL;
        goto err_pd;
    }

    /* The dummy object we create is special; override its vma ops. */
    ppgtt->vma->ops = &pd_vma_ops;
    ppgtt->vma->private = ppgtt;
    return pd;

err_pd:
    free_pd(&ppgtt->base.vm, pd);
    return ERR_PTR(err);
}

void gen6_ppgtt_unpin(struct i915_ppgtt *base)
{
    struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);

    GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
    if (atomic_dec_and_test(&ppgtt->pin_count))
        i915_vma_unpin(ppgtt->vma);
}

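/*
 * Create a gen6 full-PPGTT: initialise the address space and its vfuncs,
 * the scratch pages, and the top-level page directory.
 */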
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
{
    struct i915_ggtt * const ggtt = gt->ggtt;
    struct gen6_ppgtt *ppgtt;
    int err;

    ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
    if (!ppgtt)
        return ERR_PTR(-ENOMEM);

    mutex_init(&ppgtt->flush);

    ppgtt_init(&ppgtt->base, gt, 0);
    ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
    ppgtt->base.vm.top = 1;

    ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
    ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
    ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
    ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
    ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;

    ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
    ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
    ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;

    err = gen6_ppgtt_init_scratch(ppgtt);
    if (err)
        goto err_free;

    ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
    if (IS_ERR(ppgtt->base.pd)) {
        err = PTR_ERR(ppgtt->base.pd);
        goto err_scratch;
    }

    return &ppgtt->base;

err_scratch:
    free_scratch(&ppgtt->base.vm);
err_free:
    kfree(ppgtt);
    return ERR_PTR(err);
}