#include "amdgpu_vm.h"
#include "amdgpu_job.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"

#define AMDGPU_VM_SDMA_MIN_NUM_DW	256u
#define AMDGPU_VM_SDMA_MAX_NUM_DW	(16u * 1024u)
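
/**
 * amdgpu_vm_sdma_map_table - make sure the page table BO is GTT mapped
 *
 * @table: newly allocated or validated PD/PT
 *
 * Allocates GART space for the page table and, if present, its shadow BO.
 *
 * Returns:
 * 0 on success, negative errno otherwise.
 */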
static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
{
	int r;

	r = amdgpu_ttm_alloc_gart(&table->bo.tbo);
	if (r)
		return r;

	if (table->shadow)
		r = amdgpu_ttm_alloc_gart(&table->shadow->tbo);

	return r;
}
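
/**
 * amdgpu_vm_sdma_prepare - prepare an SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @resv: reservation object to sync the job to, may be NULL
 * @sync_mode: synchronization mode passed to amdgpu_sync_resv()
 *
 * Allocates a job with an IB large enough for AMDGPU_VM_SDMA_MIN_NUM_DW
 * dwords and syncs it to @resv.
 *
 * Returns:
 * 0 on success, negative errno otherwise.
 */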
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
				  struct dma_resv *resv,
				  enum amdgpu_sync_mode sync_mode)
{
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
		: AMDGPU_IB_POOL_DELAYED;
	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
	int r;

	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
	if (r)
		return r;

	p->num_dw_left = ndw;

	if (!resv)
		return 0;

	return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm);
}
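
/**
 * amdgpu_vm_sdma_commit - commit an SDMA command submission
 *
 * @p: see amdgpu_vm_update_params definition
 * @fence: optional pointer to the resulting fence
 *
 * Pads the IB and submits the job to the immediate or delayed entity of the
 * VM. The job fence is attached to the root PD, or tracked as last_unlocked
 * for unlocked updates.
 *
 * Returns:
 * 0 on success, negative errno otherwise.
 */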
static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
				 struct dma_fence **fence)
{
	struct amdgpu_ib *ib = p->job->ibs;
	struct drm_sched_entity *entity;
	struct amdgpu_ring *ring;
	struct dma_fence *f;
	int r;

	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);

	WARN_ON(ib->length_dw == 0);
	amdgpu_ring_pad_ib(ring, ib);
	WARN_ON(ib->length_dw > p->num_dw_left);
	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
	if (r)
		goto error;

	if (p->unlocked) {
		struct dma_fence *tmp = dma_fence_get(f);

		swap(p->vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	} else {
		amdgpu_bo_fence(p->vm->root.bo, f, true);
	}

	if (fence && !p->immediate)
		swap(*fence, f);
	dma_fence_put(f);
	return 0;

error:
	amdgpu_job_free(p->job);
	return r;
}
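
/**
 * amdgpu_vm_sdma_copy_ptes - copy staged PTEs into a page table
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: byte offset of the first entry, relative to the start of @bo
 * @count: number of page entries to copy
 *
 * Issues a copy command that writes @count entries, staged at the end of the
 * IB, into the page table at @pe.
 */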
static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
				     struct amdgpu_bo *bo, uint64_t pe,
				     unsigned count)
{
	struct amdgpu_ib *ib = p->job->ibs;
	uint64_t src = ib->gpu_addr;

	src += p->num_dw_left * 4;

	pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
	trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate);

	amdgpu_vm_copy_pte(p->adev, ib, pe, src, count);
}
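
/**
 * amdgpu_vm_sdma_set_ptes - helper to call the right ASIC function
 *
 * @p: see amdgpu_vm_update_params definition
 * @bo: PD/PT to update
 * @pe: byte offset of the first entry, relative to the start of @bo
 * @addr: start address the entries should point to
 * @count: number of page entries to update
 * @incr: increment applied to @addr for each successive entry
 * @flags: hw access flags to apply to each entry
 *
 * Traces the parameters and uses either the write or the set_pte_pde path,
 * depending on the number of entries.
 */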
static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
				    struct amdgpu_bo *bo, uint64_t pe,
				    uint64_t addr, unsigned count,
				    uint32_t incr, uint64_t flags)
{
	struct amdgpu_ib *ib = p->job->ibs;

	pe += amdgpu_gmc_sign_extend(amdgpu_bo_gpu_offset_no_check(bo));
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate);
	if (count < 3) {
		amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags,
				    count, incr);
	} else {
		amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr,
				      count, incr, flags);
	}
}
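
/**
 * amdgpu_vm_sdma_update - execute a VM page table update
 *
 * @p: see amdgpu_vm_update_params definition
 * @vmbo: PD/PT to update
 * @pe: byte offset of the first entry, relative to the start of the PD/PT
 * @addr: start address the entries should point to
 * @count: number of page entries to update
 * @incr: increment applied to @addr for each successive entry
 * @flags: hw access flags to apply to each entry
 *
 * Writes the update commands to the IB, committing and reallocating the job
 * whenever the IB runs out of space. With a pages_addr array the entries are
 * looked up via amdgpu_vm_map_gart(), staged at the end of the IB and then
 * copied into the page table; otherwise they are written directly.
 */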
static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
				 struct amdgpu_bo_vm *vmbo, uint64_t pe,
				 uint64_t addr, unsigned count, uint32_t incr,
				 uint64_t flags)
{
	struct amdgpu_bo *bo = &vmbo->bo;
	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
		: AMDGPU_IB_POOL_DELAYED;
	struct dma_resv_iter cursor;
	unsigned int i, ndw, nptes;
	struct dma_fence *fence;
	uint64_t *pte;
	int r;

	/* Sync to kernel fences on the PD/PT (e.g. pending moves) first */
	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
				DMA_RESV_USAGE_KERNEL, fence) {
		r = amdgpu_sync_fence(&p->job->sync, fence);
		if (r)
			return r;
	}

	do {
		ndw = p->num_dw_left;
		ndw -= p->job->ibs->length_dw;

		if (ndw < 32) {
			r = amdgpu_vm_sdma_commit(p, NULL);
			if (r)
				return r;

			/* estimate how many dw we need */
			ndw = 32;
			if (p->pages_addr)
				ndw += count * 2;
			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);

			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
						     &p->job);
			if (r)
				return r;

			p->num_dw_left = ndw;
		}

		if (!p->pages_addr) {
			/* no GART lookup needed, write the entries directly */
			if (vmbo->shadow)
				amdgpu_vm_sdma_set_ptes(p, vmbo->shadow, pe, addr,
							count, incr, flags);
			amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
						incr, flags);
			return 0;
		}

		/* reserve space for the copy commands (twice with a shadow) */
		ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
			(vmbo->shadow ? 2 : 1);

		/* reserve space for IB padding */
		ndw -= 7;

		nptes = min(count, ndw / 2);

		/* Put the PTEs at the end of the IB. */
		p->num_dw_left -= nptes * 2;
		pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]);
		for (i = 0; i < nptes; ++i, addr += incr) {
			pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
			pte[i] |= flags;
		}

		if (vmbo->shadow)
			amdgpu_vm_sdma_copy_ptes(p, vmbo->shadow, pe, nptes);
		amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);

		pe += nptes * 8;
		count -= nptes;
	} while (count);

	return 0;
}

/* Callbacks used by the VM code to update page tables via the SDMA engine */
const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
	.map_table = amdgpu_vm_sdma_map_table,
	.prepare = amdgpu_vm_sdma_prepare,
	.update = amdgpu_vm_sdma_update,
	.commit = amdgpu_vm_sdma_commit
};