#include "amdgpu_ids.h"

#include <linux/idr.h>
#include <linux/dma-fence-array.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
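
/*
 * PASID manager
 *
 * PASIDs are global address space identifiers that can be shared
 * between the GPU, an IOMMU and the driver. They are allocated from a
 * single global IDA, so a PASID value is unique across all devices.
 */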
static DEFINE_IDA(amdgpu_pasid_ida);

/* Helper to free a pasid from a dma_fence callback */
struct amdgpu_pasid_cb {
	struct dma_fence_cb cb;
	u32 pasid;
};
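
/**
 * amdgpu_pasid_alloc - Allocate a PASID
 * @bits: Maximum width of the PASID in bits, must be at least 1
 *
 * Allocates a PASID of the given width while keeping smaller PASIDs
 * available if possible.
 *
 * Returns a positive integer on success. Returns %-EINVAL if bits==0.
 * Returns %-ENOSPC if no PASID was available.
 */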
int amdgpu_pasid_alloc(unsigned int bits)
{
	int pasid = -EINVAL;

	for (bits = min(bits, 31U); bits > 0; bits--) {
		pasid = ida_simple_get(&amdgpu_pasid_ida,
				       1U << (bits - 1), 1U << bits,
				       GFP_KERNEL);
		if (pasid != -ENOSPC)
			break;
	}

	if (pasid >= 0)
		trace_amdgpu_pasid_allocated(pasid);

	return pasid;
}
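
/**
 * amdgpu_pasid_free - Free a PASID
 * @pasid: PASID to free
 */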
void amdgpu_pasid_free(u32 pasid)
{
	trace_amdgpu_pasid_freed(pasid);
	ida_simple_remove(&amdgpu_pasid_ida, pasid);
}

static void amdgpu_pasid_free_cb(struct dma_fence *fence,
				 struct dma_fence_cb *_cb)
{
	struct amdgpu_pasid_cb *cb =
		container_of(_cb, struct amdgpu_pasid_cb, cb);

	amdgpu_pasid_free(cb->pasid);
	dma_fence_put(fence);
	kfree(cb);
}
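
/**
 * amdgpu_pasid_free_delayed - free pasid when fences signal
 *
 * @resv: reservation object with the fences to wait for
 * @pasid: pasid to free
 *
 * Free the pasid only after all the fences in resv are signaled.
 */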
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
			       u32 pasid)
{
	struct amdgpu_pasid_cb *cb;
	struct dma_fence *fence;
	int r;

	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
	if (r)
		goto fallback;

	if (!fence) {
		amdgpu_pasid_free(pasid);
		return;
	}

	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb) {
		/* Last resort when we are OOM: block on the fence and
		 * free the pasid immediately.
		 */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
		amdgpu_pasid_free(pasid);
	} else {
		cb->pasid = pasid;
		if (dma_fence_add_callback(fence, &cb->cb,
					   amdgpu_pasid_free_cb))
			amdgpu_pasid_free_cb(fence, &cb->cb);
	}

	return;

fallback:
	/* Not enough memory for the delayed delete, as last resort
	 * block for all the fences to complete.
	 */
	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
			      false, MAX_SCHEDULE_TIMEOUT);
	amdgpu_pasid_free(pasid);
}
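
/*
 * VMID manager
 *
 * VMIDs are a per VMHUB identifier for page table handling.
 */

/**
 * amdgpu_vmid_had_gpu_reset - check if reset occurred since last use
 *
 * @adev: amdgpu_device pointer
 * @id: VMID structure
 *
 * Check if a GPU reset occurred since the last use of the VMID.
 */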
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
			       struct amdgpu_vmid *id)
{
	return id->current_gpu_reset_count !=
		atomic_read(&adev->gpu_reset_counter);
}
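
/**
 * amdgpu_vmid_grab_idle - grab idle VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @idle: resulting idle VMID
 *
 * Try to find an idle VMID; if none is idle, add a fence to wait for to the
 * sync object so the caller can retry later. Returns -ENOMEM when out of
 * memory.
 */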
static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
				 struct amdgpu_ring *ring,
				 struct amdgpu_sync *sync,
				 struct amdgpu_vmid **idle)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct dma_fence **fences;
	unsigned i;
	int r;

	if (!dma_fence_is_signaled(ring->vmid_wait))
		return amdgpu_sync_fence(sync, ring->vmid_wait);

	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	/* Check if we have an idle VMID */
	i = 0;
	list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
		/* Don't use per engine and per process VMID at the same time */
		struct amdgpu_ring *r = adev->vm_manager.concurrent_flush ?
			NULL : ring;

		fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, r);
		if (!fences[i])
			break;
		++i;
	}

	/* If we can't find an idle VMID to use, wait until one becomes available */
	if (&(*idle)->list == &id_mgr->ids_lru) {
		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
		struct dma_fence_array *array;
		unsigned j;

		*idle = NULL;
		for (j = 0; j < i; ++j)
			dma_fence_get(fences[j]);

		array = dma_fence_array_create(i, fences, fence_context,
					       seqno, true);
		if (!array) {
			for (j = 0; j < i; ++j)
				dma_fence_put(fences[j]);
			kfree(fences);
			return -ENOMEM;
		}

		r = amdgpu_sync_fence(sync, &array->base);
		dma_fence_put(ring->vmid_wait);
		ring->vmid_wait = &array->base;
		return r;
	}
	kfree(fences);

	return 0;
}
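
/**
 * amdgpu_vmid_grab_reserved - try to assign reserved VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 *
 * Try to assign a reserved VMID.
 */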
static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
				     struct amdgpu_ring *ring,
				     struct amdgpu_sync *sync,
				     struct dma_fence *fence,
				     struct amdgpu_job *job,
				     struct amdgpu_vmid **id)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	uint64_t fence_context = adev->fence_context + ring->idx;
	bool needs_flush = vm->use_cpu_for_update;
	uint64_t updates = amdgpu_vm_tlb_seq(vm);
	int r;

	*id = vm->reserved_vmid[vmhub];
	if ((*id)->owner != vm->immediate.fence_context ||
	    (*id)->pd_gpu_addr != job->vm_pd_addr ||
	    (*id)->flushed_updates < updates ||
	    !(*id)->last_flush ||
	    ((*id)->last_flush->context != fence_context &&
	     !dma_fence_is_signaled((*id)->last_flush))) {
		struct dma_fence *tmp;

		/* Don't use per engine and per process VMID at the same time */
		if (adev->vm_manager.concurrent_flush)
			ring = NULL;

		/* to prevent one context starved by another context */
		(*id)->pd_gpu_addr = 0;
		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
		if (tmp) {
			*id = NULL;
			return amdgpu_sync_fence(sync, tmp);
		}
		needs_flush = true;
	}

	/* Good, we can use this VMID. Remember this submission as
	 * user of the VMID.
	 */
	r = amdgpu_sync_fence(&(*id)->active, fence);
	if (r)
		return r;

	(*id)->flushed_updates = updates;
	job->vm_needs_flush = needs_flush;
	return 0;
}
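
/**
 * amdgpu_vmid_grab_used - try to reuse a VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 * @id: resulting VMID
 *
 * Try to reuse a VMID for this submission.
 */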
static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
				 struct amdgpu_ring *ring,
				 struct amdgpu_sync *sync,
				 struct dma_fence *fence,
				 struct amdgpu_job *job,
				 struct amdgpu_vmid **id)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	uint64_t fence_context = adev->fence_context + ring->idx;
	uint64_t updates = amdgpu_vm_tlb_seq(vm);
	int r;

	job->vm_needs_flush = vm->use_cpu_for_update;

	/* Check if we can use a VMID already assigned to this VM */
	list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
		bool needs_flush = vm->use_cpu_for_update;

		/* Check all the prerequisites to using this VMID */
		if ((*id)->owner != vm->immediate.fence_context)
			continue;

		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
			continue;

		if (!(*id)->last_flush ||
		    ((*id)->last_flush->context != fence_context &&
		     !dma_fence_is_signaled((*id)->last_flush)))
			needs_flush = true;

		if ((*id)->flushed_updates < updates)
			needs_flush = true;

		if (needs_flush && !adev->vm_manager.concurrent_flush)
			continue;

		/* Good, we can use this VMID. Remember this submission as
		 * user of the VMID.
		 */
		r = amdgpu_sync_fence(&(*id)->active, fence);
		if (r)
			return r;

		(*id)->flushed_updates = updates;
		job->vm_needs_flush |= needs_flush;
		return 0;
	}

	*id = NULL;
	return 0;
}
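
/**
 * amdgpu_vmid_grab - allocate the next free VMID
 *
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 * @sync: sync object where we add dependencies
 * @fence: fence protecting ID from reuse
 * @job: job who wants to use the VMID
 *
 * Allocate an id for the vm, adding fences to the sync obj as necessary.
 */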
int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
		     struct amdgpu_sync *sync, struct dma_fence *fence,
		     struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *idle = NULL;
	struct amdgpu_vmid *id = NULL;
	int r = 0;

	mutex_lock(&id_mgr->lock);
	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
	if (r || !idle)
		goto error;

	if (vm->reserved_vmid[vmhub]) {
		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
		if (r || !id)
			goto error;
	} else {
		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
		if (r)
			goto error;

		if (!id) {
			/* Still no ID to use? Then use the idle one found earlier */
			id = idle;

			/* Remember this submission as user of the VMID */
			r = amdgpu_sync_fence(&id->active, fence);
			if (r)
				goto error;

			id->flushed_updates = amdgpu_vm_tlb_seq(vm);
			job->vm_needs_flush = true;
		}

		list_move_tail(&id->list, &id_mgr->ids_lru);
	}

	id->pd_gpu_addr = job->vm_pd_addr;
	id->owner = vm->immediate.fence_context;

	if (job->vm_needs_flush) {
		dma_fence_put(id->last_flush);
		id->last_flush = NULL;
	}
	job->vmid = id - id_mgr->ids;
	job->pasid = vm->pasid;
	trace_amdgpu_vm_grab_id(vm, ring, job);

error:
	mutex_unlock(&id_mgr->lock);
	return r;
}

int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       unsigned vmhub)
{
	struct amdgpu_vmid_mgr *id_mgr;
	struct amdgpu_vmid *idle;
	int r = 0;

	id_mgr = &adev->vm_manager.id_mgr[vmhub];
	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub])
		goto unlock;
	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
	    AMDGPU_VM_MAX_RESERVED_VMID) {
		DRM_ERROR("Over limitation of reserved vmid\n");
		atomic_dec(&id_mgr->reserved_vmid_num);
		r = -EINVAL;
		goto unlock;
	}

	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
	list_del_init(&idle->list);
	vm->reserved_vmid[vmhub] = idle;
	mutex_unlock(&id_mgr->lock);

	return 0;
unlock:
	mutex_unlock(&id_mgr->lock);
	return r;
}

void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       unsigned vmhub)
{
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];

	mutex_lock(&id_mgr->lock);
	if (vm->reserved_vmid[vmhub]) {
		list_add(&vm->reserved_vmid[vmhub]->list,
			 &id_mgr->ids_lru);
		vm->reserved_vmid[vmhub] = NULL;
		atomic_dec(&id_mgr->reserved_vmid_num);
	}
	mutex_unlock(&id_mgr->lock);
}
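
/**
 * amdgpu_vmid_reset - reset VMID to zero
 *
 * @adev: amdgpu device structure
 * @vmhub: vmhub type
 * @vmid: vmid number to use
 *
 * Reset saved GDS, GWS and OA to force a switch on the next flush.
 */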
void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
		       unsigned vmid)
{
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[vmid];

	mutex_lock(&id_mgr->lock);
	id->owner = 0;
	id->gds_base = 0;
	id->gds_size = 0;
	id->gws_base = 0;
	id->gws_size = 0;
	id->oa_base = 0;
	id->oa_size = 0;
	mutex_unlock(&id_mgr->lock);
}
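
/**
 * amdgpu_vmid_reset_all - reset all VMIDs to zero
 *
 * @adev: amdgpu device structure
 *
 * Reset all VMIDs to force a flush on next use.
 */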
void amdgpu_vmid_reset_all(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		for (j = 1; j < id_mgr->num_ids; ++j)
			amdgpu_vmid_reset(adev, i, j);
	}
}
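
/**
 * amdgpu_vmid_mgr_init - init the VMID manager
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the VM manager structures.
 */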
void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_init(&id_mgr->lock);
		INIT_LIST_HEAD(&id_mgr->ids_lru);
		atomic_set(&id_mgr->reserved_vmid_num, 0);

		/* manage only VMIDs not used by KFD */
		id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;

		/* skip over VMID 0, since it is the system VM */
		for (j = 1; j < id_mgr->num_ids; ++j) {
			amdgpu_vmid_reset(adev, i, j);
			amdgpu_sync_create(&id_mgr->ids[j].active);
			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
		}
	}
}
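
/**
 * amdgpu_vmid_mgr_fini - cleanup VM manager
 *
 * @adev: amdgpu_device pointer
 *
 * Cleanup the VM manager and free resources.
 */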
void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
{
	unsigned i, j;

	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
		struct amdgpu_vmid_mgr *id_mgr =
			&adev->vm_manager.id_mgr[i];

		mutex_destroy(&id_mgr->lock);
		for (j = 0; j < AMDGPU_NUM_VMID; ++j) {
			struct amdgpu_vmid *id = &id_mgr->ids[j];

			amdgpu_sync_free(&id->active);
			dma_fence_put(id->last_flush);
			dma_fence_put(id->pasid_mapping);
		}
	}
}