0028 #include <linux/file.h>
0029 #include <linux/pagemap.h>
0030 #include <linux/sync_file.h>
0031 #include <linux/dma-buf.h>
0032
0033 #include <drm/amdgpu_drm.h>
0034 #include <drm/drm_syncobj.h>
0035 #include "amdgpu_cs.h"
0036 #include "amdgpu.h"
0037 #include "amdgpu_trace.h"
0038 #include "amdgpu_gmc.h"
0039 #include "amdgpu_gem.h"
0040 #include "amdgpu_ras.h"
0041
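/* Look up the user fence BO referenced by the FENCE chunk, add it to the
 * submission and return the write-back offset inside that BO.
 */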
0042 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
0043 struct drm_amdgpu_cs_chunk_fence *data,
0044 uint32_t *offset)
0045 {
0046 struct drm_gem_object *gobj;
0047 struct amdgpu_bo *bo;
0048 unsigned long size;
0049 int r;
0050
0051 gobj = drm_gem_object_lookup(p->filp, data->handle);
0052 if (gobj == NULL)
0053 return -EINVAL;
0054
0055 bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
0056 p->uf_entry.priority = 0;
0057 p->uf_entry.tv.bo = &bo->tbo;
0058
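	/* Number of shared dma_resv fence slots to reserve when this BO is locked for the CS */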
0059 p->uf_entry.tv.num_shared = 3;
0060
0061 drm_gem_object_put(gobj);
0062
0063 size = amdgpu_bo_size(bo);
0064 if (size != PAGE_SIZE || (data->offset + 8) > size) {
0065 r = -EINVAL;
0066 goto error_unref;
0067 }
0068
0069 if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
0070 r = -EINVAL;
0071 goto error_unref;
0072 }
0073
0074 *offset = data->offset;
0075
0076 return 0;
0077
0078 error_unref:
0079 amdgpu_bo_unref(&bo);
0080 return r;
0081 }
0082
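/* Build the submission's BO list from the handles passed in an
 * AMDGPU_CHUNK_ID_BO_HANDLES chunk instead of a pre-created bo_list.
 */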
0083 static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
0084 struct drm_amdgpu_bo_list_in *data)
0085 {
0086 int r;
0087 struct drm_amdgpu_bo_list_entry *info = NULL;
0088
0089 r = amdgpu_bo_create_list_entry_array(data, &info);
0090 if (r)
0091 return r;
0092
0093 r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
0094 &p->bo_list);
0095 if (r)
0096 goto error_free;
0097
0098 kvfree(info);
0099 return 0;
0100
0101 error_free:
0102 kvfree(info);
0103
0104 return r;
0105 }
0106
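/* Copy the chunk array from user space, validate the individual chunks and
 * allocate the job for this submission.
 */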
0107 static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
0108 {
0109 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
0110 struct amdgpu_vm *vm = &fpriv->vm;
0111 uint64_t *chunk_array_user;
0112 uint64_t *chunk_array;
0113 unsigned size, num_ibs = 0;
0114 uint32_t uf_offset = 0;
0115 int i;
0116 int ret;
0117
0118 if (cs->in.num_chunks == 0)
0119 return -EINVAL;
0120
0121 chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
0122 if (!chunk_array)
0123 return -ENOMEM;
0124
0125 p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
0126 if (!p->ctx) {
0127 ret = -EINVAL;
0128 goto free_chunk;
0129 }
0130
0131 mutex_lock(&p->ctx->lock);
0132
	/* Reject jobs from a context that was marked guilty after a GPU reset */
0134 if (atomic_read(&p->ctx->guilty) == 1) {
0135 ret = -ECANCELED;
0136 goto free_chunk;
0137 }
0138
	/* Copy the user-space array of chunk pointers */
0140 chunk_array_user = u64_to_user_ptr(cs->in.chunks);
0141 if (copy_from_user(chunk_array, chunk_array_user,
0142 sizeof(uint64_t)*cs->in.num_chunks)) {
0143 ret = -EFAULT;
0144 goto free_chunk;
0145 }
0146
0147 p->nchunks = cs->in.num_chunks;
0148 p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
0149 GFP_KERNEL);
0150 if (!p->chunks) {
0151 ret = -ENOMEM;
0152 goto free_chunk;
0153 }
0154
0155 for (i = 0; i < p->nchunks; i++) {
0156 struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
0157 struct drm_amdgpu_cs_chunk user_chunk;
0158 uint32_t __user *cdata;
0159
0160 chunk_ptr = u64_to_user_ptr(chunk_array[i]);
0161 if (copy_from_user(&user_chunk, chunk_ptr,
0162 sizeof(struct drm_amdgpu_cs_chunk))) {
0163 ret = -EFAULT;
0164 i--;
0165 goto free_partial_kdata;
0166 }
0167 p->chunks[i].chunk_id = user_chunk.chunk_id;
0168 p->chunks[i].length_dw = user_chunk.length_dw;
0169
0170 size = p->chunks[i].length_dw;
0171 cdata = u64_to_user_ptr(user_chunk.chunk_data);
0172
0173 p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
0174 if (p->chunks[i].kdata == NULL) {
0175 ret = -ENOMEM;
0176 i--;
0177 goto free_partial_kdata;
0178 }
0179 size *= sizeof(uint32_t);
0180 if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
0181 ret = -EFAULT;
0182 goto free_partial_kdata;
0183 }
0184
0185 switch (p->chunks[i].chunk_id) {
0186 case AMDGPU_CHUNK_ID_IB:
0187 ++num_ibs;
0188 break;
0189
0190 case AMDGPU_CHUNK_ID_FENCE:
0191 size = sizeof(struct drm_amdgpu_cs_chunk_fence);
0192 if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
0193 ret = -EINVAL;
0194 goto free_partial_kdata;
0195 }
0196
0197 ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
0198 &uf_offset);
0199 if (ret)
0200 goto free_partial_kdata;
0201
0202 break;
0203
0204 case AMDGPU_CHUNK_ID_BO_HANDLES:
0205 size = sizeof(struct drm_amdgpu_bo_list_in);
0206 if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
0207 ret = -EINVAL;
0208 goto free_partial_kdata;
0209 }
0210
0211 ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
0212 if (ret)
0213 goto free_partial_kdata;
0214
0215 break;
0216
0217 case AMDGPU_CHUNK_ID_DEPENDENCIES:
0218 case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
0219 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
0220 case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
0221 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
0222 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
0223 break;
0224
0225 default:
0226 ret = -EINVAL;
0227 goto free_partial_kdata;
0228 }
0229 }
0230
0231 ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
0232 if (ret)
0233 goto free_all_kdata;
0234
0235 if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
0236 ret = -ECANCELED;
0237 goto free_all_kdata;
0238 }
0239
0240 if (p->uf_entry.tv.bo)
0241 p->job->uf_addr = uf_offset;
0242 kvfree(chunk_array);
0243
	/* Record the submitting task's info in the VM (used e.g. in GPU fault reports) */
0245 amdgpu_vm_set_task_info(vm);
0246
0247 return 0;
0248
0249 free_all_kdata:
0250 i = p->nchunks - 1;
0251 free_partial_kdata:
0252 for (; i >= 0; i--)
0253 kvfree(p->chunks[i].kdata);
0254 kvfree(p->chunks);
0255 p->chunks = NULL;
0256 p->nchunks = 0;
0257 free_chunk:
0258 kvfree(chunk_array);
0259
0260 return ret;
0261 }
0262
0263
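/* Helpers that convert between an accumulated time budget in microseconds
 * and a byte budget, using the measured transfer rate (log2_max_MBps).
 */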
0264 static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
0265 {
0266 if (us <= 0 || !adev->mm_stats.log2_max_MBps)
0267 return 0;
0268
	/* 1 MB/s is exactly 1 byte per microsecond, so shifting the
	 * accumulated time budget by log2(max MB/s) converts it into a
	 * byte budget.
	 */
0272 return us << adev->mm_stats.log2_max_MBps;
0273 }
0274
0275 static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
0276 {
0277 if (!adev->mm_stats.log2_max_MBps)
0278 return 0;
0279
0280 return bytes >> adev->mm_stats.log2_max_MBps;
0281 }
0282
/* Estimate how many bytes TTM may move for this command submission
 * without stalling for too long.
 *
 * The budget is kept as accumulated time (accum_us) and converted to
 * bytes with the measured maximum transfer rate (log2_max_MBps).
 * Buffer moves consume the budget; once it is exhausted, BOs are
 * validated in their currently allowed domains instead of being
 * migrated to their preferred ones.
 */
0296 static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
0297 u64 *max_bytes,
0298 u64 *max_vis_bytes)
0299 {
0300 s64 time_us, increment_us;
0301 u64 free_vram, total_vram, used_vram;
0302
	/* Cap the accumulated budget at 200 ms so that a long idle period
	 * does not turn into one huge burst of buffer moves.
	 */
0309 const s64 us_upper_bound = 200000;
0310
0311 if (!adev->mm_stats.log2_max_MBps) {
0312 *max_bytes = 0;
0313 *max_vis_bytes = 0;
0314 return;
0315 }
0316
0317 total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
0318 used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
0319 free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
0320
0321 spin_lock(&adev->mm_stats.lock);
0322
	/* Accumulate the time elapsed since the last submission */
0324 time_us = ktime_to_us(ktime_get());
0325 increment_us = time_us - adev->mm_stats.last_update_us;
0326 adev->mm_stats.last_update_us = time_us;
0327 adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
0328 us_upper_bound);
0329
	/* Do not throttle while plenty of VRAM is still free.  If at least
	 * 128 MB or 1/8th of VRAM is unused, bump the accumulated budget so
	 * free memory can be filled quickly, e.g. after userspace released
	 * a lot of buffers or after a burst of evictions.
	 */
0342 if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
0343 s64 min_us;
0344
		/* Be more aggressive on discrete GPUs and allow moving up to
		 * a quarter of the free VRAM right away; APUs share system
		 * memory, so no minimum budget is enforced there.
		 */
0348 if (!(adev->flags & AMD_IS_APU))
0349 min_us = bytes_to_us(adev, free_vram / 4);
0350 else
0351 min_us = 0;
0352
0353 adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
0354 }
0355
	/* accum_us can be negative when the driver is in "debt" from earlier
	 * moves; us_to_bytes() then returns 0 and no moves are allowed.
	 */
0359 *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
0360
0361
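	/* Apply the same accounting to CPU-visible VRAM when only part of VRAM is visible */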
0362 if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
0363 u64 total_vis_vram = adev->gmc.visible_vram_size;
0364 u64 used_vis_vram =
0365 amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
0366
0367 if (used_vis_vram < total_vis_vram) {
0368 u64 free_vis_vram = total_vis_vram - used_vis_vram;
0369 adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
0370 increment_us, us_upper_bound);
0371
0372 if (free_vis_vram >= total_vis_vram / 2)
0373 adev->mm_stats.accum_us_vis =
0374 max(bytes_to_us(adev, free_vis_vram / 2),
0375 adev->mm_stats.accum_us_vis);
0376 }
0377
0378 *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
0379 } else {
0380 *max_vis_bytes = 0;
0381 }
0382
0383 spin_unlock(&adev->mm_stats.lock);
0384 }
0385
/* Report how many bytes were actually moved for the last command
 * submission.  This can drive the accumulated budget negative ("debt"),
 * which temporarily stops further buffer moves.
 */
0390 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
0391 u64 num_vis_bytes)
0392 {
0393 spin_lock(&adev->mm_stats.lock);
0394 adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
0395 adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
0396 spin_unlock(&adev->mm_stats.lock);
0397 }
0398
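/* Validate a single BO for the submission: prefer its preferred domains
 * while the move budget lasts, otherwise fall back to the allowed domains,
 * retrying there on -ENOMEM.
 */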
0399 static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
0400 {
0401 struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
0402 struct amdgpu_cs_parser *p = param;
0403 struct ttm_operation_ctx ctx = {
0404 .interruptible = true,
0405 .no_wait_gpu = false,
0406 .resv = bo->tbo.base.resv
0407 };
0408 uint32_t domain;
0409 int r;
0410
0411 if (bo->tbo.pin_count)
0412 return 0;
0413
	/* Don't move this buffer if the move budget is already depleted, and
	 * don't force buffers with dma-buf attachments into their preferred
	 * domain.
	 */
0417 if (p->bytes_moved < p->bytes_moved_threshold &&
0418 (!bo->tbo.base.dma_buf ||
0419 list_empty(&bo->tbo.base.dma_buf->attachments))) {
0420 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
0421 (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we have depleted our allowance for
			 * visible-VRAM moves.
			 */
0426 if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
0427 domain = bo->preferred_domains;
0428 else
0429 domain = bo->allowed_domains;
0430 } else {
0431 domain = bo->preferred_domains;
0432 }
0433 } else {
0434 domain = bo->allowed_domains;
0435 }
0436
0437 retry:
0438 amdgpu_bo_placement_from_domain(bo, domain);
0439 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0440
0441 p->bytes_moved += ctx.bytes_moved;
0442 if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
0443 amdgpu_bo_in_cpu_visible_vram(bo))
0444 p->bytes_moved_vis += ctx.bytes_moved;
0445
0446 if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
0447 domain = bo->allowed_domains;
0448 goto retry;
0449 }
0450
0451 return r;
0452 }
0453
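/* Validate every BO on the given list and refresh the backing pages of
 * userptr BOs that were invalidated.
 */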
0454 static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
0455 struct list_head *validated)
0456 {
0457 struct ttm_operation_ctx ctx = { true, false };
0458 struct amdgpu_bo_list_entry *lobj;
0459 int r;
0460
0461 list_for_each_entry(lobj, validated, tv.head) {
0462 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
0463 struct mm_struct *usermm;
0464
0465 usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
0466 if (usermm && usermm != current->mm)
0467 return -EPERM;
0468
0469 if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
0470 lobj->user_invalidated && lobj->user_pages) {
0471 amdgpu_bo_placement_from_domain(bo,
0472 AMDGPU_GEM_DOMAIN_CPU);
0473 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
0474 if (r)
0475 return r;
0476
0477 amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
0478 lobj->user_pages);
0479 }
0480
0481 r = amdgpu_cs_bo_validate(p, bo);
0482 if (r)
0483 return r;
0484
0485 kvfree(lobj->user_pages);
0486 lobj->user_pages = NULL;
0487 }
0488 return 0;
0489 }
0490
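/* Collect, reserve and validate all buffer objects needed by the
 * submission, including the VM page directory and the user fence BO.
 */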
0491 static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
0492 union drm_amdgpu_cs *cs)
0493 {
0494 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
0495 struct amdgpu_vm *vm = &fpriv->vm;
0496 struct amdgpu_bo_list_entry *e;
0497 struct list_head duplicates;
0498 struct amdgpu_bo *gds;
0499 struct amdgpu_bo *gws;
0500 struct amdgpu_bo *oa;
0501 int r;
0502
0503 INIT_LIST_HEAD(&p->validated);
0504
	/* p->bo_list may already have been set up by an AMDGPU_CHUNK_ID_BO_HANDLES chunk */
0506 if (cs->in.bo_list_handle) {
0507 if (p->bo_list)
0508 return -EINVAL;
0509
0510 r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
0511 &p->bo_list);
0512 if (r)
0513 return r;
0514 } else if (!p->bo_list) {
0515
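		/* No BO list was provided, so create an empty one */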
0516 r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
0517 &p->bo_list);
0518 if (r)
0519 return r;
0520 }
0521
0522 mutex_lock(&p->bo_list->bo_list_mutex);
0523
	/* One shared fence slot for TTM and one for the CS job */
0525 amdgpu_bo_list_for_each_entry(e, p->bo_list)
0526 e->tv.num_shared = 2;
0527
0528 amdgpu_bo_list_get_list(p->bo_list, &p->validated);
0529
0530 INIT_LIST_HEAD(&duplicates);
0531 amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
0532
0533 if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
0534 list_add(&p->uf_entry.tv.head, &p->validated);
0535
	/* Pin the user pages backing each userptr BO and note whether they
	 * changed since registration; changed BOs are validated again in
	 * amdgpu_cs_list_validate().
	 */
0540 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
0541 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
0542 bool userpage_invalidated = false;
0543 int i;
0544
0545 e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
0546 sizeof(struct page *),
0547 GFP_KERNEL | __GFP_ZERO);
0548 if (!e->user_pages) {
0549 DRM_ERROR("kvmalloc_array failure\n");
0550 r = -ENOMEM;
0551 goto out_free_user_pages;
0552 }
0553
0554 r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
0555 if (r) {
0556 kvfree(e->user_pages);
0557 e->user_pages = NULL;
0558 goto out_free_user_pages;
0559 }
0560
0561 for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
0562 if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
0563 userpage_invalidated = true;
0564 break;
0565 }
0566 }
0567 e->user_invalidated = userpage_invalidated;
0568 }
0569
0570 r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
0571 &duplicates);
0572 if (unlikely(r != 0)) {
0573 if (r != -ERESTARTSYS)
0574 DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
0575 goto out_free_user_pages;
0576 }
0577
0578 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
0579 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
0580
0581 e->bo_va = amdgpu_vm_bo_find(vm, bo);
0582 }
0583
	/* Throttle the submission by waiting for a fence of an older job on
	 * this entity before queueing more work.
	 */
0587 r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
0588 if (unlikely(r != 0)) {
0589 if (r != -ERESTARTSYS)
0590 DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
0591 goto error_validate;
0592 }
0593
0594 amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
0595 &p->bytes_moved_vis_threshold);
0596 p->bytes_moved = 0;
0597 p->bytes_moved_vis = 0;
0598
0599 r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm,
0600 amdgpu_cs_bo_validate, p);
0601 if (r) {
0602 DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n");
0603 goto error_validate;
0604 }
0605
0606 r = amdgpu_cs_list_validate(p, &duplicates);
0607 if (r)
0608 goto error_validate;
0609
0610 r = amdgpu_cs_list_validate(p, &p->validated);
0611 if (r)
0612 goto error_validate;
0613
0614 amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
0615 p->bytes_moved_vis);
0616
0617 gds = p->bo_list->gds_obj;
0618 gws = p->bo_list->gws_obj;
0619 oa = p->bo_list->oa_obj;
0620
0621 if (gds) {
0622 p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
0623 p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
0624 }
0625 if (gws) {
0626 p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
0627 p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
0628 }
0629 if (oa) {
0630 p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
0631 p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
0632 }
0633
0634 if (!r && p->uf_entry.tv.bo) {
0635 struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
0636
0637 r = amdgpu_ttm_alloc_gart(&uf->tbo);
0638 p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
0639 }
0640
0641 error_validate:
0642 if (r)
0643 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
0644
0645 out_free_user_pages:
0646 if (r) {
0647 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
0648 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
0649
0650 if (!e->user_pages)
0651 continue;
0652 amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
0653 kvfree(e->user_pages);
0654 e->user_pages = NULL;
0655 }
0656 mutex_unlock(&p->bo_list->bo_list_mutex);
0657 }
0658 return r;
0659 }
0660
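/* Make the job wait for the relevant fences of every reserved BO,
 * honouring BOs that request explicit synchronization.
 */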
0661 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
0662 {
0663 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
0664 struct amdgpu_bo_list_entry *e;
0665 int r;
0666
0667 list_for_each_entry(e, &p->validated, tv.head) {
0668 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
0669 struct dma_resv *resv = bo->tbo.base.resv;
0670 enum amdgpu_sync_mode sync_mode;
0671
0672 sync_mode = amdgpu_bo_explicit_sync(bo) ?
0673 AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
0674 r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
0675 &fpriv->vm);
0676 if (r)
0677 return r;
0678 }
0679 return 0;
0680 }
0681
/**
 * amdgpu_cs_parser_fini() - clean up the parser state
 * @parser: parser structure holding the submission state
 * @error: error number of the last step
 * @backoff: whether the reserved buffers need to be backed off
 *
 * If @error is set and @backoff is true, the reservations taken while
 * parsing are dropped; in any case all memory held by the parser is freed.
 */
0691 static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
0692 bool backoff)
0693 {
0694 unsigned i;
0695
0696 if (error && backoff) {
0697 ttm_eu_backoff_reservation(&parser->ticket,
0698 &parser->validated);
0699 mutex_unlock(&parser->bo_list->bo_list_mutex);
0700 }
0701
0702 for (i = 0; i < parser->num_post_deps; i++) {
0703 drm_syncobj_put(parser->post_deps[i].syncobj);
0704 kfree(parser->post_deps[i].chain);
0705 }
0706 kfree(parser->post_deps);
0707
0708 dma_fence_put(parser->fence);
0709
0710 if (parser->ctx) {
0711 mutex_unlock(&parser->ctx->lock);
0712 amdgpu_ctx_put(parser->ctx);
0713 }
0714 if (parser->bo_list)
0715 amdgpu_bo_list_put(parser->bo_list);
0716
0717 for (i = 0; i < parser->nchunks; i++)
0718 kvfree(parser->chunks[i].kdata);
0719 kvfree(parser->chunks);
0720 if (parser->job)
0721 amdgpu_job_free(parser->job);
0722 if (parser->uf_entry.tv.bo) {
0723 struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
0724
0725 amdgpu_bo_unref(&uf);
0726 }
0727 }
0728
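/* Handle the VM work of the submission: parse or patch IBs for rings that
 * emulate VM accesses, update page tables and moved BOs, and add the
 * resulting fences as dependencies of the job.
 */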
0729 static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
0730 {
0731 struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
0732 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
0733 struct amdgpu_device *adev = p->adev;
0734 struct amdgpu_vm *vm = &fpriv->vm;
0735 struct amdgpu_bo_list_entry *e;
0736 struct amdgpu_bo_va *bo_va;
0737 struct amdgpu_bo *bo;
0738 int r;
0739
	/* The IB contents only need to be mapped for rings that parse or patch the CS (UVD/VCE VM emulation) */
0741 if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
0742 unsigned i, j;
0743
0744 for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
0745 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
0746 struct amdgpu_bo_va_mapping *m;
0747 struct amdgpu_bo *aobj = NULL;
0748 struct amdgpu_cs_chunk *chunk;
0749 uint64_t offset, va_start;
0750 struct amdgpu_ib *ib;
0751 uint8_t *kptr;
0752
0753 chunk = &p->chunks[i];
0754 ib = &p->job->ibs[j];
0755 chunk_ib = chunk->kdata;
0756
0757 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
0758 continue;
0759
0760 va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
0761 r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
0762 if (r) {
0763 DRM_ERROR("IB va_start is invalid\n");
0764 return r;
0765 }
0766
0767 if ((va_start + chunk_ib->ib_bytes) >
0768 (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
0769 DRM_ERROR("IB va_start+ib_bytes is invalid\n");
0770 return -EINVAL;
0771 }
0772
0773
0774 r = amdgpu_bo_kmap(aobj, (void **)&kptr);
0775 if (r) {
0776 return r;
0777 }
0778
0779 offset = m->start * AMDGPU_GPU_PAGE_SIZE;
0780 kptr += va_start - offset;
0781
0782 if (ring->funcs->parse_cs) {
0783 memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
0784 amdgpu_bo_kunmap(aobj);
0785
0786 r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
0787 if (r)
0788 return r;
0789 } else {
0790 ib->ptr = (uint32_t *)kptr;
0791 r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
0792 amdgpu_bo_kunmap(aobj);
0793 if (r)
0794 return r;
0795 }
0796
0797 j++;
0798 }
0799 }
0800
0801 if (!p->job->vm)
0802 return amdgpu_cs_sync_rings(p);
0803
0804
0805 r = amdgpu_vm_clear_freed(adev, vm, NULL);
0806 if (r)
0807 return r;
0808
0809 r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
0810 if (r)
0811 return r;
0812
0813 r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
0814 if (r)
0815 return r;
0816
0817 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
0818 bo_va = fpriv->csa_va;
0819 BUG_ON(!bo_va);
0820 r = amdgpu_vm_bo_update(adev, bo_va, false);
0821 if (r)
0822 return r;
0823
0824 r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
0825 if (r)
0826 return r;
0827 }
0828
0829 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
0830
0831 bo = ttm_to_amdgpu_bo(e->tv.bo);
0832 if (!bo)
0833 continue;
0834
0835 bo_va = e->bo_va;
0836 if (bo_va == NULL)
0837 continue;
0838
0839 r = amdgpu_vm_bo_update(adev, bo_va, false);
0840 if (r)
0841 return r;
0842
0843 r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
0844 if (r)
0845 return r;
0846 }
0847
0848 r = amdgpu_vm_handle_moved(adev, vm);
0849 if (r)
0850 return r;
0851
0852 r = amdgpu_vm_update_pdes(adev, vm, false);
0853 if (r)
0854 return r;
0855
0856 r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
0857 if (r)
0858 return r;
0859
0860 p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
0861
0862 if (amdgpu_vm_debug) {
0863
0864 amdgpu_bo_list_for_each_entry(e, p->bo_list) {
0865 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
0866
0867
0868 if (!bo)
0869 continue;
0870
0871 amdgpu_vm_bo_invalidate(adev, bo, false);
0872 }
0873 }
0874
0875 return amdgpu_cs_sync_rings(p);
0876 }
0877
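/* Translate the IB chunks into amdgpu_ib structures, select the scheduler
 * entity for the submission and perform basic sanity checks.
 */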
0878 static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
0879 struct amdgpu_cs_parser *parser)
0880 {
0881 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
0882 struct amdgpu_vm *vm = &fpriv->vm;
0883 int r, ce_preempt = 0, de_preempt = 0;
0884 struct amdgpu_ring *ring;
0885 int i, j;
0886
0887 for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
0888 struct amdgpu_cs_chunk *chunk;
0889 struct amdgpu_ib *ib;
0890 struct drm_amdgpu_cs_chunk_ib *chunk_ib;
0891 struct drm_sched_entity *entity;
0892
0893 chunk = &parser->chunks[i];
0894 ib = &parser->job->ibs[j];
0895 chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
0896
0897 if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
0898 continue;
0899
0900 if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
0901 (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
0902 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
0903 if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
0904 ce_preempt++;
0905 else
0906 de_preempt++;
0907 }
0908
			/* Each GFX submission may carry at most one preemptible CE and one preemptible DE IB */
0910 if (ce_preempt > 1 || de_preempt > 1)
0911 return -EINVAL;
0912 }
0913
0914 r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
0915 chunk_ib->ip_instance, chunk_ib->ring,
0916 &entity);
0917 if (r)
0918 return r;
0919
0920 if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
0921 parser->job->preamble_status |=
0922 AMDGPU_PREAMBLE_IB_PRESENT;
0923
0924 if (parser->entity && parser->entity != entity)
0925 return -EINVAL;
0926
		/* Reject the IB if the entity has no run queue, e.g. because
		 * the corresponding HW IP is disabled.
		 */
0929 if (entity->rq == NULL)
0930 return -EINVAL;
0931
0932 parser->entity = entity;
0933
0934 ring = to_amdgpu_ring(entity->rq->sched);
0935 r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
0936 chunk_ib->ib_bytes : 0,
0937 AMDGPU_IB_POOL_DELAYED, ib);
0938 if (r) {
0939 DRM_ERROR("Failed to get ib !\n");
0940 return r;
0941 }
0942
0943 ib->gpu_addr = chunk_ib->va_start;
0944 ib->length_dw = chunk_ib->ib_bytes / 4;
0945 ib->flags = chunk_ib->flags;
0946
0947 j++;
0948 }
0949
	/* Reject a user fence if the selected ring cannot support one */
0951 ring = to_amdgpu_ring(parser->entity->rq->sched);
0952 if (parser->job->uf_addr && ring->funcs->no_user_fence)
0953 return -EINVAL;
0954
0955 return 0;
0956 }
0957
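/* Add the fences referenced by a DEPENDENCIES chunk (or their scheduled
 * counterparts) as dependencies of the job.
 */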
0958 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
0959 struct amdgpu_cs_chunk *chunk)
0960 {
0961 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
0962 unsigned num_deps;
0963 int i, r;
0964 struct drm_amdgpu_cs_chunk_dep *deps;
0965
0966 deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
0967 num_deps = chunk->length_dw * 4 /
0968 sizeof(struct drm_amdgpu_cs_chunk_dep);
0969
0970 for (i = 0; i < num_deps; ++i) {
0971 struct amdgpu_ctx *ctx;
0972 struct drm_sched_entity *entity;
0973 struct dma_fence *fence;
0974
0975 ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
0976 if (ctx == NULL)
0977 return -EINVAL;
0978
0979 r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
0980 deps[i].ip_instance,
0981 deps[i].ring, &entity);
0982 if (r) {
0983 amdgpu_ctx_put(ctx);
0984 return r;
0985 }
0986
0987 fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
0988 amdgpu_ctx_put(ctx);
0989
0990 if (IS_ERR(fence))
0991 return PTR_ERR(fence);
0992 else if (!fence)
0993 continue;
0994
0995 if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
0996 struct drm_sched_fence *s_fence;
0997 struct dma_fence *old = fence;
0998
0999 s_fence = to_drm_sched_fence(fence);
1000 fence = dma_fence_get(&s_fence->scheduled);
1001 dma_fence_put(old);
1002 }
1003
1004 r = amdgpu_sync_fence(&p->job->sync, fence);
1005 dma_fence_put(fence);
1006 if (r)
1007 return r;
1008 }
1009 return 0;
1010 }
1011
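/* Look up the fence behind a syncobj handle, optionally at a timeline
 * point, and add it as a dependency of the job.
 */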
1012 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
1013 uint32_t handle, u64 point,
1014 u64 flags)
1015 {
1016 struct dma_fence *fence;
1017 int r;
1018
1019 r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
1020 if (r) {
1021 DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
1022 handle, point, r);
1023 return r;
1024 }
1025
1026 r = amdgpu_sync_fence(&p->job->sync, fence);
1027 dma_fence_put(fence);
1028
1029 return r;
1030 }
1031
1032 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
1033 struct amdgpu_cs_chunk *chunk)
1034 {
1035 struct drm_amdgpu_cs_chunk_sem *deps;
1036 unsigned num_deps;
1037 int i, r;
1038
1039 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1040 num_deps = chunk->length_dw * 4 /
1041 sizeof(struct drm_amdgpu_cs_chunk_sem);
1042 for (i = 0; i < num_deps; ++i) {
1043 r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
1044 0, 0);
1045 if (r)
1046 return r;
1047 }
1048
1049 return 0;
1050 }
1051
1052
1053 static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
1054 struct amdgpu_cs_chunk *chunk)
1055 {
1056 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1057 unsigned num_deps;
1058 int i, r;
1059
1060 syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1061 num_deps = chunk->length_dw * 4 /
1062 sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1063 for (i = 0; i < num_deps; ++i) {
1064 r = amdgpu_syncobj_lookup_and_add_to_sync(p,
1065 syncobj_deps[i].handle,
1066 syncobj_deps[i].point,
1067 syncobj_deps[i].flags);
1068 if (r)
1069 return r;
1070 }
1071
1072 return 0;
1073 }
1074
1075 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
1076 struct amdgpu_cs_chunk *chunk)
1077 {
1078 struct drm_amdgpu_cs_chunk_sem *deps;
1079 unsigned num_deps;
1080 int i;
1081
1082 deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
1083 num_deps = chunk->length_dw * 4 /
1084 sizeof(struct drm_amdgpu_cs_chunk_sem);
1085
1086 if (p->post_deps)
1087 return -EINVAL;
1088
1089 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1090 GFP_KERNEL);
1091 p->num_post_deps = 0;
1092
1093 if (!p->post_deps)
1094 return -ENOMEM;
1095
1096
1097 for (i = 0; i < num_deps; ++i) {
1098 p->post_deps[i].syncobj =
1099 drm_syncobj_find(p->filp, deps[i].handle);
1100 if (!p->post_deps[i].syncobj)
1101 return -EINVAL;
1102 p->post_deps[i].chain = NULL;
1103 p->post_deps[i].point = 0;
1104 p->num_post_deps++;
1105 }
1106
1107 return 0;
1108 }
1109
1110
1111 static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
1112 struct amdgpu_cs_chunk *chunk)
1113 {
1114 struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
1115 unsigned num_deps;
1116 int i;
1117
1118 syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
1119 num_deps = chunk->length_dw * 4 /
1120 sizeof(struct drm_amdgpu_cs_chunk_syncobj);
1121
1122 if (p->post_deps)
1123 return -EINVAL;
1124
1125 p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
1126 GFP_KERNEL);
1127 p->num_post_deps = 0;
1128
1129 if (!p->post_deps)
1130 return -ENOMEM;
1131
1132 for (i = 0; i < num_deps; ++i) {
1133 struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
1134
1135 dep->chain = NULL;
1136 if (syncobj_deps[i].point) {
1137 dep->chain = dma_fence_chain_alloc();
1138 if (!dep->chain)
1139 return -ENOMEM;
1140 }
1141
1142 dep->syncobj = drm_syncobj_find(p->filp,
1143 syncobj_deps[i].handle);
1144 if (!dep->syncobj) {
1145 dma_fence_chain_free(dep->chain);
1146 return -EINVAL;
1147 }
1148 dep->point = syncobj_deps[i].point;
1149 p->num_post_deps++;
1150 }
1151
1152 return 0;
1153 }
1154
1155 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
1156 struct amdgpu_cs_parser *p)
1157 {
	int i, r = 0;
1159
	/* The context lock is dropped while resolving dependencies and re-acquired at the out label below */
1161 mutex_unlock(&p->ctx->lock);
1162
1163 for (i = 0; i < p->nchunks; ++i) {
1164 struct amdgpu_cs_chunk *chunk;
1165
1166 chunk = &p->chunks[i];
1167
1168 switch (chunk->chunk_id) {
1169 case AMDGPU_CHUNK_ID_DEPENDENCIES:
1170 case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
1171 r = amdgpu_cs_process_fence_dep(p, chunk);
1172 if (r)
1173 goto out;
1174 break;
1175 case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
1176 r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
1177 if (r)
1178 goto out;
1179 break;
1180 case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
1181 r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
1182 if (r)
1183 goto out;
1184 break;
1185 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
1186 r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
1187 if (r)
1188 goto out;
1189 break;
1190 case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
1191 r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
1192 if (r)
1193 goto out;
1194 break;
1195 }
1196 }
1197
1198 out:
1199 mutex_lock(&p->ctx->lock);
1200 return r;
1201 }
1202
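/* Attach the submission fence to every syncobj that was requested as a
 * signal (out) dependency.
 */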
1203 static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1204 {
1205 int i;
1206
1207 for (i = 0; i < p->num_post_deps; ++i) {
1208 if (p->post_deps[i].chain && p->post_deps[i].point) {
1209 drm_syncobj_add_point(p->post_deps[i].syncobj,
1210 p->post_deps[i].chain,
1211 p->fence, p->post_deps[i].point);
1212 p->post_deps[i].chain = NULL;
1213 } else {
1214 drm_syncobj_replace_fence(p->post_deps[i].syncobj,
1215 p->fence);
1216 }
1217 }
1218 }
1219
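/* Final step of the submission: initialize and arm the scheduler job,
 * re-check the userptr BOs, install the fence on the reserved BOs and push
 * the job to the scheduler.
 */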
1220 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
1221 union drm_amdgpu_cs *cs)
1222 {
1223 struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
1224 struct drm_sched_entity *entity = p->entity;
1225 struct amdgpu_bo_list_entry *e;
1226 struct amdgpu_job *job;
1227 uint64_t seq;
1228 int r;
1229
1230 job = p->job;
1231 p->job = NULL;
1232
1233 r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
1234 if (r)
1235 goto error_unlock;
1236
1237 drm_sched_job_arm(&job->base);
1238
	/* The notifier lock is held from here until the fence has been added
	 * to the reserved BOs, so that concurrent userptr invalidation is
	 * serialized against this submission.
	 */
1243 mutex_lock(&p->adev->notifier_lock);
1244
	/* If any userptr BO was invalidated after amdgpu_cs_parser_bos(),
	 * return -EAGAIN so user space can restart the ioctl.
	 */
1248 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
1249 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
1250
1251 r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
1252 }
1253 if (r) {
1254 r = -EAGAIN;
1255 goto error_abort;
1256 }
1257
1258 p->fence = dma_fence_get(&job->base.s_fence->finished);
1259
1260 seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
1261 amdgpu_cs_post_dependencies(p);
1262
1263 if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
1264 !p->ctx->preamble_presented) {
1265 job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
1266 p->ctx->preamble_presented = true;
1267 }
1268
1269 cs->out.handle = seq;
1270 job->uf_sequence = seq;
1271
1272 amdgpu_job_free_resources(job);
1273
1274 trace_amdgpu_cs_ioctl(job);
1275 amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
1276 drm_sched_entity_push_job(&job->base);
1277
1278 amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
1279
	/* Make sure all BOs are remembered as writers */
1281 amdgpu_bo_list_for_each_entry(e, p->bo_list)
1282 e->tv.num_shared = 0;
1283
1284 ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
1285 mutex_unlock(&p->adev->notifier_lock);
1286 mutex_unlock(&p->bo_list->bo_list_mutex);
1287
1288 return 0;
1289
1290 error_abort:
1291 drm_sched_job_cleanup(&job->base);
1292 mutex_unlock(&p->adev->notifier_lock);
1293
1294 error_unlock:
1295 amdgpu_job_free(job);
1296 return r;
1297 }
1298
1299 static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
1300 {
1301 int i;
1302
1303 if (!trace_amdgpu_cs_enabled())
1304 return;
1305
1306 for (i = 0; i < parser->job->num_ibs; i++)
1307 trace_amdgpu_cs(parser, i);
1308 }
1309
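/* Main entry point for the CS ioctl: parse the chunks and IBs, resolve
 * dependencies, reserve and validate the buffers, do the VM handling and
 * finally submit the job.
 */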
1310 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
1311 {
1312 struct amdgpu_device *adev = drm_to_adev(dev);
1313 union drm_amdgpu_cs *cs = data;
1314 struct amdgpu_cs_parser parser = {};
1315 bool reserved_buffers = false;
1316 int r;
1317
1318 if (amdgpu_ras_intr_triggered())
1319 return -EHWPOISON;
1320
1321 if (!adev->accel_working)
1322 return -EBUSY;
1323
1324 parser.adev = adev;
1325 parser.filp = filp;
1326
1327 r = amdgpu_cs_parser_init(&parser, data);
1328 if (r) {
1329 if (printk_ratelimit())
1330 DRM_ERROR("Failed to initialize parser %d!\n", r);
1331 goto out;
1332 }
1333
1334 r = amdgpu_cs_ib_fill(adev, &parser);
1335 if (r)
1336 goto out;
1337
1338 r = amdgpu_cs_dependencies(adev, &parser);
1339 if (r) {
1340 DRM_ERROR("Failed in the dependencies handling %d!\n", r);
1341 goto out;
1342 }
1343
1344 r = amdgpu_cs_parser_bos(&parser, data);
1345 if (r) {
1346 if (r == -ENOMEM)
1347 DRM_ERROR("Not enough memory for command submission!\n");
1348 else if (r != -ERESTARTSYS && r != -EAGAIN)
1349 DRM_ERROR("Failed to process the buffer list %d!\n", r);
1350 goto out;
1351 }
1352
1353 reserved_buffers = true;
1354
1355 trace_amdgpu_cs_ibs(&parser);
1356
1357 r = amdgpu_cs_vm_handling(&parser);
1358 if (r)
1359 goto out;
1360
1361 r = amdgpu_cs_submit(&parser, cs);
1362
1363 out:
1364 amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
1365
1366 return r;
1367 }
1368
/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 * @dev: drm device
 * @data: union drm_amdgpu_wait_cs copied from user space
 * @filp: file private
 *
 * Wait for the command submission identified by the handle to finish and
 * report in wait->out.status whether it signaled within the timeout.
 */
1378 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1379 struct drm_file *filp)
1380 {
1381 union drm_amdgpu_wait_cs *wait = data;
1382 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
1383 struct drm_sched_entity *entity;
1384 struct amdgpu_ctx *ctx;
1385 struct dma_fence *fence;
1386 long r;
1387
1388 ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
1389 if (ctx == NULL)
1390 return -EINVAL;
1391
1392 r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
1393 wait->in.ring, &entity);
1394 if (r) {
1395 amdgpu_ctx_put(ctx);
1396 return r;
1397 }
1398
1399 fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
1400 if (IS_ERR(fence))
1401 r = PTR_ERR(fence);
1402 else if (fence) {
1403 r = dma_fence_wait_timeout(fence, true, timeout);
1404 if (r > 0 && fence->error)
1405 r = fence->error;
1406 dma_fence_put(fence);
1407 } else
1408 r = 1;
1409
1410 amdgpu_ctx_put(ctx);
1411 if (r < 0)
1412 return r;
1413
1414 memset(wait, 0, sizeof(*wait));
1415 wait->out.status = (r == 0);
1416
1417 return 0;
1418 }
1419
/**
 * amdgpu_cs_get_fence - look up the fence described by a drm_amdgpu_fence
 * @adev: amdgpu device
 * @filp: file private
 * @user: fence description copied from user space
 *
 * Returns the fence for the given context, entity and sequence number,
 * NULL if it has already been released, or an ERR_PTR on failure.
 */
1427 static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1428 struct drm_file *filp,
1429 struct drm_amdgpu_fence *user)
1430 {
1431 struct drm_sched_entity *entity;
1432 struct amdgpu_ctx *ctx;
1433 struct dma_fence *fence;
1434 int r;
1435
1436 ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1437 if (ctx == NULL)
1438 return ERR_PTR(-EINVAL);
1439
1440 r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
1441 user->ring, &entity);
1442 if (r) {
1443 amdgpu_ctx_put(ctx);
1444 return ERR_PTR(r);
1445 }
1446
1447 fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1448 amdgpu_ctx_put(ctx);
1449
1450 return fence;
1451 }
1452
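/* Convert the fence of a previous submission into a syncobj handle, a
 * syncobj fd or a sync_file fd, depending on info->in.what.
 */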
1453 int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
1454 struct drm_file *filp)
1455 {
1456 struct amdgpu_device *adev = drm_to_adev(dev);
1457 union drm_amdgpu_fence_to_handle *info = data;
1458 struct dma_fence *fence;
1459 struct drm_syncobj *syncobj;
1460 struct sync_file *sync_file;
1461 int fd, r;
1462
1463 fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
1464 if (IS_ERR(fence))
1465 return PTR_ERR(fence);
1466
1467 if (!fence)
1468 fence = dma_fence_get_stub();
1469
1470 switch (info->in.what) {
1471 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
1472 r = drm_syncobj_create(&syncobj, 0, fence);
1473 dma_fence_put(fence);
1474 if (r)
1475 return r;
1476 r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
1477 drm_syncobj_put(syncobj);
1478 return r;
1479
1480 case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
1481 r = drm_syncobj_create(&syncobj, 0, fence);
1482 dma_fence_put(fence);
1483 if (r)
1484 return r;
1485 r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
1486 drm_syncobj_put(syncobj);
1487 return r;
1488
1489 case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
1490 fd = get_unused_fd_flags(O_CLOEXEC);
1491 if (fd < 0) {
1492 dma_fence_put(fence);
1493 return fd;
1494 }
1495
1496 sync_file = sync_file_create(fence);
1497 dma_fence_put(fence);
1498 if (!sync_file) {
1499 put_unused_fd(fd);
1500 return -ENOMEM;
1501 }
1502
1503 fd_install(fd, sync_file->file);
1504 info->out.handle = fd;
1505 return 0;
1506
1507 default:
1508 dma_fence_put(fence);
1509 return -EINVAL;
1510 }
1511 }
1512
/**
 * amdgpu_cs_wait_all_fences - wait on all fences to signal
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 *
 * wait->out.status is set to 1 only if all fences signaled in time.
 */
1521 static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1522 struct drm_file *filp,
1523 union drm_amdgpu_wait_fences *wait,
1524 struct drm_amdgpu_fence *fences)
1525 {
1526 uint32_t fence_count = wait->in.fence_count;
1527 unsigned int i;
1528 long r = 1;
1529
1530 for (i = 0; i < fence_count; i++) {
1531 struct dma_fence *fence;
1532 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1533
1534 fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1535 if (IS_ERR(fence))
1536 return PTR_ERR(fence);
1537 else if (!fence)
1538 continue;
1539
		r = dma_fence_wait_timeout(fence, true, timeout);
		if (r > 0 && fence->error)
			r = fence->error;

		dma_fence_put(fence);
		if (r < 0)
			return r;

		if (r == 0)
			break;
1550 }
1551
1552 memset(wait, 0, sizeof(*wait));
1553 wait->out.status = (r > 0);
1554
1555 return 0;
1556 }
1557
/**
 * amdgpu_cs_wait_any_fence - wait for any of the fences to signal
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 *
 * Waits until the first fence signals and reports its index in
 * wait->out.first_signaled.
 */
1566 static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
1567 struct drm_file *filp,
1568 union drm_amdgpu_wait_fences *wait,
1569 struct drm_amdgpu_fence *fences)
1570 {
1571 unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1572 uint32_t fence_count = wait->in.fence_count;
1573 uint32_t first = ~0;
1574 struct dma_fence **array;
1575 unsigned int i;
1576 long r;
1577
1578
1579 array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);
1580
1581 if (array == NULL)
1582 return -ENOMEM;
1583
1584 for (i = 0; i < fence_count; i++) {
1585 struct dma_fence *fence;
1586
1587 fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1588 if (IS_ERR(fence)) {
1589 r = PTR_ERR(fence);
1590 goto err_free_fence_array;
1591 } else if (fence) {
1592 array[i] = fence;
1593 } else {
1594 r = 1;
1595 first = i;
1596 goto out;
1597 }
1598 }
1599
1600 r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
1601 &first);
1602 if (r < 0)
1603 goto err_free_fence_array;
1604
1605 out:
1606 memset(wait, 0, sizeof(*wait));
1607 wait->out.status = (r > 0);
1608 wait->out.first_signaled = first;
1609
1610 if (first < fence_count && array[first])
1611 r = array[first]->error;
1612 else
1613 r = 0;
1614
1615 err_free_fence_array:
1616 for (i = 0; i < fence_count; i++)
1617 dma_fence_put(array[i]);
1618 kfree(array);
1619
1620 return r;
1621 }
1622
/**
 * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
 * @dev: drm device
 * @data: union drm_amdgpu_wait_fences copied from user space
 * @filp: file private
 *
 * Wait for all or any of the requested command submissions to finish,
 * depending on wait->in.wait_all.
 */
1630 int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1631 struct drm_file *filp)
1632 {
1633 struct amdgpu_device *adev = drm_to_adev(dev);
1634 union drm_amdgpu_wait_fences *wait = data;
1635 uint32_t fence_count = wait->in.fence_count;
1636 struct drm_amdgpu_fence *fences_user;
1637 struct drm_amdgpu_fence *fences;
1638 int r;
1639
1640
1641 fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1642 GFP_KERNEL);
1643 if (fences == NULL)
1644 return -ENOMEM;
1645
1646 fences_user = u64_to_user_ptr(wait->in.fences);
1647 if (copy_from_user(fences, fences_user,
1648 sizeof(struct drm_amdgpu_fence) * fence_count)) {
1649 r = -EFAULT;
1650 goto err_free_fences;
1651 }
1652
1653 if (wait->in.wait_all)
1654 r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1655 else
1656 r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1657
1658 err_free_fences:
1659 kfree(fences);
1660
1661 return r;
1662 }
1663
/**
 * amdgpu_cs_find_mapping - find the BO and VA mapping for a GPU address
 * @parser: command submission parser context
 * @addr: VM address to look up
 * @bo: resulting BO of the mapping
 * @map: resulting VA mapping
 *
 * Look up the VA mapping containing @addr in the VM of the submitting
 * process, verify that the backing BO is reserved by this CS, and make
 * sure the BO is mapped into GART before returning the mapping.
 */
1676 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
1677 uint64_t addr, struct amdgpu_bo **bo,
1678 struct amdgpu_bo_va_mapping **map)
1679 {
1680 struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
1681 struct ttm_operation_ctx ctx = { false, false };
1682 struct amdgpu_vm *vm = &fpriv->vm;
1683 struct amdgpu_bo_va_mapping *mapping;
1684 int r;
1685
1686 addr /= AMDGPU_GPU_PAGE_SIZE;
1687
1688 mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
1689 if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
1690 return -EINVAL;
1691
1692 *bo = mapping->bo_va->base.bo;
1693 *map = mapping;
1694
1695
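	/* Double check that the BO is reserved by this CS */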
1696 if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket)
1697 return -EINVAL;
1698
1699 if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
1700 (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
1701 amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
1702 r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
1703 if (r)
1704 return r;
1705 }
1706
1707 return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
1708 }