#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"
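
/*
 * GPUVM
 *
 * Instead of the single global GART table used on older ASICs, the VM
 * hardware supports multiple per-client page tables that can be active
 * at the same time.  Each VM gets an ID (VMID) together with its own
 * page directory and page tables; command buffers execute under a
 * specific VMID, and VMIDs are handed out dynamically as work is
 * submitted.  Mappings can point at VRAM or at system memory, and
 * system pages can be snooped (cached) or unsnooped (uncached).
 */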
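
/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries needed to cover the
 * whole VM address space (one PDE per 2^radeon_vm_block_size pages).
 */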
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
        return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}
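
/**
 * radeon_vm_directory_size - return the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (8 bytes per PDE),
 * aligned to the GPU page size.
 */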
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
        return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}
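
/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager by calling the ASIC specific setup.
 * Returns 0 for success, error for failure.
 */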
int radeon_vm_manager_init(struct radeon_device *rdev)
{
        int r;

        if (!rdev->vm_manager.enabled) {
                r = radeon_asic_vm_init(rdev);
                if (r)
                        return r;

                rdev->vm_manager.enabled = true;
        }
        return 0;
}
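
/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Drop the fences of the active VMIDs and call the ASIC specific teardown.
 */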
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
        int i;

        if (!rdev->vm_manager.enabled)
                return;

        for (i = 0; i < RADEON_NUM_VM; ++i)
                radeon_fence_unref(&rdev->vm_manager.active[i]);
        radeon_asic_vm_fini(rdev);
        rdev->vm_manager.enabled = false;
}
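
/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of the validation list
 *
 * Add the page directory and the used page tables to the validation
 * list for command submission.  Returns the allocated radeon_bo_list
 * array, or NULL on allocation failure; the caller is responsible for
 * freeing it with kvfree().
 */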
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
                                         struct radeon_vm *vm,
                                         struct list_head *head)
{
        struct radeon_bo_list *list;
        unsigned i, idx;

        list = kvmalloc_array(vm->max_pde_used + 2,
                              sizeof(struct radeon_bo_list), GFP_KERNEL);
        if (!list)
                return NULL;

        /* add the vm page directory to the list */
        list[0].robj = vm->page_directory;
        list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
        list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
        list[0].tv.bo = &vm->page_directory->tbo;
        list[0].tv.num_shared = 1;
        list[0].tiling_flags = 0;
        list_add(&list[0].tv.head, head);

        for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
                if (!vm->page_tables[i].bo)
                        continue;

                list[idx].robj = vm->page_tables[i].bo;
                list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
                list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
                list[idx].tv.bo = &list[idx].robj->tbo;
                list[idx].tv.num_shared = 1;
                list[idx].tiling_flags = 0;
                list_add(&list[idx++].tv.head, head);
        }

        return list;
}
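
/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate an id for
 * @ring: ring we want to submit the job to
 *
 * Allocate an id for the vm.  Returns the fence we need to sync to
 * before using the id, or NULL if no sync is necessary.
 */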
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
                                       struct radeon_vm *vm, int ring)
{
        struct radeon_fence *best[RADEON_NUM_RINGS] = {};
        struct radeon_vm_id *vm_id = &vm->ids[ring];

        unsigned choices[2] = {};
        unsigned i;

        /* check if the id is still valid */
        if (vm_id->id && vm_id->last_id_use &&
            vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
                return NULL;

        /* we definitely need to flush */
        vm_id->pd_gpu_addr = ~0ll;

        /* skip over VMID 0, since it is the system VM */
        for (i = 1; i < rdev->vm_manager.nvm; ++i) {
                struct radeon_fence *fence = rdev->vm_manager.active[i];

                if (fence == NULL) {
                        /* found a free one */
                        vm_id->id = i;
                        trace_radeon_vm_grab_id(i, ring);
                        return NULL;
                }

                if (radeon_fence_is_earlier(fence, best[fence->ring])) {
                        best[fence->ring] = fence;
                        choices[fence->ring == ring ? 0 : 1] = i;
                }
        }

        for (i = 0; i < 2; ++i) {
                if (choices[i]) {
                        vm_id->id = choices[i];
                        trace_radeon_vm_grab_id(choices[i], ring);
                        return rdev->vm_manager.active[choices[i]];
                }
        }

        /* should never happen */
        BUG();
        return NULL;
}
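
/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for the flush
 * @updates: last vm update we need to wait for
 *
 * Flush the vm on the requested ring if the page directory address has
 * changed or newer page table updates need to become visible.
 */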
void radeon_vm_flush(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     int ring, struct radeon_fence *updates)
{
        uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
        struct radeon_vm_id *vm_id = &vm->ids[ring];

        if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
            radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {

                trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
                radeon_fence_unref(&vm_id->flushed_updates);
                vm_id->flushed_updates = radeon_fence_ref(updates);
                vm_id->pd_gpu_addr = pd_addr;
                radeon_ring_vm_flush(rdev, &rdev->ring[ring],
                                     vm_id->id, vm_id->pd_gpu_addr);
        }
}
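
/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Remember the fence as the last use of the VMID so the id is not
 * reused before this work has finished.
 */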
void radeon_vm_fence(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     struct radeon_fence *fence)
{
        unsigned vm_id = vm->ids[fence->ring].id;

        radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
        rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);

        radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
        vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}
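
/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Search the list of mappings attached to @bo and return the one
 * belonging to @vm, or NULL if none exists.
 */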
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
                                       struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                if (bo_va->vm == vm)
                        return bo_va;
        }
        return NULL;
}
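
/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo to the list of bos associated with the vm and create an
 * initially empty mapping for it.  Returns the newly created bo_va
 * or NULL on allocation failure.
 */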
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
                                      struct radeon_vm *vm,
                                      struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
        if (bo_va == NULL)
                return NULL;

        bo_va->vm = vm;
        bo_va->bo = bo;
        bo_va->it.start = 0;
        bo_va->it.last = 0;
        bo_va->flags = 0;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
        INIT_LIST_HEAD(&bo_va->vm_status);

        mutex_lock(&vm->mutex);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);

        return bo_va;
}
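
/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Pick the right asic callback: copy PTEs out of the GART table,
 * write them directly for system pages or small updates, or use the
 * set_pages path for larger VRAM ranges.
 */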
static void radeon_vm_set_pages(struct radeon_device *rdev,
                                struct radeon_ib *ib,
                                uint64_t pe,
                                uint64_t addr, unsigned count,
                                uint32_t incr, uint32_t flags)
{
        trace_radeon_vm_set_page(pe, addr, count, incr, flags);

        if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
                uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
                radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

        } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
                radeon_asic_vm_write_pages(rdev, ib, pe, addr,
                                           count, incr, flags);

        } else {
                radeon_asic_vm_set_pages(rdev, ib, pe, addr,
                                         count, incr, flags);
        }
}
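
/**
 * radeon_vm_clear_bo - initially clear the page directory/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 *
 * Validate the BO into VRAM and fill it with zero page table entries
 * using the DMA ring.  Returns 0 for success, error for failure.
 */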
static int radeon_vm_clear_bo(struct radeon_device *rdev,
                              struct radeon_bo *bo)
{
        struct ttm_operation_ctx ctx = { true, false };
        struct radeon_ib ib;
        unsigned entries;
        uint64_t addr;
        int r;

        r = radeon_bo_reserve(bo, false);
        if (r)
                return r;

        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (r)
                goto error_unreserve;

        addr = radeon_bo_gpu_offset(bo);
        entries = radeon_bo_size(bo) / 8;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
        if (r)
                goto error_unreserve;

        ib.length_dw = 0;

        radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > 64);

        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r)
                goto error_free;

        ib.fence->is_vm_update = true;
        radeon_bo_fence(bo, ib.fence, false);

error_free:
        radeon_ib_free(rdev, &ib);

error_unreserve:
        radeon_bo_unreserve(bo);
        return r;
}
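
/**
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address in
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set or clear (soffset == 0) the virtual address of the buffer inside
 * the vm, validate that the range fits and does not overlap an existing
 * mapping, and allocate any page tables the new range needs.
 * The bo must be reserved by the caller; it is unreserved on return.
 * Returns 0 for success, error for failure.
 */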
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          struct radeon_bo_va *bo_va,
                          uint64_t soffset,
                          uint32_t flags)
{
        uint64_t size = radeon_bo_size(bo_va->bo);
        struct radeon_vm *vm = bo_va->vm;
        unsigned last_pfn, pt_idx;
        uint64_t eoffset;
        int r;

        if (soffset) {
                /* make sure the object fits at this offset */
                eoffset = soffset + size - 1;
                if (soffset >= eoffset) {
                        r = -EINVAL;
                        goto error_unreserve;
                }

                last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
                if (last_pfn >= rdev->vm_manager.max_pfn) {
                        dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n",
                                last_pfn, rdev->vm_manager.max_pfn);
                        r = -EINVAL;
                        goto error_unreserve;
                }

        } else {
                eoffset = last_pfn = 0;
        }

        mutex_lock(&vm->mutex);
        soffset /= RADEON_GPU_PAGE_SIZE;
        eoffset /= RADEON_GPU_PAGE_SIZE;
        if (soffset || eoffset) {
                struct interval_tree_node *it;
                it = interval_tree_iter_first(&vm->va, soffset, eoffset);
                if (it && it != &bo_va->it) {
                        /* the requested range overlaps an existing mapping */
                        struct radeon_bo_va *tmp;
                        tmp = container_of(it, struct radeon_bo_va, it);

                        dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
                                "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
                                soffset, tmp->bo, tmp->it.start, tmp->it.last);
                        mutex_unlock(&vm->mutex);
                        r = -EINVAL;
                        goto error_unreserve;
                }
        }

        if (bo_va->it.start || bo_va->it.last) {
                /* add a clone of the bo_va to clear the old address */
                struct radeon_bo_va *tmp;
                tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
                if (!tmp) {
                        mutex_unlock(&vm->mutex);
                        r = -ENOMEM;
                        goto error_unreserve;
                }
                tmp->it.start = bo_va->it.start;
                tmp->it.last = bo_va->it.last;
                tmp->vm = vm;
                tmp->bo = radeon_bo_ref(bo_va->bo);

                interval_tree_remove(&bo_va->it, &vm->va);
                spin_lock(&vm->status_lock);
                bo_va->it.start = 0;
                bo_va->it.last = 0;
                list_del_init(&bo_va->vm_status);
                list_add(&tmp->vm_status, &vm->freed);
                spin_unlock(&vm->status_lock);
        }

        if (soffset || eoffset) {
                spin_lock(&vm->status_lock);
                bo_va->it.start = soffset;
                bo_va->it.last = eoffset;
                list_add(&bo_va->vm_status, &vm->cleared);
                spin_unlock(&vm->status_lock);
                interval_tree_insert(&bo_va->it, &vm->va);
        }

        bo_va->flags = flags;

        soffset >>= radeon_vm_block_size;
        eoffset >>= radeon_vm_block_size;

        BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

        if (eoffset > vm->max_pde_used)
                vm->max_pde_used = eoffset;

        radeon_bo_unreserve(bo_va->bo);

        /* walk over the address space and allocate the page tables */
        for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
                struct radeon_bo *pt;

                if (vm->page_tables[pt_idx].bo)
                        continue;

                /* drop mutex to allocate and clear page table */
                mutex_unlock(&vm->mutex);

                r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
                                     RADEON_GPU_PAGE_SIZE, true,
                                     RADEON_GEM_DOMAIN_VRAM, 0,
                                     NULL, NULL, &pt);
                if (r)
                        return r;

                r = radeon_vm_clear_bo(rdev, pt);
                if (r) {
                        radeon_bo_unref(&pt);
                        return r;
                }

                /* acquire mutex again */
                mutex_lock(&vm->mutex);
                if (vm->page_tables[pt_idx].bo) {
                        /* someone else allocated the pt in the meantime */
                        mutex_unlock(&vm->mutex);
                        radeon_bo_unref(&pt);
                        mutex_lock(&vm->mutex);
                        continue;
                }

                vm->page_tables[pt_idx].addr = 0;
                vm->page_tables[pt_idx].bo = pt;
        }

        mutex_unlock(&vm->mutex);
        return 0;

error_unreserve:
        radeon_bo_unreserve(bo_va->bo);
        return r;
}
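
/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the GART maps at
 * @addr so it can be written into a VM page table entry.
 */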
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
        uint64_t result;

        /* page table offset */
        result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT];
        result &= ~RADEON_GPU_PAGE_MASK;

        return result;
}
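
/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the RADEON_VM_PAGE_* flags into the R600_PTE_* bits used
 * by the hardware page tables.
 */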
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
        uint32_t hw_flags = 0;

        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
        if (flags & RADEON_VM_PAGE_SYSTEM) {
                hw_flags |= R600_PTE_SYSTEM;
                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
        }
        return hw_flags;
}
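
/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocate an IB and write out the changed page directory entries so
 * every used PDE points at the current GPU address of its page table.
 * Returns 0 for success, error for failure.
 */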
int radeon_vm_update_page_directory(struct radeon_device *rdev,
                                    struct radeon_vm *vm)
{
        struct radeon_bo *pd = vm->page_directory;
        uint64_t pd_addr = radeon_bo_gpu_offset(pd);
        uint32_t incr = RADEON_VM_PTE_COUNT * 8;
        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0, pt_idx, ndw;
        struct radeon_ib ib;
        int r;

        /* padding, etc. */
        ndw = 64;

        /* assume the worst case */
        ndw += vm->max_pde_used * 6;

        /* update too big for an IB */
        if (ndw > 0xfffff)
                return -ENOMEM;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
        if (r)
                return r;
        ib.length_dw = 0;

        /* walk over the address space and update the page directory */
        for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
                struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
                uint64_t pde, pt;

                if (bo == NULL)
                        continue;

                pt = radeon_bo_gpu_offset(bo);
                if (vm->page_tables[pt_idx].addr == pt)
                        continue;
                vm->page_tables[pt_idx].addr = pt;

                pde = pd_addr + pt_idx * 8;
                if (((last_pde + 8 * count) != pde) ||
                    ((last_pt + incr * count) != pt)) {

                        if (count) {
                                radeon_vm_set_pages(rdev, &ib, last_pde,
                                                    last_pt, count, incr,
                                                    R600_PTE_VALID);
                        }

                        count = 1;
                        last_pde = pde;
                        last_pt = pt;
                } else {
                        ++count;
                }
        }

        if (count)
                radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
                                    incr, R600_PTE_VALID);

        if (ib.length_dw != 0) {
                radeon_asic_vm_pad_ib(rdev, &ib);

                radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true);
                WARN_ON(ib.length_dw > ndw);
                r = radeon_ib_schedule(rdev, &ib, NULL, false);
                if (r) {
                        radeon_ib_free(rdev, &ib);
                        return r;
                }
                ib.fence->is_vm_update = true;
                radeon_bo_fence(pd, ib.fence, false);
        }
        radeon_ib_free(rdev, &ib);

        return 0;
}
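
/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 */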
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
                                struct radeon_ib *ib,
                                uint64_t pe_start, uint64_t pe_end,
                                uint64_t addr, uint32_t flags)
{
        /*
         * When a run of PTEs describes a physically contiguous range that
         * is aligned to a fragment boundary, the fragment bits let the MC
         * treat it like one larger page, which improves TLB utilization.
         * Cayman/Aruba are optimized for 256KB fragments, later ASICs for
         * 64KB, so mark the aligned middle of the range with the fragment
         * flag and map the unaligned head and tail page by page.  System
         * pages are not physically contiguous and invalid mappings have no
         * backing memory, so those ranges never get fragment bits.
         */
        uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) ||
                               (rdev->family == CHIP_ARUBA)) ?
                R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
        uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) ||
                               (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80;

        uint64_t frag_start = ALIGN(pe_start, frag_align);
        uint64_t frag_end = pe_end & ~(frag_align - 1);

        unsigned count;

        /* no fragments possible: map everything page by page */
        if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
            (frag_start >= frag_end)) {

                count = (pe_end - pe_start) / 8;
                radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
                return;
        }

        /* handle the unaligned area at the beginning */
        if (pe_start != frag_start) {
                count = (frag_start - pe_start) / 8;
                radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
                addr += RADEON_GPU_PAGE_SIZE * count;
        }

        /* handle the aligned area in the middle */
        count = (frag_end - frag_start) / 8;
        radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
                            RADEON_GPU_PAGE_SIZE, flags | frag_flags);

        /* handle the unaligned area at the end */
        if (frag_end != pe_end) {
                addr += RADEON_GPU_PAGE_SIZE * count;
                count = (pe_end - frag_end) / 8;
                radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
                                    RADEON_GPU_PAGE_SIZE, flags);
        }
}
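
/**
 * radeon_vm_update_ptes - make sure that the page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: IB to collect the update commands
 * @start: start of the GPU address range
 * @end: end of the GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end, merging
 * contiguous runs of PTEs so they can be written with as few commands
 * as possible.  Returns 0 for success, error for failure.
 */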
static int radeon_vm_update_ptes(struct radeon_device *rdev,
                                 struct radeon_vm *vm,
                                 struct radeon_ib *ib,
                                 uint64_t start, uint64_t end,
                                 uint64_t dst, uint32_t flags)
{
        uint64_t mask = RADEON_VM_PTE_COUNT - 1;
        uint64_t last_pte = ~0, last_dst = ~0;
        unsigned count = 0;
        uint64_t addr;

        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> radeon_vm_block_size;
                struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
                unsigned nptes;
                uint64_t pte;
                int r;

                radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
                r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
                if (r)
                        return r;

                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                else
                        nptes = RADEON_VM_PTE_COUNT - (addr & mask);

                pte = radeon_bo_gpu_offset(pt);
                pte += (addr & mask) * 8;

                if ((last_pte + 8 * count) != pte) {

                        if (count) {
                                radeon_vm_frag_ptes(rdev, ib, last_pte,
                                                    last_pte + 8 * count,
                                                    last_dst, flags);
                        }

                        count = nptes;
                        last_pte = pte;
                        last_dst = dst;
                } else {
                        count += nptes;
                }

                addr += nptes;
                dst += nptes * RADEON_GPU_PAGE_SIZE;
        }

        if (count) {
                radeon_vm_frag_ptes(rdev, ib, last_pte,
                                    last_pte + 8 * count,
                                    last_dst, flags);
        }

        return 0;
}
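
/**
 * radeon_vm_fence_pts - fence page tables after an update
 *
 * @vm: requested vm
 * @start: start of the GPU address range
 * @end: end of the GPU address range
 * @fence: fence to use
 *
 * Add the fence to every page table covering the range so the BOs are
 * not moved or freed while the update is still in flight.
 */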
static void radeon_vm_fence_pts(struct radeon_vm *vm,
                                uint64_t start, uint64_t end,
                                struct radeon_fence *fence)
{
        unsigned i;

        start >>= radeon_vm_block_size;
        end = (end - 1) >> radeon_vm_block_size;

        for (i = start; i <= end; ++i)
                radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}
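
/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: the mapping to update
 * @mem: ttm mem, or NULL to unmap the bo
 *
 * Fill in the page table entries for @bo_va, pointing them at @mem or
 * clearing them when @mem is NULL.  Returns 0 for success, error for
 * failure.
 */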
int radeon_vm_bo_update(struct radeon_device *rdev,
                        struct radeon_bo_va *bo_va,
                        struct ttm_resource *mem)
{
        struct radeon_vm *vm = bo_va->vm;
        struct radeon_ib ib;
        unsigned nptes, ncmds, ndw;
        uint64_t addr;
        uint32_t flags;
        int r;

        if (!bo_va->it.start) {
                dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
                        bo_va->bo, vm);
                return -EINVAL;
        }

        spin_lock(&vm->status_lock);
        if (mem) {
                if (list_empty(&bo_va->vm_status)) {
                        spin_unlock(&vm->status_lock);
                        return 0;
                }
                list_del_init(&bo_va->vm_status);
        } else {
                list_del(&bo_va->vm_status);
                list_add(&bo_va->vm_status, &vm->cleared);
        }
        spin_unlock(&vm->status_lock);

        bo_va->flags &= ~RADEON_VM_PAGE_VALID;
        bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
        bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
        if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm))
                bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

        if (mem) {
                addr = (u64)mem->start << PAGE_SHIFT;
                if (mem->mem_type != TTM_PL_SYSTEM)
                        bo_va->flags |= RADEON_VM_PAGE_VALID;

                if (mem->mem_type == TTM_PL_TT) {
                        bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
                        if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
                                bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

                } else {
                        addr += rdev->vm_manager.vram_base_offset;
                }
        } else {
                addr = 0;
        }

        trace_radeon_vm_bo_update(bo_va);

        nptes = bo_va->it.last - bo_va->it.start + 1;

        /* reserve space for one command every (1 << radeon_vm_block_size)
         * entries, or every 2048 entries, whichever is smaller
         */
        ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

        /* padding, etc. */
        ndw = 64;

        flags = radeon_vm_page_flags(bo_va->flags);
        if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
                /* only copy commands needed */
                ndw += ncmds * 7;

        } else if (flags & R600_PTE_SYSTEM) {
                /* header for write data commands */
                ndw += ncmds * 4;

                /* body of write data command */
                ndw += nptes * 2;

        } else {
                /* set page commands needed */
                ndw += ncmds * 10;

                /* two extra commands for begin/end of fragment */
                ndw += 2 * 10;
        }

        /* update too big for an IB */
        if (ndw > 0xfffff)
                return -ENOMEM;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
        if (r)
                return r;
        ib.length_dw = 0;

        if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
                unsigned i;

                for (i = 0; i < RADEON_NUM_RINGS; ++i)
                        radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
        }

        r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
                                  bo_va->it.last + 1, addr,
                                  radeon_vm_page_flags(bo_va->flags));
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }

        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > ndw);

        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }
        ib.fence->is_vm_update = true;
        radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
        radeon_fence_unref(&bo_va->last_pt_update);
        bo_va->last_pt_update = radeon_fence_ref(ib.fence);
        radeon_ib_free(rdev, &ib);

        return 0;
}
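
/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Unmap all BO mappings that were moved to the freed list and release
 * their resources.  Returns 0 for success, error for failure.
 */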
int radeon_vm_clear_freed(struct radeon_device *rdev,
                          struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va;
        int r = 0;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->freed)) {
                bo_va = list_first_entry(&vm->freed,
                                         struct radeon_bo_va, vm_status);
                spin_unlock(&vm->status_lock);

                r = radeon_vm_bo_update(rdev, bo_va, NULL);
                radeon_bo_unref(&bo_va->bo);
                radeon_fence_unref(&bo_va->last_pt_update);
                spin_lock(&vm->status_lock);
                list_del(&bo_va->vm_status);
                kfree(bo_va);
                if (r)
                        break;
        }
        spin_unlock(&vm->status_lock);
        return r;
}
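
/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Clear the page table entries of all BOs on the invalidated list.
 * Returns 0 for success, error for failure.
 */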
int radeon_vm_clear_invalids(struct radeon_device *rdev,
                             struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va;
        int r;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->invalidated)) {
                bo_va = list_first_entry(&vm->invalidated,
                                         struct radeon_bo_va, vm_status);
                spin_unlock(&vm->status_lock);

                r = radeon_vm_bo_update(rdev, bo_va, NULL);
                if (r)
                        return r;

                spin_lock(&vm->status_lock);
        }
        spin_unlock(&vm->status_lock);

        return 0;
}
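
/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va from the requested vm.  If the bo_va still has a
 * mapping it is queued on the freed list so the page tables get
 * cleared later; otherwise it is freed immediately.
 */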
void radeon_vm_bo_rmv(struct radeon_device *rdev,
                      struct radeon_bo_va *bo_va)
{
        struct radeon_vm *vm = bo_va->vm;

        list_del(&bo_va->bo_list);

        mutex_lock(&vm->mutex);
        if (bo_va->it.start || bo_va->it.last)
                interval_tree_remove(&bo_va->it, &vm->va);

        spin_lock(&vm->status_lock);
        list_del(&bo_va->vm_status);
        if (bo_va->it.start || bo_va->it.last) {
                bo_va->bo = radeon_bo_ref(bo_va->bo);
                list_add(&bo_va->vm_status, &vm->freed);
        } else {
                radeon_fence_unref(&bo_va->last_pt_update);
                kfree(bo_va);
        }
        spin_unlock(&vm->status_lock);

        mutex_unlock(&vm->mutex);
}
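
/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark all mappings of @bo as invalid so the page tables are updated
 * before the bo is used again (e.g. after it has been moved).
 */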
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
                             struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                spin_lock(&bo_va->vm->status_lock);
                if (list_empty(&bo_va->vm_status) &&
                    (bo_va->it.start || bo_va->it.last))
                        list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
                spin_unlock(&bo_va->vm->status_lock);
        }
}
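
/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init the vm's data structures and allocate and clear the page
 * directory.  Returns 0 for success, error for failure.
 */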
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
        const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
                                   RADEON_VM_PTE_COUNT * 8);
        unsigned pd_size, pd_entries, pts_size;
        int i, r;

        vm->ib_bo_va = NULL;
        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                vm->ids[i].id = 0;
                vm->ids[i].flushed_updates = NULL;
                vm->ids[i].last_id_use = NULL;
        }
        mutex_init(&vm->mutex);
        vm->va = RB_ROOT_CACHED;
        spin_lock_init(&vm->status_lock);
        INIT_LIST_HEAD(&vm->invalidated);
        INIT_LIST_HEAD(&vm->freed);
        INIT_LIST_HEAD(&vm->cleared);

        pd_size = radeon_vm_directory_size(rdev);
        pd_entries = radeon_vm_num_pdes(rdev);

        /* allocate page table array */
        pts_size = pd_entries * sizeof(struct radeon_vm_pt);
        vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                return -ENOMEM;
        }

        r = radeon_bo_create(rdev, pd_size, align, true,
                             RADEON_GEM_DOMAIN_VRAM, 0, NULL,
                             NULL, &vm->page_directory);
        if (r)
                return r;

        r = radeon_vm_clear_bo(rdev, vm->page_directory);
        if (r) {
                radeon_bo_unref(&vm->page_directory);
                vm->page_directory = NULL;
                return r;
        }

        return 0;
}
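
/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down the vm: unbind any leftover mappings and free the page
 * tables and the page directory.
 */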
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va, *tmp;
        int i, r;

        if (!RB_EMPTY_ROOT(&vm->va.rb_root))
                dev_err(rdev->dev, "still active bo inside vm\n");

        rbtree_postorder_for_each_entry_safe(bo_va, tmp,
                                             &vm->va.rb_root, it.rb) {
                interval_tree_remove(&bo_va->it, &vm->va);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);
                        radeon_bo_unreserve(bo_va->bo);
                        radeon_fence_unref(&bo_va->last_pt_update);
                        kfree(bo_va);
                }
        }
        list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
                radeon_bo_unref(&bo_va->bo);
                radeon_fence_unref(&bo_va->last_pt_update);
                kfree(bo_va);
        }

        for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
                radeon_bo_unref(&vm->page_tables[i].bo);
        kfree(vm->page_tables);

        radeon_bo_unref(&vm->page_directory);

        for (i = 0; i < RADEON_NUM_RINGS; ++i) {
                radeon_fence_unref(&vm->ids[i].flushed_updates);
                radeon_fence_unref(&vm->ids[i].last_id_use);
        }

        mutex_destroy(&vm->mutex);
}