#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "si.h"
#include "sid.h"

const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	DMA0_REGISTER_OFFSET,
	DMA1_REGISTER_OFFSET
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev);
static void si_dma_set_buffer_funcs(struct amdgpu_device *adev);
static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev);
static void si_dma_set_irq_funcs(struct amdgpu_device *adev);
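
/**
 * si_dma_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Returns the current hardware read pointer from the
 * rptr writeback slot (SI).
 */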
static uint64_t si_dma_ring_get_rptr(struct amdgpu_ring *ring)
{
	/* the writeback value is a byte offset; ring pointers are in dwords */
	return *ring->rptr_cpu_addr >> 2;
}
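
/**
 * si_dma_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Returns the current hardware write pointer (SI).
 */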
static uint64_t si_dma_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	return (RREG32(DMA_RB_WPTR + sdma_offsets[me]) & 0x3fffc) >> 2;
}
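
/**
 * si_dma_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware (SI).
 */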
static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;

	WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
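
/**
 * si_dma_ring_emit_ib - schedule an IB on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve the vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring (SI).
 */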
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
				struct amdgpu_job *job,
				struct amdgpu_ib *ib,
				uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

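	/* The indirect buffer packet must end on an 8 DW boundary in the
	 * DMA ring.
	 * Pad as necessary with NOPs.
	 */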
	while ((lower_32_bits(ring->wptr) & 7) != 5)
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
	amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
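
/**
 * si_dma_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address where the fence sequence number is written
 * @seq: sequence number to write
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write the fence
 * seq number, then a trap packet to generate an interrupt
 * if needed (SI).
 */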
static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				   unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;

	/* write the fence */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
	amdgpu_ring_write(ring, seq);
	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0, 0));
		amdgpu_ring_write(ring, addr & 0xfffffffc);
		amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xff));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}
	/* generate an interrupt */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0, 0));
}
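
/**
 * si_dma_stop - stop the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the async dma engines (SI).
 */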
static void si_dma_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl;
	unsigned i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
		rb_cntl &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
	}
}
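
/**
 * si_dma_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA ring buffers and enable them (SI).
 * Returns 0 for success, error for failure.
 */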
static int si_dma_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl, rb_bufsz;
	int i, r;
	uint64_t rptr_addr;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + sdma_offsets[i], 0);
		WREG32(DMA_RB_WPTR + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		rptr_addr = ring->rptr_gpu_addr;

		WREG32(DMA_RB_RPTR_ADDR_LO + sdma_offsets[i], lower_32_bits(rptr_addr));
		WREG32(DMA_RB_RPTR_ADDR_HI + sdma_offsets[i], upper_32_bits(rptr_addr) & 0xFF);

		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + sdma_offsets[i], ib_cntl);

		dma_cntl = RREG32(DMA_CNTL + sdma_offsets[i]);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);

		ring->sched.ready = true;

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return 0;
}
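
/**
 * si_dma_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value
 * to memory (SI).
 * Returns 0 for success, error for failure.
 */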
static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 4);
	if (r)
		goto error_free_wb;

	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}
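
/**
 * si_dma_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring (SI).
 * Returns 0 on success, error on failure.
 */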
static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	u32 tmp = 0;
	u64 gpu_addr;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr) & 0xff;
	ib.ptr[3] = 0xDEADBEEF;
	ib.length_dw = 4;
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
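
/**
 * si_dma_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using the DMA (SI).
 */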
static void si_dma_vm_copy_pte(struct amdgpu_ib *ib,
			       uint64_t pe, uint64_t src,
			       unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, bytes);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff;
}
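
/**
 * si_dma_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using the DMA (SI).
 */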
static void si_dma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t value, unsigned count,
				uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}
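
/**
 * si_dma_vm_set_pte_pde - update the page tables using the DMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using the DMA (SI).
 */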
static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib,
				  uint64_t pe,
				  uint64_t addr, unsigned count,
				  uint32_t incr, uint64_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		ndw = count * 2;
		if (ndw > 0xFFFFE)
			ndw = 0xFFFFE;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
		ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
		ib->ptr[ib->length_dw++] = upper_32_bits(flags);
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		pe += ndw * 4;
		addr += (ndw / 2) * incr;
		count -= ndw / 2;
	}
}
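
/**
 * si_dma_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple of 8 dwords (SI).
 */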
static void si_dma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	while (ib->length_dw & 0x7)
		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
}
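
/**
 * si_dma_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (SI).
 */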
static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0) |
			  (1 << 27)); /* Poll memory */
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, (0xff << 16) | upper_32_bits(addr)); /* retry, addr_hi */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, seq); /* value */
	amdgpu_ring_write(ring, (3 << 28) | 0x20); /* func(equal) | poll interval */
}
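
/**
 * si_dma_ring_emit_vm_flush - flush the VM TLB using the DMA ring
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address of the page directory
 *
 * Update the page table base and flush the VM TLB
 * using the DMA (SI).
 */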
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
				      unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for invalidate to complete */
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
	amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0xff << 16); /* retry */
	amdgpu_ring_write(ring, 1 << vmid); /* mask */
	amdgpu_ring_write(ring, 0); /* value */
	amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}

static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
				  uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
	amdgpu_ring_write(ring, (0xf << 16) | reg);
	amdgpu_ring_write(ring, val);
}

static int si_dma_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->sdma.num_instances = 2;

	si_dma_set_ring_funcs(adev);
	si_dma_set_buffer_funcs(adev);
	si_dma_set_vm_pte_funcs(adev);
	si_dma_set_irq_funcs(adev);

	return 0;
}

static int si_dma_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* DMA0 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	/* DMA1 trap event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = false;
		sprintf(ring->name, "sdma%d", i);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 :
				     AMDGPU_SDMA_IRQ_INSTANCE1,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	return r;
}

static int si_dma_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	return 0;
}

static int si_dma_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_start(adev);
}

static int si_dma_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	si_dma_stop(adev);

	return 0;
}

static int si_dma_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_fini(adev);
}

static int si_dma_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return si_dma_hw_init(adev);
}

static bool si_dma_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 tmp = RREG32(SRBM_STATUS2);

	if (tmp & (DMA_BUSY_MASK | DMA1_BUSY_MASK))
		return false;

	return true;
}

static int si_dma_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (si_dma_is_idle(handle))
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int si_dma_soft_reset(void *handle)
{
	DRM_INFO("si_dma_soft_reset --- not implemented !!!!!!!\n");
	return 0;
}

static int si_dma_set_trap_irq_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *src,
				     unsigned type,
				     enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	switch (type) {
	case AMDGPU_SDMA_IRQ_INSTANCE0:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	case AMDGPU_SDMA_IRQ_INSTANCE1:
		switch (state) {
		case AMDGPU_IRQ_STATE_DISABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl &= ~TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		case AMDGPU_IRQ_STATE_ENABLE:
			sdma_cntl = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET);
			sdma_cntl |= TRAP_ENABLE;
			WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, sdma_cntl);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return 0;
}

static int si_dma_process_trap_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	if (entry->src_id == 224)
		amdgpu_fence_process(&adev->sdma.instance[0].ring);
	else
		amdgpu_fence_process(&adev->sdma.instance[1].ring);
	return 0;
}

static int si_dma_set_clockgating_state(void *handle,
					enum amd_clockgating_state state)
{
	u32 orig, data, offset;
	int i;
	bool enable;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	enable = (state == AMD_CG_STATE_GATE);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < adev->sdma.num_instances; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}

	return 0;
}

static int si_dma_set_powergating_state(void *handle,
					enum amd_powergating_state state)
{
	u32 tmp;

	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	WREG32(DMA_PGFSM_WRITE, 0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);

	return 0;
}

static const struct amd_ip_funcs si_dma_ip_funcs = {
	.name = "si_dma",
	.early_init = si_dma_early_init,
	.late_init = NULL,
	.sw_init = si_dma_sw_init,
	.sw_fini = si_dma_sw_fini,
	.hw_init = si_dma_hw_init,
	.hw_fini = si_dma_hw_fini,
	.suspend = si_dma_suspend,
	.resume = si_dma_resume,
	.is_idle = si_dma_is_idle,
	.wait_for_idle = si_dma_wait_for_idle,
	.soft_reset = si_dma_soft_reset,
	.set_clockgating_state = si_dma_set_clockgating_state,
	.set_powergating_state = si_dma_set_powergating_state,
};

static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0),
	.support_64bit_ptrs = false,
	.get_rptr = si_dma_ring_get_rptr,
	.get_wptr = si_dma_ring_get_wptr,
	.set_wptr = si_dma_ring_set_wptr,
	.emit_frame_size =
		3 + 3 + /* hdp flush / invalidate */
		6 + /* si_dma_ring_emit_pipeline_sync */
		SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
		9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
	.emit_ib = si_dma_ring_emit_ib,
	.emit_fence = si_dma_ring_emit_fence,
	.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
	.emit_vm_flush = si_dma_ring_emit_vm_flush,
	.test_ring = si_dma_ring_test_ring,
	.test_ib = si_dma_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = si_dma_ring_pad_ib,
	.emit_wreg = si_dma_ring_emit_wreg,
};

static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		adev->sdma.instance[i].ring.funcs = &si_dma_ring_funcs;
}

static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
	.set = si_dma_set_trap_irq_state,
	.process = si_dma_process_trap_irq,
};

static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
}
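
/**
 * si_dma_emit_copy_buffer - copy buffer using the DMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @tmz: is this a secure operation (unused on SI)
 *
 * Copy GPU buffers using the DMA engine (SI).
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */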
static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
				    uint64_t src_offset,
				    uint64_t dst_offset,
				    uint32_t byte_count,
				    bool tmz)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
					      1, 0, 0, byte_count);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) & 0xff;
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset) & 0xff;
}
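
/**
 * si_dma_emit_fill_buffer - fill buffer using the DMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine (SI).
 */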
static void si_dma_emit_fill_buffer(struct amdgpu_ib *ib,
				    uint32_t src_data,
				    uint64_t dst_offset,
				    uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_CONSTANT_FILL,
					      0, 0, 0, byte_count / 4);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset) << 16;
}

static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = {
	.copy_max_bytes = 0xffff8,
	.copy_num_dw = 5,
	.emit_copy_buffer = si_dma_emit_copy_buffer,

	.fill_max_bytes = 0xffff8,
	.fill_num_dw = 4,
	.emit_fill_buffer = si_dma_emit_fill_buffer,
};

static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &si_dma_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
	.copy_pte_num_dw = 5,
	.copy_pte = si_dma_vm_copy_pte,

	.write_pte = si_dma_vm_write_pte,
	.set_pte_pde = si_dma_vm_set_pte_pde,
};

static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->vm_manager.vm_pte_scheds[i] =
			&adev->sdma.instance[i].ring.sched;
	}
	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}

const struct amdgpu_ip_block_version si_dma_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 1,
	.minor = 0,
	.rev = 0,
	.funcs = &si_dma_ip_funcs,
};