0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include <linux/dma-mapping.h>
0025
0026 #include "amdgpu.h"
0027 #include "amdgpu_ih.h"
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041 int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
0042 unsigned ring_size, bool use_bus_addr)
0043 {
0044 u32 rb_bufsz;
0045 int r;
0046
0047
0048 rb_bufsz = order_base_2(ring_size / 4);
0049 ring_size = (1 << rb_bufsz) * 4;
0050 ih->ring_size = ring_size;
0051 ih->ptr_mask = ih->ring_size - 1;
0052 ih->rptr = 0;
0053 ih->use_bus_addr = use_bus_addr;
0054
0055 if (use_bus_addr) {
0056 dma_addr_t dma_addr;
0057
0058 if (ih->ring)
0059 return 0;
0060
0061
0062
0063
0064 ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
0065 &dma_addr, GFP_KERNEL);
0066 if (ih->ring == NULL)
0067 return -ENOMEM;
0068
0069 ih->gpu_addr = dma_addr;
0070 ih->wptr_addr = dma_addr + ih->ring_size;
0071 ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
0072 ih->rptr_addr = dma_addr + ih->ring_size + 4;
0073 ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
0074 } else {
0075 unsigned wptr_offs, rptr_offs;
0076
0077 r = amdgpu_device_wb_get(adev, &wptr_offs);
0078 if (r)
0079 return r;
0080
0081 r = amdgpu_device_wb_get(adev, &rptr_offs);
0082 if (r) {
0083 amdgpu_device_wb_free(adev, wptr_offs);
0084 return r;
0085 }
0086
0087 r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
0088 AMDGPU_GEM_DOMAIN_GTT,
0089 &ih->ring_obj, &ih->gpu_addr,
0090 (void **)&ih->ring);
0091 if (r) {
0092 amdgpu_device_wb_free(adev, rptr_offs);
0093 amdgpu_device_wb_free(adev, wptr_offs);
0094 return r;
0095 }
0096
0097 ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
0098 ih->wptr_cpu = &adev->wb.wb[wptr_offs];
0099 ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
0100 ih->rptr_cpu = &adev->wb.wb[rptr_offs];
0101 }
0102
0103 init_waitqueue_head(&ih->wait_process);
0104 return 0;
0105 }
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
0117 {
0118
0119 if (!ih->ring)
0120 return;
0121
0122 if (ih->use_bus_addr) {
0123
0124
0125
0126
0127 dma_free_coherent(adev->dev, ih->ring_size + 8,
0128 (void *)ih->ring, ih->gpu_addr);
0129 ih->ring = NULL;
0130 } else {
0131 amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
0132 (void **)&ih->ring);
0133 amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
0134 amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
0135 }
0136 }
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148 void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
0149 unsigned int num_dw)
0150 {
0151 uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
0152 unsigned int i;
0153
0154 for (i = 0; i < num_dw; ++i)
0155 ih->ring[wptr++] = cpu_to_le32(iv[i]);
0156
0157 wptr <<= 2;
0158 wptr &= ih->ptr_mask;
0159
0160
0161 if (wptr != READ_ONCE(ih->rptr)) {
0162 wmb();
0163 WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
0164 }
0165 }
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175 int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
0176 struct amdgpu_ih_ring *ih)
0177 {
0178 uint32_t checkpoint_wptr;
0179 uint64_t checkpoint_ts;
0180 long timeout = HZ;
0181
0182 if (!ih->enabled || adev->shutdown)
0183 return -ENODEV;
0184
0185 checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
0186
0187 rmb();
0188 checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
0189
0190 return wait_event_interruptible_timeout(ih->wait_process,
0191 amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
0192 ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
0193 }
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
0205 {
0206 unsigned int count;
0207 u32 wptr;
0208
0209 if (!ih->enabled || adev->shutdown)
0210 return IRQ_NONE;
0211
0212 wptr = amdgpu_ih_get_wptr(adev, ih);
0213
0214 restart_ih:
0215 count = AMDGPU_IH_MAX_NUM_IVS;
0216 DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
0217
0218
0219 rmb();
0220
0221 while (ih->rptr != wptr && --count) {
0222 amdgpu_irq_dispatch(adev, ih);
0223 ih->rptr &= ih->ptr_mask;
0224 }
0225
0226 amdgpu_ih_set_rptr(adev, ih);
0227 wake_up_all(&ih->wait_process);
0228
0229
0230 wptr = amdgpu_ih_get_wptr(adev, ih);
0231 if (wptr != ih->rptr)
0232 goto restart_ih;
0233
0234 return IRQ_HANDLED;
0235 }
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248 void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
0249 struct amdgpu_ih_ring *ih,
0250 struct amdgpu_iv_entry *entry)
0251 {
0252
0253 u32 ring_index = ih->rptr >> 2;
0254 uint32_t dw[8];
0255
0256 dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
0257 dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
0258 dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
0259 dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
0260 dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
0261 dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
0262 dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
0263 dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
0264
0265 entry->client_id = dw[0] & 0xff;
0266 entry->src_id = (dw[0] >> 8) & 0xff;
0267 entry->ring_id = (dw[0] >> 16) & 0xff;
0268 entry->vmid = (dw[0] >> 24) & 0xf;
0269 entry->vmid_src = (dw[0] >> 31);
0270 entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
0271 entry->timestamp_src = dw[2] >> 31;
0272 entry->pasid = dw[3] & 0xffff;
0273 entry->pasid_src = dw[3] >> 31;
0274 entry->src_data[0] = dw[4];
0275 entry->src_data[1] = dw[5];
0276 entry->src_data[2] = dw[6];
0277 entry->src_data[3] = dw[7];
0278
0279
0280 ih->rptr += 32;
0281 }
0282
0283 uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
0284 signed int offset)
0285 {
0286 uint32_t iv_size = 32;
0287 uint32_t ring_index;
0288 uint32_t dw1, dw2;
0289
0290 rptr += iv_size * offset;
0291 ring_index = (rptr & ih->ptr_mask) >> 2;
0292
0293 dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
0294 dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
0295 return dw1 | ((u64)(dw2 & 0xffff) << 32);
0296 }