#ifndef __AMDGPU_RING_H__
#define __AMDGPU_RING_H__

#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
#include <drm/drm_print.h>

struct amdgpu_device;
struct amdgpu_ring;
struct amdgpu_ib;
struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_vm;

/* max number of rings */
#define AMDGPU_MAX_RINGS		28
#define AMDGPU_MAX_HWIP_RINGS		8
#define AMDGPU_MAX_GFX_RINGS		2
#define AMDGPU_MAX_COMPUTE_RINGS	8
#define AMDGPU_MAX_VCE_RINGS		3
#define AMDGPU_MAX_UVD_ENC_RINGS	2

/* ring priority levels; AMDGPU_RING_PRIO_DEFAULT aliases AMDGPU_RING_PRIO_1 */
enum amdgpu_ring_priority_level {
	AMDGPU_RING_PRIO_0,
	AMDGPU_RING_PRIO_1,
	AMDGPU_RING_PRIO_DEFAULT = 1,
	AMDGPU_RING_PRIO_2,
	AMDGPU_RING_PRIO_MAX
};

/* some special values for the owner field of a fence */
#define AMDGPU_FENCE_OWNER_UNDEFINED	((void *)0ul)
#define AMDGPU_FENCE_OWNER_VM		((void *)1ul)
#define AMDGPU_FENCE_OWNER_KFD		((void *)2ul)

#define AMDGPU_FENCE_FLAG_64BIT		(1 << 0)
#define AMDGPU_FENCE_FLAG_INT		(1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY	(1 << 2)

#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)

#define AMDGPU_IB_POOL_SIZE	(1024 * 1024)

enum amdgpu_ring_type {
	AMDGPU_RING_TYPE_GFX		= AMDGPU_HW_IP_GFX,
	AMDGPU_RING_TYPE_COMPUTE	= AMDGPU_HW_IP_COMPUTE,
	AMDGPU_RING_TYPE_SDMA		= AMDGPU_HW_IP_DMA,
	AMDGPU_RING_TYPE_UVD		= AMDGPU_HW_IP_UVD,
	AMDGPU_RING_TYPE_VCE		= AMDGPU_HW_IP_VCE,
	AMDGPU_RING_TYPE_UVD_ENC	= AMDGPU_HW_IP_UVD_ENC,
	AMDGPU_RING_TYPE_VCN_DEC	= AMDGPU_HW_IP_VCN_DEC,
	AMDGPU_RING_TYPE_VCN_ENC	= AMDGPU_HW_IP_VCN_ENC,
	AMDGPU_RING_TYPE_VCN_JPEG	= AMDGPU_HW_IP_VCN_JPEG,
	AMDGPU_RING_TYPE_KIQ,
	AMDGPU_RING_TYPE_MES
};

enum amdgpu_ib_pool_type {
	/* Normal submissions to the top of the pipeline. */
	AMDGPU_IB_POOL_DELAYED,
	/* Immediate submissions to the bottom of the pipeline. */
	AMDGPU_IB_POOL_IMMEDIATE,
	/* Direct submission to the ring buffer during init and reset. */
	AMDGPU_IB_POOL_DIRECT,

	AMDGPU_IB_POOL_MAX
};

struct amdgpu_ib {
	struct amdgpu_sa_bo	*sa_bo;
	uint32_t		length_dw;
	uint64_t		gpu_addr;
	uint32_t		*ptr;
	uint32_t		flags;
};

struct amdgpu_sched {
	u32				num_scheds;
	struct drm_gpu_scheduler	*sched[AMDGPU_MAX_HWIP_RINGS];
};

/*
 * Fences.
 *
 * Fences mark an event in the GPU's pipeline and are used for GPU/CPU
 * synchronization: once the fence value is written back, the work
 * emitted before it is known to have completed on the ring.
 */
struct amdgpu_fence_driver {
	uint64_t			gpu_addr;
	volatile uint32_t		*cpu_addr;
	/* sync_seq is protected by ring emission lock */
	uint32_t			sync_seq;
	atomic_t			last_seq;
	bool				initialized;
	struct amdgpu_irq_src		*irq_src;
	unsigned			irq_type;
	struct timer_list		fallback_timer;
	unsigned			num_fences_mask;
	spinlock_t			lock;
	struct dma_fence		**fences;
};

extern const struct drm_sched_backend_ops amdgpu_sched_ops;

void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);

int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned irq_type);
void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
		      struct amdgpu_job *job, unsigned flags);
int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
			      uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
				      uint32_t wait_seq,
				      signed long timeout);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
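
/*
 * Example (illustrative sketch, not part of this header's API contract):
 * emitting a fence without interrupt support and polling for completion,
 * as is done on paths where interrupts are not yet available. The dword
 * count (32) and the timeout values are assumptions for illustration:
 *
 *	uint32_t seq;
 *	int r;
 *
 *	r = amdgpu_ring_alloc(ring, 32);
 *	if (r)
 *		return r;
 *	r = amdgpu_fence_emit_polling(ring, &seq, 1000);
 *	if (r) {
 *		amdgpu_ring_undo(ring);
 *		return r;
 *	}
 *	amdgpu_ring_commit(ring);
 *	if (amdgpu_fence_wait_polling(ring, seq, 1000) < 1)
 *		return -ETIMEDOUT;
 */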

/*
 * Rings.
 */

/* provided by hw blocks that expose a ring buffer for commands */
struct amdgpu_ring_funcs {
	enum amdgpu_ring_type	type;
	uint32_t		align_mask;
	u32			nop;
	bool			support_64bit_ptrs;
	bool			no_user_fence;
	bool			secure_submission_supported;
	unsigned		vmhub;
	unsigned		extra_dw;

	/* ring read/write ptr handling */
	u64 (*get_rptr)(struct amdgpu_ring *ring);
	u64 (*get_wptr)(struct amdgpu_ring *ring);
	void (*set_wptr)(struct amdgpu_ring *ring);
	/* validating and patching of IBs */
	int (*parse_cs)(struct amdgpu_cs_parser *p,
			struct amdgpu_job *job,
			struct amdgpu_ib *ib);
	int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
				 struct amdgpu_job *job,
				 struct amdgpu_ib *ib);
	/* constants to calculate how many DW are needed for an emit */
	unsigned	emit_frame_size;
	unsigned	emit_ib_size;
	/* command emit functions */
	void (*emit_ib)(struct amdgpu_ring *ring,
			struct amdgpu_job *job,
			struct amdgpu_ib *ib,
			uint32_t flags);
	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
			   uint64_t seq, unsigned flags);
	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
			      uint64_t pd_addr);
	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
				uint32_t gds_base, uint32_t gds_size,
				uint32_t gws_base, uint32_t gws_size,
				uint32_t oa_base, uint32_t oa_size);
	/* testing functions */
	int (*test_ring)(struct amdgpu_ring *ring);
	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
	/* insert NOP packets */
	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
	void (*insert_start)(struct amdgpu_ring *ring);
	void (*insert_end)(struct amdgpu_ring *ring);
	/* pad the indirect buffer to the necessary number of dw */
	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
	/* note usage for clock and power gating */
	void (*begin_use)(struct amdgpu_ring *ring);
	void (*end_use)(struct amdgpu_ring *ring);
	void (*emit_switch_buffer)(struct amdgpu_ring *ring);
	void (*emit_cntxcntl)(struct amdgpu_ring *ring, uint32_t flags);
	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
			  uint32_t reg_val_offs);
	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
			      uint32_t val, uint32_t mask);
	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
					uint32_t reg0, uint32_t reg1,
					uint32_t ref, uint32_t mask);
	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
				bool secure);
	/* try to soft recover the ring to make the fence signal */
	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
	int (*preempt_ib)(struct amdgpu_ring *ring);
	void (*emit_mem_sync)(struct amdgpu_ring *ring);
	void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
};
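
/*
 * Example (hedged sketch): each IP block provides a static funcs table for
 * its rings and points ring->funcs at it. The names and field values below
 * are purely illustrative, not taken from any real IP block:
 *
 *	static const struct amdgpu_ring_funcs foo_ring_funcs = {
 *		.type = AMDGPU_RING_TYPE_SDMA,
 *		.align_mask = 0xf,
 *		.nop = 0x0,
 *		.support_64bit_ptrs = true,
 *		.get_rptr = foo_ring_get_rptr,
 *		.get_wptr = foo_ring_get_wptr,
 *		.set_wptr = foo_ring_set_wptr,
 *		.emit_ib = foo_ring_emit_ib,
 *		.emit_fence = foo_ring_emit_fence,
 *		.insert_nop = amdgpu_ring_insert_nop,
 *		.pad_ib = amdgpu_ring_generic_pad_ib,
 *	};
 */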

struct amdgpu_ring {
	struct amdgpu_device		*adev;
	const struct amdgpu_ring_funcs	*funcs;
	struct amdgpu_fence_driver	fence_drv;
	struct drm_gpu_scheduler	sched;

	struct amdgpu_bo	*ring_obj;
	volatile uint32_t	*ring;
	unsigned		rptr_offs;
	u64			rptr_gpu_addr;
	volatile u32		*rptr_cpu_addr;
	u64			wptr;
	u64			wptr_old;
	unsigned		ring_size;
	unsigned		max_dw;
	int			count_dw;
	uint64_t		gpu_addr;
	uint64_t		ptr_mask;
	uint32_t		buf_mask;
	u32			idx;
	u32			me;
	u32			pipe;
	u32			queue;
	struct amdgpu_bo	*mqd_obj;
	uint64_t		mqd_gpu_addr;
	void			*mqd_ptr;
	uint64_t		eop_gpu_addr;
	u32			doorbell_index;
	bool			use_doorbell;
	bool			use_pollmem;
	unsigned		wptr_offs;
	u64			wptr_gpu_addr;
	volatile u32		*wptr_cpu_addr;
	unsigned		fence_offs;
	u64			fence_gpu_addr;
	volatile u32		*fence_cpu_addr;
	uint64_t		current_ctx;
	char			name[16];
	u32			trail_seq;
	unsigned		trail_fence_offs;
	u64			trail_fence_gpu_addr;
	volatile u32		*trail_fence_cpu_addr;
	unsigned		cond_exe_offs;
	u64			cond_exe_gpu_addr;
	volatile u32		*cond_exe_cpu_addr;
	unsigned		vm_inv_eng;
	struct dma_fence	*vmid_wait;
	bool			has_compute_vm_bug;
	bool			no_scheduler;
	int			hw_prio;
	unsigned		num_hw_submission;
	atomic_t		*sched_score;

	/* used for MES (micro engine scheduler) queues */
	bool			is_mes_queue;
	uint32_t		hw_queue_id;
	struct amdgpu_mes_ctx_data *mes_ctx;
};

#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r, o) (r)->funcs->patch_cond_exec((r), (o))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)

int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		     unsigned int max_dw, struct amdgpu_irq_src *irq_src,
		     unsigned int irq_type, unsigned int hw_prio,
		     atomic_t *sched_score);
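
/*
 * Example (illustrative sketch): an IP block typically initializes its ring
 * during sw_init. The max_dw value (1024), the irq source/type and the ring
 * name are assumptions for illustration only:
 *
 *	ring->funcs = &foo_ring_funcs;	// hypothetical funcs table, see above
 *	sprintf(ring->name, "foo");
 *	r = amdgpu_ring_init(adev, ring, 1024, irq_src, irq_type,
 *			     AMDGPU_RING_PRIO_DEFAULT, NULL);
 *	if (r)
 *		return r;
 */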
void amdgpu_ring_fini(struct amdgpu_ring *ring);
void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
						uint32_t reg0, uint32_t val0,
						uint32_t reg1, uint32_t val1);
bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
			       struct dma_fence *fence);

static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
						     bool cond_exec)
{
	*ring->cond_exe_cpu_addr = cond_exec;
}

static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
{
	int i = 0;

	while (i <= ring->buf_mask)
		ring->ring[i++] = ring->funcs->nop;
}

static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
	if (ring->count_dw <= 0)
		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");

	ring->ring[ring->wptr++ & ring->buf_mask] = v;
	ring->wptr &= ring->ptr_mask;
	ring->count_dw--;
}
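
/*
 * Example (illustrative sketch): the canonical emit sequence is to reserve
 * space, write dwords and commit, undoing the reservation on failure. The
 * dword count and packet names are hypothetical:
 *
 *	r = amdgpu_ring_alloc(ring, 4);
 *	if (r)
 *		return r;
 *	amdgpu_ring_write(ring, header_dw);	// hypothetical packet header
 *	amdgpu_ring_write(ring, payload_dw);	// hypothetical payload
 *	amdgpu_ring_commit(ring);
 */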

static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
					      void *src, int count_dw)
{
	unsigned occupied, chunk1, chunk2;
	void *dst;

	if (unlikely(ring->count_dw < count_dw))
		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");

	/* split the copy in two chunks if it wraps past the end of the ring */
	occupied = ring->wptr & ring->buf_mask;
	dst = (void *)&ring->ring[occupied];
	chunk1 = ring->buf_mask + 1 - occupied;
	chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
	chunk2 = count_dw - chunk1;
	chunk1 <<= 2;
	chunk2 <<= 2;

	if (chunk1)
		memcpy(dst, src, chunk1);

	if (chunk2) {
		src += chunk1;
		dst = (void *)ring->ring;
		memcpy(dst, src, chunk2);
	}

	ring->wptr += count_dw;
	ring->wptr &= ring->ptr_mask;
	ring->count_dw -= count_dw;
}
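
/*
 * Example (illustrative sketch): copying a prebuilt packet into the ring in
 * one call; the helper above handles the wrap-around split internally. The
 * packet contents are hypothetical:
 *
 *	uint32_t pkt[4] = { 0 };	// fill with real packet dwords
 *
 *	amdgpu_ring_write_multiple(ring, pkt, ARRAY_SIZE(pkt));
 */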

#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset)			\
	(ring->is_mes_queue && ring->mes_ctx ?				\
	 (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)

#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset)			\
	(ring->is_mes_queue && ring->mes_ctx ?				\
	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) :	\
	 NULL)

int amdgpu_ring_test_helper(struct amdgpu_ring *ring);

void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring);

int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);

static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
{
	return ib->ptr[idx];
}

static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
				       uint32_t value)
{
	ib->ptr[idx] = value;
}

int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		  unsigned size, enum amdgpu_ib_pool_type pool,
		  struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
		    struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
		       struct amdgpu_ib *ibs, struct amdgpu_job *job,
		       struct dma_fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
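
/*
 * Example (illustrative sketch): a typical kernel-internal IB lifecycle,
 * similar to what the ring/IB tests do. The IB size (256 bytes), the pool
 * choice and the timeout are assumptions for illustration:
 *
 *	struct amdgpu_ib ib;
 *	struct dma_fence *f = NULL;
 *	long r;
 *
 *	memset(&ib, 0, sizeof(ib));
 *	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
 *	if (r)
 *		return r;
 *	// fill ib.ptr[] with packets and bump ib.length_dw accordingly
 *	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 *	if (r)
 *		goto err;
 *	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(100));
 *	if (r == 0)
 *		r = -ETIMEDOUT;
 * err:
 *	amdgpu_ib_free(adev, &ib, f);
 *	dma_fence_put(f);
 */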

#endif