0001 /*
0002  * Copyright 2016 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  * Authors: Christian König
0023  */
0024 #ifndef __AMDGPU_RING_H__
0025 #define __AMDGPU_RING_H__
0026 
0027 #include <drm/amdgpu_drm.h>
0028 #include <drm/gpu_scheduler.h>
0029 #include <drm/drm_print.h>
0030 
0031 struct amdgpu_device;
0032 struct amdgpu_ring;
0033 struct amdgpu_ib;
0034 struct amdgpu_cs_parser;
0035 struct amdgpu_job;
0036 struct amdgpu_vm;
0037 
0038 /* max number of rings */
0039 #define AMDGPU_MAX_RINGS        28
0040 #define AMDGPU_MAX_HWIP_RINGS       8
0041 #define AMDGPU_MAX_GFX_RINGS        2
0042 #define AMDGPU_MAX_COMPUTE_RINGS    8
0043 #define AMDGPU_MAX_VCE_RINGS        3
0044 #define AMDGPU_MAX_UVD_ENC_RINGS    2
0045 
/* Software priority levels for ring scheduling. */
enum amdgpu_ring_priority_level {
	AMDGPU_RING_PRIO_0,
	AMDGPU_RING_PRIO_1,
	AMDGPU_RING_PRIO_DEFAULT = 1,	/* alias of AMDGPU_RING_PRIO_1 */
	AMDGPU_RING_PRIO_2,
	AMDGPU_RING_PRIO_MAX		/* number of levels, not a valid priority */
};
0053 
0054 /* some special values for the owner field */
0055 #define AMDGPU_FENCE_OWNER_UNDEFINED    ((void *)0ul)
0056 #define AMDGPU_FENCE_OWNER_VM       ((void *)1ul)
0057 #define AMDGPU_FENCE_OWNER_KFD      ((void *)2ul)
0058 
0059 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
0060 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
0061 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
0062 
0063 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
0064 
0065 #define AMDGPU_IB_POOL_SIZE (1024 * 1024)
0066 
/*
 * Ring types.  The first entries deliberately mirror the UAPI hardware IP
 * types from amdgpu_drm.h so the two can be used interchangeably; KIQ and
 * MES are kernel-internal and have no UAPI counterpart.
 */
enum amdgpu_ring_type {
	AMDGPU_RING_TYPE_GFX		= AMDGPU_HW_IP_GFX,
	AMDGPU_RING_TYPE_COMPUTE	= AMDGPU_HW_IP_COMPUTE,
	AMDGPU_RING_TYPE_SDMA		= AMDGPU_HW_IP_DMA,
	AMDGPU_RING_TYPE_UVD		= AMDGPU_HW_IP_UVD,
	AMDGPU_RING_TYPE_VCE		= AMDGPU_HW_IP_VCE,
	AMDGPU_RING_TYPE_UVD_ENC	= AMDGPU_HW_IP_UVD_ENC,
	AMDGPU_RING_TYPE_VCN_DEC	= AMDGPU_HW_IP_VCN_DEC,
	AMDGPU_RING_TYPE_VCN_ENC	= AMDGPU_HW_IP_VCN_ENC,
	AMDGPU_RING_TYPE_VCN_JPEG	= AMDGPU_HW_IP_VCN_JPEG,
	AMDGPU_RING_TYPE_KIQ,		/* kernel interface queue */
	AMDGPU_RING_TYPE_MES		/* micro engine scheduler */
};
0080 
/* Which suballocator pool an indirect buffer is drawn from. */
enum amdgpu_ib_pool_type {
	/* Normal submissions to the top of the pipeline. */
	AMDGPU_IB_POOL_DELAYED,
	/* Immediate submissions to the bottom of the pipeline. */
	AMDGPU_IB_POOL_IMMEDIATE,
	/* Direct submission to the ring buffer during init and reset. */
	AMDGPU_IB_POOL_DIRECT,

	AMDGPU_IB_POOL_MAX	/* number of pools, not a valid pool */
};
0091 
/*
 * An indirect buffer: a suballocated chunk of command-stream memory that
 * the ring jumps into, rather than commands written to the ring itself.
 */
struct amdgpu_ib {
	struct amdgpu_sa_bo	*sa_bo;		/* suballocation backing this IB */
	uint32_t		length_dw;	/* used size of the command stream, in dwords */
	uint64_t		gpu_addr;	/* GPU VA of the command stream */
	uint32_t		*ptr;		/* CPU mapping of the command stream */
	uint32_t		flags;		/* submission flags — presumably AMDGPU_IB_FLAG_*; confirm against callers */
};
0099 
/* A set of GPU schedulers (at most one per HWIP ring). */
struct amdgpu_sched {
	u32				num_scheds;	/* number of valid entries in sched[] */
	struct drm_gpu_scheduler	*sched[AMDGPU_MAX_HWIP_RINGS];
};
0104 
0105 /*
0106  * Fences.
0107  */
/* Per-ring fence bookkeeping. */
struct amdgpu_fence_driver {
	uint64_t			gpu_addr;	/* GPU VA where the fence value is written */
	volatile uint32_t		*cpu_addr;	/* CPU view of the fence value */
	/* sync_seq is protected by ring emission lock */
	uint32_t			sync_seq;
	atomic_t			last_seq;	/* last sequence number observed by the driver */
	bool				initialized;
	struct amdgpu_irq_src		*irq_src;	/* interrupt source used for fence signalling */
	unsigned			irq_type;
	/* NOTE(review): presumably a fallback poll when fence IRQs are missed — confirm */
	struct timer_list		fallback_timer;
	unsigned			num_fences_mask;	/* index mask for the fences[] array */
	spinlock_t			lock;
	struct dma_fence		**fences;	/* in-flight fences, indexed by seq & num_fences_mask */
};
0122 
0123 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
0124 
0125 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
0126 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
0127 
0128 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
0129 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
0130                    struct amdgpu_irq_src *irq_src,
0131                    unsigned irq_type);
0132 void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev);
0133 void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev);
0134 int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev);
0135 void amdgpu_fence_driver_sw_fini(struct amdgpu_device *adev);
0136 int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, struct amdgpu_job *job,
0137               unsigned flags);
0138 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
0139                   uint32_t timeout);
0140 bool amdgpu_fence_process(struct amdgpu_ring *ring);
0141 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
0142 signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
0143                       uint32_t wait_seq,
0144                       signed long timeout);
0145 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
0146 void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
0147 
0148 /*
0149  * Rings.
0150  */
0151 
/*
 * Per-IP ring callback table, provided by hw blocks that expose a ring
 * buffer for commands.  Not every engine implements every callback;
 * several entries may be NULL for a given IP block.
 */
struct amdgpu_ring_funcs {
	enum amdgpu_ring_type	type;
	uint32_t		align_mask;	/* alignment mask — presumably for wptr padding; confirm */
	u32			nop;		/* NOP packet used to fill/pad the ring */
	bool			support_64bit_ptrs;	/* rptr/wptr are 64 bit wide */
	bool			no_user_fence;
	bool			secure_submission_supported;
	unsigned		vmhub;		/* VM hub this ring belongs to */
	unsigned		extra_dw;

	/* ring read/write ptr handling */
	u64 (*get_rptr)(struct amdgpu_ring *ring);
	u64 (*get_wptr)(struct amdgpu_ring *ring);
	void (*set_wptr)(struct amdgpu_ring *ring);
	/* validating and patching of IBs */
	int (*parse_cs)(struct amdgpu_cs_parser *p,
			struct amdgpu_job *job,
			struct amdgpu_ib *ib);
	int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
				 struct amdgpu_job *job,
				 struct amdgpu_ib *ib);
	/* constants to calculate how many DW are needed for an emit */
	unsigned emit_frame_size;
	unsigned emit_ib_size;
	/* command emit functions */
	void (*emit_ib)(struct amdgpu_ring *ring,
			struct amdgpu_job *job,
			struct amdgpu_ib *ib,
			uint32_t flags);
	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
			   uint64_t seq, unsigned flags);
	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
			      uint64_t pd_addr);
	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
				uint32_t gds_base, uint32_t gds_size,
				uint32_t gws_base, uint32_t gws_size,
				uint32_t oa_base, uint32_t oa_size);
	/* testing functions */
	int (*test_ring)(struct amdgpu_ring *ring);
	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
	/* insert NOP packets */
	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
	void (*insert_start)(struct amdgpu_ring *ring);
	void (*insert_end)(struct amdgpu_ring *ring);
	/* pad the indirect buffer to the necessary number of dw */
	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
	/* conditional execution: init reserves the COND_EXEC space, patch fills it in later */
	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
	/* note usage for clock and power gating */
	void (*begin_use)(struct amdgpu_ring *ring);
	void (*end_use)(struct amdgpu_ring *ring);
	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
	/* register access through the ring (used e.g. under SR-IOV — confirm) */
	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
			  uint32_t reg_val_offs);
	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
			      uint32_t val, uint32_t mask);
	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
					uint32_t reg0, uint32_t reg1,
					uint32_t ref, uint32_t mask);
	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
				bool secure);
	/* Try to soft recover the ring to make the fence signal */
	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
	int (*preempt_ib)(struct amdgpu_ring *ring);
	void (*emit_mem_sync)(struct amdgpu_ring *ring);
	void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
};
0224 
/* State of a single command ring (ring buffer, fence driver, scheduler). */
struct amdgpu_ring {
	struct amdgpu_device		*adev;
	const struct amdgpu_ring_funcs	*funcs;		/* per-IP callbacks for this ring */
	struct amdgpu_fence_driver	fence_drv;
	struct drm_gpu_scheduler	sched;

	/* the ring buffer itself */
	struct amdgpu_bo	*ring_obj;	/* BO backing the ring buffer */
	volatile uint32_t	*ring;		/* CPU mapping of the ring buffer */
	unsigned		rptr_offs;
	u64			rptr_gpu_addr;
	volatile u32		*rptr_cpu_addr;
	u64			wptr;		/* current write position (dwords, masked by ptr_mask) */
	u64			wptr_old;
	unsigned		ring_size;	/* ring buffer size in bytes */
	unsigned		max_dw;
	int			count_dw;	/* dwords still reserved for the current submission */
	uint64_t		gpu_addr;	/* GPU VA of the ring buffer */
	uint64_t		ptr_mask;	/* wrap mask for rptr/wptr */
	uint32_t		buf_mask;	/* dword-index mask into ring[] */
	u32			idx;
	/* hardware queue addressing */
	u32			me;
	u32			pipe;
	u32			queue;
	/* memory queue descriptor (MQD) */
	struct amdgpu_bo	*mqd_obj;
	uint64_t                mqd_gpu_addr;
	void                    *mqd_ptr;
	uint64_t                eop_gpu_addr;
	u32			doorbell_index;
	bool			use_doorbell;
	bool			use_pollmem;
	unsigned		wptr_offs;
	u64			wptr_gpu_addr;
	volatile u32		*wptr_cpu_addr;
	unsigned		fence_offs;
	u64			fence_gpu_addr;
	volatile u32		*fence_cpu_addr;
	uint64_t		current_ctx;
	char			name[16];
	/* trailing fence, used for preemption — confirm against preempt_ib users */
	u32                     trail_seq;
	unsigned		trail_fence_offs;
	u64			trail_fence_gpu_addr;
	volatile u32		*trail_fence_cpu_addr;
	/* conditional-execution dword (see amdgpu_ring_set_preempt_cond_exec()) */
	unsigned		cond_exe_offs;
	u64			cond_exe_gpu_addr;
	volatile u32		*cond_exe_cpu_addr;
	unsigned		vm_inv_eng;	/* VM invalidation engine assigned to this ring */
	struct dma_fence	*vmid_wait;
	bool			has_compute_vm_bug;
	bool			no_scheduler;	/* ring is driven directly, without drm_gpu_scheduler */
	int			hw_prio;
	unsigned		num_hw_submission;
	atomic_t		*sched_score;

	/* used for mes */
	bool			is_mes_queue;
	uint32_t		hw_queue_id;
	struct amdgpu_mes_ctx_data *mes_ctx;
};
0283 
/*
 * Convenience wrappers that dispatch through the ring's function table.
 * Callers are responsible for only invoking callbacks the IP block
 * actually provides (several entries may be NULL).
 */
#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
#define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
0308 
0309 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
0310 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
0311 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
0312 void amdgpu_ring_commit(struct amdgpu_ring *ring);
0313 void amdgpu_ring_undo(struct amdgpu_ring *ring);
0314 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
0315              unsigned int max_dw, struct amdgpu_irq_src *irq_src,
0316              unsigned int irq_type, unsigned int hw_prio,
0317              atomic_t *sched_score);
0318 void amdgpu_ring_fini(struct amdgpu_ring *ring);
0319 void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
0320                         uint32_t reg0, uint32_t val0,
0321                         uint32_t reg1, uint32_t val1);
0322 bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
0323                    struct dma_fence *fence);
0324 
/*
 * amdgpu_ring_set_preempt_cond_exec - set the ring's COND_EXEC flag
 * @ring: ring owning the conditional-execution dword
 * @cond_exec: value stored at the ring's cond_exe location
 *
 * NOTE(review): presumably the firmware polls this dword to decide whether
 * the conditionally executed packet range runs during preemption — confirm
 * against the per-IP init_cond_exec/preempt_ib implementations.
 */
static inline void amdgpu_ring_set_preempt_cond_exec(struct amdgpu_ring *ring,
						     bool cond_exec)
{
	*ring->cond_exe_cpu_addr = cond_exec;
}
0330 
0331 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
0332 {
0333     int i = 0;
0334     while (i <= ring->buf_mask)
0335         ring->ring[i++] = ring->funcs->nop;
0336 
0337 }
0338 
0339 static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
0340 {
0341     if (ring->count_dw <= 0)
0342         DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
0343     ring->ring[ring->wptr++ & ring->buf_mask] = v;
0344     ring->wptr &= ring->ptr_mask;
0345     ring->count_dw--;
0346 }
0347 
/*
 * amdgpu_ring_write_multiple - copy a block of dwords into the ring buffer
 * @ring: ring to write to
 * @src: source buffer holding @count_dw dwords
 * @count_dw: number of dwords to copy
 *
 * Copies in at most two chunks so a write that crosses the end of the
 * ring buffer wraps back to the start.  Complains (but still writes)
 * when the caller reserved too few dwords.
 */
static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
					      void *src, int count_dw)
{
	unsigned occupied, chunk1, chunk2;
	void *dst;

	if (unlikely(ring->count_dw < count_dw))
		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");

	/* current dword offset into the buffer */
	occupied = ring->wptr & ring->buf_mask;
	dst = (void *)&ring->ring[occupied];
	/* dwords remaining until the end of the buffer */
	chunk1 = ring->buf_mask + 1 - occupied;
	chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
	/* remainder that wraps around to the start */
	chunk2 = count_dw - chunk1;
	/* convert dword counts to byte counts for memcpy() */
	chunk1 <<= 2;
	chunk2 <<= 2;

	if (chunk1)
		memcpy(dst, src, chunk1);

	if (chunk2) {
		src += chunk1;	/* void * arithmetic: GNU extension, standard in kernel code */
		dst = (void *)ring->ring;
		memcpy(dst, src, chunk2);
	}

	ring->wptr += count_dw;
	ring->wptr &= ring->ptr_mask;
	ring->count_dw -= count_dw;
}
0378 
/*
 * Resolve @offset inside the MES context meta data to a GPU or CPU
 * address; evaluates to 0 / NULL when the ring is not a MES queue or
 * has no MES context attached.
 */
#define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset)			\
	(ring->is_mes_queue && ring->mes_ctx ?				\
	 (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)

#define amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset)			\
	(ring->is_mes_queue && ring->mes_ctx ?				\
	 (void *)((uint8_t *)(ring->mes_ctx->meta_data_ptr) + offset) : \
	 NULL)
0387 
0388 int amdgpu_ring_test_helper(struct amdgpu_ring *ring);
0389 
0390 void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
0391                   struct amdgpu_ring *ring);
0392 
0393 int amdgpu_ring_init_mqd(struct amdgpu_ring *ring);
0394 
/* Read the dword at index @idx from the IB's CPU mapping. */
static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx)
{
	return ib->ptr[idx];
}
0399 
/* Store @value at dword index @idx in the IB's CPU mapping. */
static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx,
				       uint32_t value)
{
	ib->ptr[idx] = value;
}
0405 
0406 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
0407           unsigned size,
0408           enum amdgpu_ib_pool_type pool,
0409           struct amdgpu_ib *ib);
0410 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
0411             struct dma_fence *f);
0412 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
0413                struct amdgpu_ib *ibs, struct amdgpu_job *job,
0414                struct dma_fence **f);
0415 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
0416 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
0417 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
0418 
0419 #endif