Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2019 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 
0024 #ifndef __AMDGPU_MES_H__
0025 #define __AMDGPU_MES_H__
0026 
0027 #include "amdgpu_irq.h"
0028 #include "kgd_kfd_interface.h"
0029 #include "amdgpu_gfx.h"
0030 #include <linux/sched/mm.h>
0031 
/* Maximum number of hardware pipes MES tracks per engine type. */
#define AMDGPU_MES_MAX_COMPUTE_PIPES        8
#define AMDGPU_MES_MAX_GFX_PIPES            2
#define AMDGPU_MES_MAX_SDMA_PIPES           2

/*
 * Bit layout of the 32-bit MES version word (see masks below):
 *   [11:0]  ucode version, [23:12] API version, [31:24] feature version.
 */
#define AMDGPU_MES_API_VERSION_SHIFT    12
#define AMDGPU_MES_FEAT_VERSION_SHIFT   24

#define AMDGPU_MES_VERSION_MASK     0x00000fff
#define AMDGPU_MES_API_VERSION_MASK 0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK    0xff000000
0042 
/*
 * Scheduling priority levels, lowest first.  Also used to index the
 * per-level aggregated doorbells in struct amdgpu_mes.
 */
enum amdgpu_mes_priority_level {
    AMDGPU_MES_PRIORITY_LEVEL_LOW       = 0,
    AMDGPU_MES_PRIORITY_LEVEL_NORMAL    = 1,
    AMDGPU_MES_PRIORITY_LEVEL_MEDIUM    = 2,
    AMDGPU_MES_PRIORITY_LEVEL_HIGH      = 3,
    AMDGPU_MES_PRIORITY_LEVEL_REALTIME  = 4,
    AMDGPU_MES_PRIORITY_NUM_LEVELS      /* count, not a valid level */
};

#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */

struct amdgpu_mes_funcs;

/*
 * MES firmware pipes; used to size the per-pipe arrays below.
 * NOTE(review): tag is misspelled ("admgpu"); kept as-is because it is
 * part of the public interface and renaming would break users.
 */
enum admgpu_mes_pipe {
    AMDGPU_MES_SCHED_PIPE = 0,
    AMDGPU_MES_KIQ_PIPE,
    AMDGPU_MAX_MES_PIPES = 2,
};
0062 
/*
 * Device-wide MES (Micro Engine Scheduler) state.
 *
 * Field layout must not change: this struct is shared across the driver.
 */
struct amdgpu_mes {
    struct amdgpu_device            *adev;

    /* Serializes MES state.  Take via amdgpu_mes_lock()/amdgpu_mes_unlock()
     * only, never directly -- see the locking comment above amdgpu_mes_lock().
     */
    struct mutex                    mutex_hidden;

    /* ID allocators; presumably pasid -> process, gang_id -> gang and
     * queue_id -> queue lookups -- confirm against amdgpu_mes.c users. */
    struct idr                      pasid_idr;
    struct idr                      gang_id_idr;
    struct idr                      queue_id_idr;
    struct ida                      doorbell_ida;

    /* NOTE(review): presumably protects queue_id_idr (usable from
     * non-sleepable context, unlike mutex_hidden) -- verify in callers. */
    spinlock_t                      queue_id_lock;

    /* Firmware version words for the scheduler and KIQ pipes; decode with
     * the AMDGPU_MES_*_VERSION_MASK/SHIFT macros above. */
    uint32_t            sched_version;
    uint32_t            kiq_version;

    uint32_t                        total_max_queue;
    uint32_t                        doorbell_id_offset;
    uint32_t                        max_doorbell_slices;

    /* Default time quanta applied when a process/gang does not set one. */
    uint64_t                        default_process_quantum;
    uint64_t                        default_gang_quantum;

    /* Ring used to submit packets to MES; ring_lock serializes access. */
    struct amdgpu_ring              ring;
    spinlock_t                      ring_lock;

    /* Loaded firmware images, one per MES pipe. */
    const struct firmware           *fw[AMDGPU_MAX_MES_PIPES];

    /* mes ucode: backing BO, GPU/CPU addresses and version, per pipe */
    struct amdgpu_bo        *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
    uint64_t            ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
    uint32_t            *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
    uint32_t                        ucode_fw_version[AMDGPU_MAX_MES_PIPES];
    uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];

    /* mes ucode data: same layout as the ucode fields above, per pipe */
    struct amdgpu_bo        *data_fw_obj[AMDGPU_MAX_MES_PIPES];
    uint64_t            data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
    uint32_t            *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
    uint32_t                        data_fw_version[AMDGPU_MAX_MES_PIPES];
    uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];

    /* eop gpu obj: end-of-pipe buffer per pipe */
    struct amdgpu_bo        *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
    uint64_t                        eop_gpu_addr[AMDGPU_MAX_MES_PIPES];

    /* CPU-side MQD backup used across reset/suspend, per pipe. */
    void                            *mqd_backup[AMDGPU_MAX_MES_PIPES];
    struct amdgpu_irq_src           irq[AMDGPU_MAX_MES_PIPES];

    /* Resource masks handed to the MES firmware: which VMIDs and which
     * HQDs per pipe it may use, plus the per-priority aggregated doorbells. */
    uint32_t                        vmid_mask_gfxhub;
    uint32_t                        vmid_mask_mmhub;
    uint32_t                        compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
    uint32_t                        gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
    uint32_t                        sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
    uint32_t                        aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
    /* Scratch areas (offset + GPU address + CPU pointer triples):
     * scheduler context, query-status fence, and register read-back. */
    uint32_t                        sch_ctx_offs;
    uint64_t            sch_ctx_gpu_addr;
    uint64_t            *sch_ctx_ptr;
    uint32_t            query_status_fence_offs;
    uint64_t            query_status_fence_gpu_addr;
    uint64_t            *query_status_fence_ptr;
    uint32_t                        read_val_offs;
    uint64_t            read_val_gpu_addr;
    uint32_t            *read_val_ptr;

    /* Reclaim flags stashed by amdgpu_mes_lock(), restored on unlock. */
    uint32_t            saved_flags;

    /* initialize kiq pipe */
    int                             (*kiq_hw_init)(struct amdgpu_device *adev);
    int                             (*kiq_hw_fini)(struct amdgpu_device *adev);

    /* ip specific functions */
    const struct amdgpu_mes_funcs   *funcs;
};
0136 
/*
 * Per-process MES state: identity (pasid/vm), the GPU-visible process
 * context buffer, scheduling quantum, its gangs, and doorbell bookkeeping.
 */
struct amdgpu_mes_process {
    int         pasid;
    struct          amdgpu_vm *vm;
    uint64_t        pd_gpu_addr;            /* page directory GPU address */
    struct amdgpu_bo    *proc_ctx_bo;       /* AMDGPU_MES_PROC_CTX_SIZE buffer */
    uint64_t        proc_ctx_gpu_addr;
    void            *proc_ctx_cpu_ptr;
    uint64_t        process_quantum;        /* scheduling time slice */
    struct          list_head gang_list;    /* gangs of this process */
    uint32_t        doorbell_index;
    unsigned long       *doorbell_bitmap;   /* tracks allocated doorbells */
    struct mutex        doorbell_lock;      /* protects the bitmap */
};
0150 
/*
 * A gang: a group of queues belonging to one process that MES schedules
 * together.  Linked into its process's gang_list; owns a queue_list.
 */
struct amdgpu_mes_gang {
    int                 gang_id;
    int                 priority;
    int                 inprocess_gang_priority; /* priority among the process's gangs */
    int                 global_priority_level;
    struct list_head        list;           /* entry in process->gang_list */
    struct amdgpu_mes_process   *process;   /* owning process */
    struct amdgpu_bo        *gang_ctx_bo;   /* AMDGPU_MES_GANG_CTX_SIZE buffer */
    uint64_t            gang_ctx_gpu_addr;
    void                *gang_ctx_cpu_ptr;
    uint64_t            gang_quantum;       /* scheduling time slice */
    struct list_head        queue_list;     /* queues of this gang */
};
0164 
/*
 * One hardware queue managed by MES, owned by a gang.  Holds the MQD
 * (memory queue descriptor) object and the ring backing the queue.
 */
struct amdgpu_mes_queue {
    struct list_head        list;           /* entry in gang->queue_list */
    struct amdgpu_mes_gang      *gang;      /* owning gang */
    int                 queue_id;
    uint64_t            doorbell_off;
    struct amdgpu_bo        *mqd_obj;       /* MQD backing object */
    void                *mqd_cpu_ptr;
    uint64_t            mqd_gpu_addr;
    uint64_t            wptr_gpu_addr;      /* write-pointer GPU address */
    int                 queue_type;
    int                 paging;             /* nonzero for a paging queue */
    struct amdgpu_ring      *ring;
};
0178 
/*
 * Caller-supplied properties for amdgpu_mes_add_hw_queue().  All fields
 * are inputs except doorbell_off, which is filled in on return.
 */
struct amdgpu_mes_queue_properties {
    int             queue_type;
    uint64_t                hqd_base_gpu_addr;
    uint64_t                rptr_gpu_addr;
    uint64_t                wptr_gpu_addr;
    uint64_t                wptr_mc_addr;
    uint32_t                queue_size;
    uint64_t                eop_gpu_addr;
    uint32_t                hqd_pipe_priority;
    uint32_t                hqd_queue_priority;
    bool            paging;
    struct amdgpu_ring  *ring;
    /* out */
    uint64_t            doorbell_off;       /* assigned doorbell offset */
};
0194 
/* Caller-supplied properties for amdgpu_mes_add_gang(). */
struct amdgpu_mes_gang_properties {
    uint32_t    priority;
    uint32_t    gang_quantum;               /* scheduling time slice */
    uint32_t    inprocess_gang_priority;
    uint32_t    priority_level;
    int         global_priority_level;
};
0202 
/*
 * Input for amdgpu_mes_funcs::add_hw_queue.  Bundles the process, gang
 * and queue parameters needed by the MES ADD_QUEUE firmware packet.
 */
struct mes_add_queue_input {
    /* process identity and address space */
    uint32_t    process_id;
    uint64_t    page_table_base_addr;
    uint64_t    process_va_start;
    uint64_t    process_va_end;
    uint64_t    process_quantum;
    uint64_t    process_context_addr;
    /* gang scheduling context */
    uint64_t    gang_quantum;
    uint64_t    gang_context_addr;
    uint32_t    inprocess_gang_priority;
    uint32_t    gang_global_priority_level;
    /* queue description */
    uint32_t    doorbell_offset;
    uint64_t    mqd_addr;
    uint64_t    wptr_addr;
    uint64_t    wptr_mc_addr;
    uint32_t    queue_type;
    uint32_t    paging;
    uint32_t        gws_base;
    uint32_t        gws_size;
    uint64_t    tba_addr;                   /* trap base address */
    uint64_t    tma_addr;                   /* trap memory address */
    uint32_t    is_kfd_process;
    uint32_t    is_aql_queue;
    uint32_t    queue_size;
};
0228 
/* Input for amdgpu_mes_funcs::remove_hw_queue. */
struct mes_remove_queue_input {
    uint32_t    doorbell_offset;
    uint64_t    gang_context_addr;
};

/* Input for amdgpu_mes_funcs::unmap_legacy_queue (non-MES-managed queues). */
struct mes_unmap_legacy_queue_input {
    enum amdgpu_unmap_queues_action    action;
    uint32_t                           queue_type;
    uint32_t                           doorbell_offset;
    uint32_t                           pipe_id;
    uint32_t                           queue_id;
    uint64_t                           trail_fence_addr;    /* fence written on completion */
    uint64_t                           trail_fence_data;
};

/* Input for amdgpu_mes_funcs::suspend_gang; gang_context_addr selects the
 * gang unless suspend_all_gangs is set. */
struct mes_suspend_gang_input {
    bool        suspend_all_gangs;
    uint64_t    gang_context_addr;
    uint64_t    suspend_fence_addr;         /* fence signaled when done */
    uint32_t    suspend_fence_value;
};

/* Input for amdgpu_mes_funcs::resume_gang; mirrors the suspend input. */
struct mes_resume_gang_input {
    bool        resume_all_gangs;
    uint64_t    gang_context_addr;
};
0255 
/* Misc register operations executed through the MES firmware. */
enum mes_misc_opcode {
    MES_MISC_OP_WRITE_REG,      /* uses mes_misc_op_input.write_reg */
    MES_MISC_OP_READ_REG,       /* uses mes_misc_op_input.read_reg */
    MES_MISC_OP_WRM_REG_WAIT,   /* uses mes_misc_op_input.wrm_reg */
    MES_MISC_OP_WRM_REG_WR_WAIT,/* uses mes_misc_op_input.wrm_reg */
};

/*
 * Input for amdgpu_mes_funcs::misc_op.  The union member to fill is
 * selected by op, as noted on the opcodes above.
 */
struct mes_misc_op_input {
    enum mes_misc_opcode op;

    union {
        /* read reg_offset, store the value at buffer_addr */
        struct {
            uint32_t                  reg_offset;
            uint64_t                  buffer_addr;
        } read_reg;

        /* write reg_value to reg_offset */
        struct {
            uint32_t                  reg_offset;
            uint32_t                  reg_value;
        } write_reg;

        /* write/wait on a register pair against (ref & mask) */
        struct {
            uint32_t                   ref;
            uint32_t                   mask;
            uint32_t                   reg0;
            uint32_t                   reg1;
        } wrm_reg;
    };
};
0285 
/*
 * IP-version-specific MES backend hooks (e.g. implemented per MES
 * hardware generation).  All return 0 on success, negative errno
 * otherwise -- TODO confirm against the implementations.
 */
struct amdgpu_mes_funcs {
    /* map a hardware queue described by *input */
    int (*add_hw_queue)(struct amdgpu_mes *mes,
                struct mes_add_queue_input *input);

    /* unmap a previously added hardware queue */
    int (*remove_hw_queue)(struct amdgpu_mes *mes,
                   struct mes_remove_queue_input *input);

    /* unmap a queue not managed by MES (legacy path) */
    int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
                  struct mes_unmap_legacy_queue_input *input);

    /* suspend one gang, or all gangs if requested in *input */
    int (*suspend_gang)(struct amdgpu_mes *mes,
                struct mes_suspend_gang_input *input);

    /* resume one gang, or all gangs if requested in *input */
    int (*resume_gang)(struct amdgpu_mes *mes,
               struct mes_resume_gang_input *input);

    /* misc register read/write/wait ops, see mes_misc_opcode */
    int (*misc_op)(struct amdgpu_mes *mes,
               struct mes_misc_op_input *input);
};
0305 
/* Convenience wrappers for the per-device KIQ pipe init/fini callbacks. */
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);

/* MES subsystem lifetime */
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);

/* process lifetime, keyed by pasid */
int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
                  struct amdgpu_vm *vm);
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);

/* gang lifetime; *gang_id receives the new gang's id */
int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
            struct amdgpu_mes_gang_properties *gprops,
            int *gang_id);
int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);

int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);

/* queue lifetime; *queue_id receives the new queue's id */
int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
                struct amdgpu_mes_queue_properties *qprops,
                int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);

int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
                  struct amdgpu_ring *ring,
                  enum amdgpu_unmap_queues_action action,
                  u64 gpu_addr, u64 seq);

/* register access routed through the MES firmware (misc ops) */
uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
            uint32_t reg, uint32_t val);
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
            uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
                  uint32_t reg0, uint32_t reg1,
                  uint32_t ref, uint32_t mask);

/* create/destroy an amdgpu_ring backed by an MES-managed queue */
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
            int queue_type, int idx,
            struct amdgpu_mes_ctx_data *ctx_data,
            struct amdgpu_ring **out);
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
                struct amdgpu_ring *ring);

uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
                           enum amdgpu_mes_priority_level prio);

/* context meta-data buffer management (alloc/free, map/unmap into a VM) */
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
                   struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
                 struct amdgpu_vm *vm,
                 struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
                   struct amdgpu_mes_ctx_data *ctx_data);

int amdgpu_mes_self_test(struct amdgpu_device *adev);

/* per-process doorbell slice allocation */
int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev,
                    unsigned int *doorbell_index);
void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev,
                    unsigned int doorbell_index);
unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar(
                    struct amdgpu_device *adev,
                    uint32_t doorbell_index,
                    unsigned int doorbell_id);
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
0375 
0376 /*
0377  * MES lock can be taken in MMU notifiers.
0378  *
0379  * A bit more detail about why to set no-FS reclaim with MES lock:
0380  *
0381  * The purpose of the MMU notifier is to stop GPU access to memory so
0382  * that the Linux VM subsystem can move pages around safely. This is
0383  * done by preempting user mode queues for the affected process. When
0384  * MES is used, MES lock needs to be taken to preempt the queues.
0385  *
0386  * The MMU notifier callback entry point in the driver is
0387  * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
0388  * there is:
0389  * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
0390  * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
0391  *
0392  * The last part of the chain is a function pointer where we take the
0393  * MES lock.
0394  *
0395  * The problem with taking locks in the MMU notifier is, that MMU
0396  * notifiers can be called in reclaim-FS context. That's where the
0397  * kernel frees up pages to make room for new page allocations under
0398  * memory pressure. While we are running in reclaim-FS context, we must
0399  * not trigger another memory reclaim operation because that would
0400  * recursively reenter the reclaim code and cause a deadlock. The
0401  * memalloc_nofs_save/restore calls guarantee that.
0402  *
0403  * In addition we also need to avoid lock dependencies on other locks taken
0404  * under the MES lock, for example reservation locks. Here is a possible
0405  * scenario of a deadlock:
0406  * Thread A: takes and holds reservation lock | triggers reclaim-FS |
0407  * MMU notifier | blocks trying to take MES lock
0408  * Thread B: takes and holds MES lock | blocks trying to take reservation lock
0409  *
0410  * In this scenario Thread B gets involved in a deadlock even without
0411  * triggering a reclaim-FS operation itself.
0412  * To fix this and break the lock dependency chain you'd need to either:
0413  * 1. protect reservation locks with memalloc_nofs_save/restore, or
0414  * 2. avoid taking reservation locks under the MES lock.
0415  *
0416  * Reservation locks are taken all over the kernel in different subsystems, we
 * have no control over them and their lock dependencies. So the only workable
0418  * solution is to avoid taking other locks under the MES lock.
0419  * As a result, make sure no reclaim-FS happens while holding this lock anywhere
0420  * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
0421  */
/*
 * Take the MES lock and enter a no-reclaim scope (rationale in the long
 * comment above).  Pair every call with amdgpu_mes_unlock().  The saved
 * reclaim flags live in *mes, so recursive locking of the same mes
 * instance is not supported.
 */
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
    /* Acquire the mutex first: saved_flags is protected by it. */
    mutex_lock(&mes->mutex_hidden);
    mes->saved_flags = memalloc_noreclaim_save();
}
0427 
/*
 * Leave the no-reclaim scope and release the MES lock, in strict reverse
 * order of amdgpu_mes_lock(): saved_flags must be read before the mutex
 * protecting it is dropped.
 */
static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
    memalloc_noreclaim_restore(mes->saved_flags);
    mutex_unlock(&mes->mutex_hidden);
}
0433 #endif /* __AMDGPU_MES_H__ */