/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_PRIV_H_INCLUDED
#define KFD_PRIV_H_INCLUDED

#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <linux/sysfs.h>
#include <linux/device_cgroup.h>
#include <drm/drm_file.h>
#include <drm/drm_drv.h>
#include <drm/drm_device.h>
#include <drm/drm_ioctl.h>
#include <kgd_kfd_interface.h>
#include <linux/swap.h>

#include "amd_shared.h"
#include "amdgpu.h"

#define KFD_MAX_RING_ENTRY_SIZE 8

#define KFD_SYSFS_FILE_MODE 0444

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Use upper bits of mmap offset to store KFD driver-specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id, identifying the GPU to which the offset belongs
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses the offset in pages. Hence, these
 *  defines are w.r.t. PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT 62
#define KFD_MMAP_TYPE_MASK  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL  (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS    (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM  (0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO  (0x0ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
                << KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
                & KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GET_GPU_ID(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
                >> KFD_MMAP_GPU_ID_SHIFT)
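
/*
 * Illustrative sketch (not the driver's code): composing and decoding a
 * doorbell mmap offset with the macros above. The gpu_id value is made up.
 *
 *   uint64_t offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0x4f2a);
 *
 *   (offset & KFD_MMAP_TYPE_MASK)    -> KFD_MMAP_TYPE_DOORBELL
 *   KFD_MMAP_GET_GPU_ID(offset)      -> 0x4f2a
 *
 * Per the NOTE above, this encoded value is page-based, so the byte offset
 * handed to mmap() would be (offset << PAGE_SHIFT).
 */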

/*
 * When working with the cp scheduler we should assign the HIQ manually or via
 * the amdgpu driver to a fixed hqd slot; here are the fixed HIQ hqd slot
 * definitions for Kaveri. On Kaveri only the first ME's queues participate
 * in the cp scheduling, so with that in mind we set the HIQ slot in the
 * second ME.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) \
    ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
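
/*
 * Example (illustrative only): the macro derives both the allocation size
 * and the cast from the pointer's own type, so the call site cannot get the
 * sizeof wrong:
 *
 *   struct kfd_vmid_info *info = kfd_alloc_struct(info);
 *
 * kzalloc() zeroes the memory and may return NULL, so callers still check
 * the result.
 */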

#define KFD_MAX_NUM_OF_PROCESSES 512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first page is the TBA used for the CWSR ISA code. The second
 * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
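
/*
 * A minimal sketch of the resulting layout (assuming the buffer starts at
 * the process's cwsr_base): the TBA occupies page 0 and the TMA starts one
 * page in, so the two addresses would be derived as
 *
 *   tba_addr = cwsr_base;                        // page 0: trap handler code
 *   tma_addr = cwsr_base + KFD_CWSR_TMA_OFFSET;  // page 1: trap memory area
 */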

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE        \
    (KFD_MAX_NUM_OF_PROCESSES *         \
            KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE 2048

#define KFD_UNMAP_LATENCY_MS    (4000)

/*
 * 512 = 0x200: the doorbell index distance between SDMA RLC (2*i) and
 * (2*i+1) in the same SDMA engine on SOC15, which has 8-byte doorbells
 * for SDMA. A distance of 512 8-byte doorbells (512 * 8 = 4096 bytes,
 * i.e. one page away) ensures that the SDMA RLC (2*i+1) doorbells (in
 * terms of the lower 12-bit address) lie exactly in the OFFSET and SIZE
 * set in registers like BIF_SDMA0_DOORBELL_RANGE.
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512

/**
 * enum kfd_ioctl_flags - KFD ioctl flags
 * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
 * userspace can use a given ioctl.
 */
enum kfd_ioctl_flags {
    /*
     * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
     * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
     * perform privileged operations and load arbitrary data into MQDs and
     * eventually HQD registers when the queue is mapped by HWS. To prevent
     * this we perform additional security checks.
     *
     * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
     *
     * Note: Since earlier versions of Docker do not support
     * CHECKPOINT_RESTORE, we also allow ioctls with the SYS_ADMIN capability.
     */
    KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};
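
/*
 * A minimal sketch of the check this flag implies (the helper name and its
 * use of the descriptor are illustrative, not the driver's actual code):
 *
 *   static bool kfd_ioctl_permitted(const struct amdkfd_ioctl_desc *ioctl)
 *   {
 *           if (ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)
 *                   return capable(CAP_CHECKPOINT_RESTORE) ||
 *                          capable(CAP_SYS_ADMIN);
 *           return true;
 *   }
 */
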
/*
 * Kernel module parameter to specify the maximum number of supported queues
 * per device
 */
extern int max_num_of_queues_per_device;


/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

/*
 * Kernel module parameter to specify the maximum process
 * number per HW scheduler
 */
extern int hws_max_conc_proc;

extern int cwsr_enable;

/*
 * Kernel module parameter to specify whether to send sigterm to an HSA
 * process on an unhandled exception
 */
extern int send_sigterm;

/*
 * This kernel module parameter is used to simulate a large-BAR machine on
 * machines without large BAR enabled.
 */
extern int debug_largebar;

/*
 * Ignore the CRAT table during KFD initialization; can be used to work around
 * broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;

/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;

/* Whether MEC FW supports GWS barriers */
extern bool hws_gws_support;

/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;

/*
 * Don't evict process queues on vm fault
 */
extern int amdgpu_no_queue_eviction_on_vm_fault;

/* Enable eviction debug messages */
extern bool debug_evictions;

enum cache_policy {
    cache_policy_coherent,
    cache_policy_noncoherent
};

#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
#define KFD_IS_SOC15(dev)   ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))

struct kfd_event_interrupt_class {
    bool (*interrupt_isr)(struct kfd_dev *dev,
            const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
            bool *patched_flag);
    void (*interrupt_wq)(struct kfd_dev *dev,
            const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
    uint32_t gfx_target_version;
    const struct kfd_event_interrupt_class *event_interrupt_class;
    unsigned int max_pasid_bits;
    unsigned int max_no_of_hqd;
    unsigned int doorbell_size;
    size_t ih_ring_entry_size;
    uint8_t num_of_watch_points;
    uint16_t mqd_size_aligned;
    bool supports_cwsr;
    bool needs_iommu_device;
    bool needs_pci_atomics;
    uint32_t no_atomic_fw_version;
    unsigned int num_sdma_queues_per_engine;
    unsigned int num_reserved_sdma_queues_per_engine;
    uint64_t reserved_sdma_queues_bitmap;
};

unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev);

struct kfd_mem_obj {
    uint32_t range_start;
    uint32_t range_end;
    uint64_t gpu_addr;
    uint32_t *cpu_ptr;
    void *gtt_mem;
};

struct kfd_vmid_info {
    uint32_t first_vmid_kfd;
    uint32_t last_vmid_kfd;
    uint32_t vmid_num_kfd;
};

struct kfd_dev {
    struct amdgpu_device *adev;

    struct kfd_device_info device_info;
    struct pci_dev *pdev;
    struct drm_device *ddev;

    unsigned int id;        /* topology stub index */

    phys_addr_t doorbell_base;  /* Start of actual doorbells used by
                     * KFD. It is aligned for mapping
                     * into user mode
                     */
    size_t doorbell_base_dw_offset; /* Offset from the start of the PCI
                     * doorbell BAR to the first KFD
                     * doorbell in dwords. GFX reserves
                     * the segment before this offset.
                     */
    u32 __iomem *doorbell_kernel_ptr; /* Pointer to the doorbell page
                       * used by the kernel queue
                       */

    struct kgd2kfd_shared_resources shared_resources;
    struct kfd_vmid_info vm_info;
    struct kfd_local_mem_info local_mem_info;

    const struct kfd2kgd_calls *kfd2kgd;
    struct mutex doorbell_mutex;
    DECLARE_BITMAP(doorbell_available_index,
            KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

    void *gtt_mem;
    uint64_t gtt_start_gpu_addr;
    void *gtt_start_cpu_ptr;
    void *gtt_sa_bitmap;
    struct mutex gtt_sa_lock;
    unsigned int gtt_sa_chunk_size;
    unsigned int gtt_sa_num_of_chunks;

    /* Interrupts */
    struct kfifo ih_fifo;
    struct workqueue_struct *ih_wq;
    struct work_struct interrupt_work;
    spinlock_t interrupt_lock;

    /* QCM Device instance */
    struct device_queue_manager *dqm;

    bool init_complete;
    /*
     * Interrupts of interest to KFD are copied
     * from the HW ring into a SW ring.
     */
    bool interrupts_active;

    /* Firmware versions */
    uint16_t mec_fw_version;
    uint16_t mec2_fw_version;
    uint16_t sdma_fw_version;

    /* Maximum process number mapped to HW scheduler */
    unsigned int max_proc_per_quantum;

    /* CWSR */
    bool cwsr_enabled;
    const void *cwsr_isa;
    unsigned int cwsr_isa_size;

    /* xGMI */
    uint64_t hive_id;

    bool pci_atomic_requested;

    /* Use IOMMU v2 flag */
    bool use_iommu_v2;

    /* SRAM ECC flag */
    atomic_t sram_ecc_flag;

    /* Compute Profile ref. count */
    atomic_t compute_profile;

    /* Global GWS resource shared between processes */
    void *gws;

    /* Clients watching SMI events */
    struct list_head smi_clients;
    spinlock_t smi_lock;

    uint32_t reset_seq_num;

    struct ida doorbell_ida;
    unsigned int max_doorbell_slices;

    int noretry;

    /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
    struct dev_pagemap pgmap;
};

enum kfd_mempool {
    KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
    KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
    KFD_MEMPOOL_FRAMEBUFFER = 3,
};

/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);

/**
 * enum kfd_unmap_queues_filter - Enum for queue filters.
 *
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 *                      running queues list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
 *                      in the run list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 *                      specific process.
 *
 */
enum kfd_unmap_queues_filter {
    KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
    KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};

/**
 * enum kfd_queue_type - Enum for various queue types.
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
 *
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 *
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
 */
enum kfd_queue_type  {
    KFD_QUEUE_TYPE_COMPUTE,
    KFD_QUEUE_TYPE_SDMA,
    KFD_QUEUE_TYPE_HIQ,
    KFD_QUEUE_TYPE_DIQ,
    KFD_QUEUE_TYPE_SDMA_XGMI
};

enum kfd_queue_format {
    KFD_QUEUE_FORMAT_PM4,
    KFD_QUEUE_FORMAT_AQL
};

enum KFD_QUEUE_PRIORITY {
    KFD_QUEUE_PRIORITY_MINIMUM = 0,
    KFD_QUEUE_PRIORITY_MAXIMUM = 15
};

/**
 * struct queue_properties
 *
 * @type: The queue type.
 *
 * @queue_id: Queue identifier.
 *
 * @queue_address: Queue ring buffer address.
 *
 * @queue_size: Queue ring buffer size.
 *
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity is from 0 to f, where f is the highest priority.
 * Currently all queues are initialized with the highest priority.
 *
 * @queue_percent: This field is partially implemented; currently a zero in
 * this field means that the queue is not active.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * cp has read from the ring buffer. This field is updated automatically by
 * the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of a new packet written to the queue ring
 * buffer. This field should be similar to write_ptr and the user should
 * update this field after updating the write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. An interop queue can
 * access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
 * @is_gws should be protected by the DQM lock, since changing it can yield the
 * possibility of updating DQM state on number of GWS queues.
 *
 * @vmid: If the scheduling mode is no cp scheduling, this field defines the
 * vmid of the queue.
 *
 * This structure represents the queue properties for each queue, whether it
 * is a user-mode or a kernel-mode queue.
 *
 */

struct queue_properties {
    enum kfd_queue_type type;
    enum kfd_queue_format format;
    unsigned int queue_id;
    uint64_t queue_address;
    uint64_t  queue_size;
    uint32_t priority;
    uint32_t queue_percent;
    uint32_t *read_ptr;
    uint32_t *write_ptr;
    void __iomem *doorbell_ptr;
    uint32_t doorbell_off;
    bool is_interop;
    bool is_evicted;
    bool is_active;
    bool is_gws;
    /* Not relevant for user mode queues in cp scheduling */
    unsigned int vmid;
    /* Relevant only for sdma queues */
    uint32_t sdma_engine_id;
    uint32_t sdma_queue_id;
    uint32_t sdma_vm_addr;
    /* Relevant only for VI */
    uint64_t eop_ring_buffer_address;
    uint32_t eop_ring_buffer_size;
    uint64_t ctx_save_restore_area_address;
    uint32_t ctx_save_restore_area_size;
    uint32_t ctl_stack_size;
    uint64_t tba_addr;
    uint64_t tma_addr;
};

#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&   \
                (q).queue_address != 0 &&   \
                (q).queue_percent > 0 &&    \
                !(q).is_evicted)

enum mqd_update_flag {
    UPDATE_FLAG_CU_MASK = 0,
};

struct mqd_update_info {
    union {
        struct {
            uint32_t count; /* Must be a multiple of 32 */
            uint32_t *ptr;
        } cu_mask;
    };
    enum mqd_update_flag update_flag;
};
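
/*
 * Example (illustrative): requesting a CU-mask update covering 32 CUs.
 * count must be a multiple of 32 because the mask is carried in uint32_t
 * words; the mask value here is made up:
 *
 *   uint32_t mask = 0x0000ffff;  // enable the first 16 of 32 CUs
 *   struct mqd_update_info minfo = {
 *           .cu_mask = { .count = 32, .ptr = &mask },
 *           .update_flag = UPDATE_FLAG_CU_MASK,
 *   };
 */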

/**
 * struct queue
 *
 * @list: Queue linked list.
 *
 * @mqd: The queue MQD (memory queue descriptor).
 *
 * @mqd_mem_obj: The MQD local gpu memory object.
 *
 * @gart_mqd_addr: The MQD gart mc address.
 *
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies the micro engine
 *   id that the queue should be executed on.
 *
 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 *    id.
 *
 * @queue: Used only in no cp scheduling mode and identifies the queue's slot.
 *
 * @process: The kfd process that created this queue.
 *
 * @device: The kfd device that created this queue.
 *
 * @gws: Points to the gws kgd_mem if this is a gws control queue; NULL
 * otherwise.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 *
 */

struct queue {
    struct list_head list;
    void *mqd;
    struct kfd_mem_obj *mqd_mem_obj;
    uint64_t gart_mqd_addr;
    struct queue_properties properties;

    uint32_t mec;
    uint32_t pipe;
    uint32_t queue;

    unsigned int sdma_id;
    unsigned int doorbell_id;

    struct kfd_process  *process;
    struct kfd_dev      *device;
    void *gws;

    /* procfs */
    struct kobject kobj;

    void *gang_ctx_bo;
    uint64_t gang_ctx_gpu_addr;
    void *gang_ctx_cpu_ptr;

    struct amdgpu_bo *wptr_bo;
};

enum KFD_MQD_TYPE {
    KFD_MQD_TYPE_HIQ = 0,       /* for hiq */
    KFD_MQD_TYPE_CP,        /* for cp queues and diq */
    KFD_MQD_TYPE_SDMA,      /* for sdma queues */
    KFD_MQD_TYPE_DIQ,       /* for diq */
    KFD_MQD_TYPE_MAX
};

enum KFD_PIPE_PRIORITY {
    KFD_PIPE_PRIORITY_CS_LOW = 0,
    KFD_PIPE_PRIORITY_CS_MEDIUM,
    KFD_PIPE_PRIORITY_CS_HIGH
};

struct scheduling_resources {
    unsigned int vmid_mask;
    enum kfd_queue_type type;
    uint64_t queue_mask;
    uint64_t gws_mask;
    uint32_t oac_mask;
    uint32_t gds_heap_base;
    uint32_t gds_heap_size;
};

struct process_queue_manager {
    /* data */
    struct kfd_process  *process;
    struct list_head    queues;
    unsigned long       *queue_slot_bitmap;
};

struct qcm_process_device {
    /* The Device Queue Manager that owns this data */
    struct device_queue_manager *dqm;
    struct process_queue_manager *pqm;
    /* Queues list */
    struct list_head queues_list;
    struct list_head priv_queue_list;

    unsigned int queue_count;
    unsigned int vmid;
    bool is_debug;
    unsigned int evicted; /* eviction counter, 0=active */

    /* This flag tells if we should reset all wavefronts on
     * process termination
     */
    bool reset_wavefronts;

    /* This flag tells us if this process has a GWS-capable
     * queue that will be mapped into the runlist. It's
     * possible to request a GWS BO, but not have the queue
     * currently mapped, and this changes how the MAP_PROCESS
     * PM4 packet is configured.
     */
    bool mapped_gws_queue;

    /* All the memory management data should be here too */
    uint64_t gds_context_area;
    /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
    uint64_t page_table_base;
    uint32_t sh_mem_config;
    uint32_t sh_mem_bases;
    uint32_t sh_mem_ape1_base;
    uint32_t sh_mem_ape1_limit;
    uint32_t gds_size;
    uint32_t num_gws;
    uint32_t num_oac;
    uint32_t sh_hidden_private_base;

    /* CWSR memory */
    struct kgd_mem *cwsr_mem;
    void *cwsr_kaddr;
    uint64_t cwsr_base;
    uint64_t tba_addr;
    uint64_t tma_addr;

    /* IB memory */
    struct kgd_mem *ib_mem;
    uint64_t ib_base;
    void *ib_kaddr;

    /* doorbell resources per process per device */
    unsigned long *doorbell_bitmap;
};

/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
    (((uint64_t)(gpu_id) << 32) + idr_handle)
#define GET_GPU_ID(handle) (handle >> 32)
#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
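
/*
 * Example (illustrative, with made-up values): round-tripping a handle.
 *
 *   uint64_t handle = MAKE_HANDLE(0x4f2a, 7);
 *
 *   GET_GPU_ID(handle)      -> 0x4f2a
 *   GET_IDR_HANDLE(handle)  -> 7
 */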

enum kfd_pdd_bound {
    PDD_UNBOUND = 0,
    PDD_BOUND,
    PDD_BOUND_SUSPENDED,
};

#define MAX_SYSFS_FILENAME_LEN 15

/*
 * SDMA counter runs at 100MHz frequency.
 * We display SDMA activity in microsecond granularity in sysfs.
 * As a result, the divisor is 100.
 */
#define SDMA_ACTIVITY_DIVISOR  100
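
/*
 * Example: at 100 MHz one counter tick is 10 ns, so 100 ticks make one
 * microsecond. A raw counter delta of 250000 ticks is therefore shown in
 * sysfs as 250000 / SDMA_ACTIVITY_DIVISOR = 2500 us of SDMA activity.
 */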

/* Data that is per-process-per device. */
struct kfd_process_device {
    /* The device that owns this data. */
    struct kfd_dev *dev;

    /* The process that owns this kfd_process_device. */
    struct kfd_process *process;

    /* per-process-per device QCM data structure */
    struct qcm_process_device qpd;

    /* Apertures */
    uint64_t lds_base;
    uint64_t lds_limit;
    uint64_t gpuvm_base;
    uint64_t gpuvm_limit;
    uint64_t scratch_base;
    uint64_t scratch_limit;

    /* VM context for GPUVM allocations */
    struct file *drm_file;
    void *drm_priv;
    atomic64_t tlb_seq;

    /* GPUVM allocations storage */
    struct idr alloc_idr;

    /* Flag used to tell whether the pdd has been dequeued from the dqm.
     * This is used to prevent dev->dqm->ops.process_termination() from
     * being called twice when it is already called in the IOMMU callback
     * function.
     */
    bool already_dequeued;
    bool runtime_inuse;

    /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
    enum kfd_pdd_bound bound;

    /* VRAM usage */
    uint64_t vram_usage;
    struct attribute attr_vram;
    char vram_filename[MAX_SYSFS_FILENAME_LEN];

    /* SDMA activity tracking */
    uint64_t sdma_past_activity_counter;
    struct attribute attr_sdma;
    char sdma_filename[MAX_SYSFS_FILENAME_LEN];

    /* Eviction activity tracking */
    uint64_t last_evict_timestamp;
    atomic64_t evict_duration_counter;
    struct attribute attr_evict;

    struct kobject *kobj_stats;
    unsigned int doorbell_index;

    /*
     * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
     * that is associated with the device encoded by "this" struct instance.
     * The value reflects CU usage by all of the waves launched by this
     * process on this device. A very important property of occupancy is
     * that its value is a snapshot of current use.
     *
     * The following is to be noted regarding how this parameter is reported:
     *
     *  The number of waves that a CU can launch is limited by a couple of
     *  parameters. These are encoded by the struct amdgpu_cu_info instance
     *  that is part of every device definition. For GFX9 devices this
     *  translates to 40 waves (simd_per_cu * max_waves_per_simd, i.e.
     *  4 SIMDs per CU * 10 waves per SIMD) when waves do not use scratch
     *  memory, and 32 waves (max_scratch_slots_per_cu) when they do use
     *  scratch memory. This could change for future devices and therefore
     *  this example should be considered as a guide.
     *
     *  All CUs of a device are available to the process. This may not be
     *  true under certain conditions - e.g. CU masking.
     *
     *  Finally, the number of CUs occupied by a process is affected both
     *  by the number of CUs the device has and by the number of other
     *  competing processes.
     */
    struct attribute attr_cu_occupancy;

    /* sysfs counters for GPU retry fault and page migration tracking */
    struct kobject *kobj_counters;
    struct attribute attr_faults;
    struct attribute attr_page_in;
    struct attribute attr_page_out;
    uint64_t faults;
    uint64_t page_in;
    uint64_t page_out;
    /*
     * If this process has been checkpointed before, then the user
     * application will use the original gpu_id on the
     * checkpointed node to refer to this device.
     */
    uint32_t user_gpu_id;

    void *proc_ctx_bo;
    uint64_t proc_ctx_gpu_addr;
    void *proc_ctx_cpu_ptr;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)

struct svm_range_list {
    struct mutex            lock;
    struct rb_root_cached       objects;
    struct list_head        list;
    struct work_struct      deferred_list_work;
    struct list_head        deferred_range_list;
    struct list_head                criu_svm_metadata_list;
    spinlock_t          deferred_list_lock;
    atomic_t            evicted_ranges;
    atomic_t            drain_pagefaults;
    struct delayed_work     restore_work;
    DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
    struct task_struct      *faulting_task;
};

/* Process data */
struct kfd_process {
    /*
     * kfd_process structures are stored in an mm_struct* -> kfd_process*
     * hash table (kfd_processes in kfd_process.c)
     */
    struct hlist_node kfd_processes;

    /*
     * Opaque pointer to mm_struct. We don't hold a reference to
     * it, so it should never be dereferenced from here. This is
     * only used for looking up processes by their mm.
     */
    void *mm;

    struct kref ref;
    struct work_struct release_work;

    struct mutex mutex;

    /*
     * In any process, the thread that started main() is the lead
     * thread and outlives the rest.
     * It is here because amd_iommu_bind_pasid wants a task_struct.
     * It can also be used for safely getting a reference to the
     * mm_struct of the process.
     */
    struct task_struct *lead_thread;

    /* We want to receive a notification when the mm_struct is destroyed */
    struct mmu_notifier mmu_notifier;

    u32 pasid;

    /*
     * Array of kfd_process_device pointers,
     * one for each device the process is using.
     */
    struct kfd_process_device *pdds[MAX_GPU_INSTANCE];
    uint32_t n_pdds;

    struct process_queue_manager pqm;

    /* Is the user space process 32 bit? */
    bool is_32bit_user_mode;

    /* Event-related data */
    struct mutex event_mutex;
    /* Event ID allocator and lookup */
    struct idr event_idr;
    /* Event page */
    u64 signal_handle;
    struct kfd_signal_page *signal_page;
    size_t signal_mapped_size;
    size_t signal_event_count;
    bool signal_event_limit_reached;

    /* Information used for memory eviction */
    void *kgd_process_info;
    /* Eviction fence that is attached to all the BOs of this process. The
     * fence will be triggered during eviction and a new one will be created
     * during restore.
     */
    struct dma_fence *ef;

    /* Work items for evicting and restoring BOs */
    struct delayed_work eviction_work;
    struct delayed_work restore_work;
    /* seqno of the last scheduled eviction */
    unsigned int last_eviction_seqno;
    /* Approx. the last timestamp (in jiffies) when the process was
     * restored after an eviction
     */
    unsigned long last_restore_timestamp;

    /* Kobj for our procfs */
    struct kobject *kobj;
    struct kobject *kobj_queues;
    struct attribute attr_pasid;

    /* Shared virtual memory registered by this process */
    struct svm_range_list svms;

    bool xnack_enabled;

    atomic_t poison;
    /* Queues are in a paused state because we are in the middle of doing
     * a CRIU checkpoint
     */
    bool queues_paused;
};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;

/**
 * typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
 *
 * @filep: pointer to file structure.
 * @p: amdkfd process pointer.
 * @data: pointer to arg that was copied from user.
 *
 * Return: returns ioctl completion code.
 */
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
                void *data);

struct amdkfd_ioctl_desc {
    unsigned int cmd;
    int flags;
    amdkfd_ioctl_t *func;
    unsigned int cmd_drv;
    const char *name;
};
bool kfd_dev_is_large_bar(struct kfd_dev *dev);

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);

int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_adev(struct kfd_process *p,
                   struct amdgpu_device *adev, uint32_t *gpuid,
                   uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
                uint32_t gpuidx, uint32_t *gpuid) {
    return gpuidx < p->n_pdds ? p->pdds[gpuidx]->dev->id : -EINVAL;
}
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
                struct kfd_process *p, uint32_t gpuidx) {
    return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
}

void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
                             uint32_t gpu_id);

int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);

int kfd_process_device_init_vm(struct kfd_process_device *pdd,
                   struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
                        struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
                            struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
                            struct kfd_process *p);

bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
              struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
                    void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
                    int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
                    int handle);
struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);

/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);

/* Doorbells */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
              struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
                    unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
                    struct kfd_process_device *pdd,
                    unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
                unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
                unsigned int doorbell_index);
/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
            struct kfd_mem_obj **mem_obj);

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);

extern struct device *kfd_device;

/* KFD's procfs */
void kfd_procfs_init(void);
void kfd_procfs_shutdown(void);
int kfd_procfs_add_queue(struct queue *q);
void kfd_procfs_del_queue(struct queue *q);

/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
                        uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
                        uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);

/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev,
                const uint32_t *ih_ring_entry,
                uint32_t *patched_ihre, bool *flag);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
                  uint64_t tba_addr,
                  uint64_t tma_addr);

/* CRIU */
/*
 * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any
 * of the CRIU private structures:
 * kfd_criu_process_priv_data
 * kfd_criu_device_priv_data
 * kfd_criu_bo_priv_data
 * kfd_criu_queue_priv_data
 * kfd_criu_event_priv_data
 * kfd_criu_svm_range_priv_data
 */

#define KFD_CRIU_PRIV_VERSION 1

struct kfd_criu_process_priv_data {
    uint32_t version;
    uint32_t xnack_mode;
};

struct kfd_criu_device_priv_data {
    /* For future use */
    uint64_t reserved;
};

struct kfd_criu_bo_priv_data {
    uint64_t user_addr;
    uint32_t idr_handle;
    uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
};

/*
 * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data,
 * and kfd_criu_svm_range_priv_data are the object type
 */
enum kfd_criu_object_type {
    KFD_CRIU_OBJECT_TYPE_QUEUE,
    KFD_CRIU_OBJECT_TYPE_EVENT,
    KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
};

struct kfd_criu_svm_range_priv_data {
    uint32_t object_type;
    uint64_t start_addr;
    uint64_t size;
    /* Variable length array of attributes */
    struct kfd_ioctl_svm_attribute attrs[];
};

struct kfd_criu_queue_priv_data {
    uint32_t object_type;
    uint64_t q_address;
    uint64_t q_size;
    uint64_t read_ptr_addr;
    uint64_t write_ptr_addr;
    uint64_t doorbell_off;
    uint64_t eop_ring_buffer_address;
    uint64_t ctx_save_restore_area_address;
    uint32_t gpu_id;
    uint32_t type;
    uint32_t format;
    uint32_t q_id;
    uint32_t priority;
    uint32_t q_percent;
    uint32_t doorbell_id;
    uint32_t gws;
    uint32_t sdma_id;
    uint32_t eop_ring_buffer_size;
    uint32_t ctx_save_restore_area_size;
    uint32_t ctl_stack_size;
    uint32_t mqd_size;
};

struct kfd_criu_event_priv_data {
    uint32_t object_type;
    uint64_t user_handle;
    uint32_t event_id;
    uint32_t auto_reset;
    uint32_t type;
    uint32_t signaled;

    union {
        struct kfd_hsa_memory_exception_data memory_exception_data;
        struct kfd_hsa_hw_exception_data hw_exception_data;
    };
};

int kfd_process_get_queue_info(struct kfd_process *p,
                   uint32_t *num_queues,
                   uint64_t *priv_data_sizes);

int kfd_criu_checkpoint_queues(struct kfd_process *p,
             uint8_t __user *user_priv_data,
             uint64_t *priv_data_offset);

int kfd_criu_restore_queue(struct kfd_process *p,
               uint8_t __user *user_priv_data,
               uint64_t *priv_data_offset,
               uint64_t max_priv_data_size);

int kfd_criu_checkpoint_events(struct kfd_process *p,
             uint8_t __user *user_priv_data,
             uint64_t *priv_data_offset);

int kfd_criu_restore_event(struct file *devkfd,
               struct kfd_process *p,
               uint8_t __user *user_priv_data,
               uint64_t *priv_data_offset,
               uint64_t max_priv_data_size);
/* CRIU - End */

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
        struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
                    enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);

/* Process Queue Manager */
struct process_queue_node {
    struct queue *q;
    struct kernel_queue *kq;
    struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
                struct kfd_dev *dev,
                struct file *f,
                struct queue_properties *properties,
                unsigned int *qid,
                struct amdgpu_bo *wptr_bo,
                const struct kfd_criu_queue_priv_data *q_data,
                const void *restore_mqd,
                const void *restore_ctl_stack,
                uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
            struct queue_properties *p);
int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
            struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
            void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
                        unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
                        unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
               unsigned int qid,
               void __user *ctl_stack,
               u32 *ctl_stack_used_size,
               u32 *save_area_used_size);

int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
                  uint64_t fence_value,
                  unsigned int timeout_ms);

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
                  unsigned int qid,
                  u32 *mqd_size,
                  u32 *ctl_stack_size);
/* Packet Manager */

#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT   (10)

struct packet_manager {
    struct device_queue_manager *dqm;
    struct kernel_queue *priv_queue;
    struct mutex lock;
    bool allocated;
    struct kfd_mem_obj *ib_buffer_obj;
    unsigned int ib_size_bytes;
    bool is_over_subscription;

    const struct packet_manager_funcs *pmf;
};

struct packet_manager_funcs {
    /* Support ASIC-specific packet formats for PM4 packets */
    int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
            struct qcm_process_device *qpd);
    int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
            uint64_t ib, size_t ib_size_in_dwords, bool chain);
    int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
            struct scheduling_resources *res);
    int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
            struct queue *q, bool is_static);
    int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
            enum kfd_unmap_queues_filter mode,
            uint32_t filter_param, bool reset);
    int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
            uint64_t fence_address, uint64_t fence_value);
    int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

    /* Packet sizes */
    int map_process_size;
    int runlist_size;
    int set_resources_size;
    int map_queues_size;
    int unmap_queues_size;
    int query_status_size;
    int release_mem_size;
};

extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm, bool hanging);
int pm_send_set_resources(struct packet_manager *pm,
                struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
                uint64_t fence_value);

int pm_send_unmap_queue(struct packet_manager *pm,
            enum kfd_unmap_queues_filter mode,
            uint32_t filter_param, bool reset);

void pm_release_ib(struct packet_manager *pm);

/* The following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
extern const struct kfd_event_interrupt_class event_interrupt_class_v11;

extern const struct kfd_device_global_init_class device_global_init_class_cik;

int kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
               uint32_t num_events, void __user *data,
               bool all, uint32_t *user_timeout_ms,
               uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
                uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
                u32 pasid, unsigned long address,
                bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);

int kfd_event_create(struct file *devkfd, struct kfd_process *p,
             uint32_t event_type, bool auto_reset, uint32_t node_id,
             uint32_t *event_id, uint32_t *event_trigger_data,
             uint64_t *event_page_offset, uint32_t *event_slot_index);

int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
                struct kfd_vm_fault_info *info);

void kfd_signal_reset_event(struct kfd_dev *dev);

void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);

void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);

static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
    return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
           (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
           dev->adev->sdma.instance[0].fw_version >= 18) ||
           KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}

bool kfd_is_locked(void);

/* Compute profile */
void kfd_inc_compute_active(struct kfd_dev *dev);
void kfd_dec_compute_active(struct kfd_dev *dev);

/* Cgroup Support */
/* Check with the device cgroup whether the @kfd device is accessible */
static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
{
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
    struct drm_device *ddev = kfd->ddev;

    return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
                      ddev->render->index,
                      DEVCG_ACC_WRITE | DEVCG_ACC_READ);
#else
    return 0;
#endif
}

/* Debugfs */
#if defined(CONFIG_DEBUG_FS)

void kfd_debugfs_init(void);
void kfd_debugfs_fini(void);
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
int pqm_debugfs_mqds(struct seq_file *m, void *data);
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

int kfd_debugfs_hang_hws(struct kfd_dev *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);

#else

static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}

#endif

#endif