/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_PRIV_H_INCLUDED
#define KFD_PRIV_H_INCLUDED

#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <linux/sysfs.h>
#include <linux/device_cgroup.h>
#include <drm/drm_file.h>
#include <drm/drm_drv.h>
#include <drm/drm_device.h>
#include <drm/drm_ioctl.h>
#include <kgd_kfd_interface.h>
#include <linux/swap.h>

#include "amd_shared.h"
#include "amdgpu.h"

#define KFD_MAX_RING_ENTRY_SIZE 8

#define KFD_SYSFS_FILE_MODE 0444

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t to PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT 62
#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) (((uint64_t)(gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GET_GPU_ID(offset) (((offset) & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)
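
/*
 * Example (illustrative only): encoding and decoding a doorbell mmap offset
 * for a device with gpu_id 0x1234; the value handed to user space is this
 * encoded offset in units of PAGE_SIZE (via vm_pgoff):
 *
 *	uint64_t offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0x1234);
 *	uint32_t gpu_id = KFD_MMAP_GET_GPU_ID(offset);	// 0x1234
 *	uint64_t type = offset & KFD_MMAP_TYPE_MASK;	// KFD_MMAP_TYPE_DOORBELL
 */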

/*
 * When working with CP scheduling (HWS), the HIQ must be assigned to a fixed
 * HQD slot. Pipe 4, queue 0 places it in the second MEC, since on CIK parts
 * such as Kaveri only the first MEC's queues participate in CP scheduling.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
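
/*
 * Example (illustrative): kfd_alloc_struct() derives the allocation size and
 * cast from the pointer it is assigned to, so the struct name is not
 * repeated:
 *
 *	struct kfd_process *process;
 *
 *	process = kfd_alloc_struct(process);
 *	if (!process)
 *		return ERR_PTR(-ENOMEM);
 */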

#define KFD_MAX_NUM_OF_PROCESSES 512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first page is the TBA used for the CWSR ISA code. The second
 * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
	(KFD_MAX_NUM_OF_PROCESSES *			\
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE 2048

#define KFD_UNMAP_LATENCY_MS	(4000)

/*
 * 512 = 0x200
 * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
 * same SDMA engine.
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512

/**
 * enum kfd_ioctl_flags - KFD ioctl flags
 * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
 * userspace can use a particular ioctl.
 */
enum kfd_ioctl_flags {
	/*
	 * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
	 * perform privileged operations and load arbitrary data into MQDs and
	 * eventually HQD registers when the queue is mapped by HWS. In order
	 * to prevent this we should perform additional security checks; such
	 * ioctls require CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN.
	 */
	KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};
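
/*
 * Example (illustrative, mirroring the dispatcher in kfd_chardev.c): an
 * ioctl whose descriptor carries KFD_IOC_FLAG_CHECKPOINT_RESTORE is rejected
 * unless the caller is sufficiently privileged:
 *
 *	if (unlikely((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
 *		     !capable(CAP_CHECKPOINT_RESTORE) &&
 *		     !capable(CAP_SYS_ADMIN)))
 *		return -EACCES;
 */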

/*
 * Kernel module parameter to specify maximum number of supported queues per
 * device
 */
extern int max_num_of_queues_per_device;

/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

/*
 * Kernel module parameter to specify the maximum process
 * number per HW scheduler
 */
extern int hws_max_conc_proc;

extern int cwsr_enable;

/*
 * Kernel module parameter to specify whether to send sigterm to HSA process on
 * unhandled exception
 */
extern int send_sigterm;

/*
 * This kernel module is used to simulate large bar machine on non-large bar
 * enabled machines.
 */
extern int debug_largebar;

/*
 * Ignore CRAT table during KFD initialization, can be used to work around
 * broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/* Set sh_mem_config.retry_disable on GFX v9 */
extern int amdgpu_noretry;

/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;

/* Whether MEC FW support GWS barriers */
extern bool hws_gws_support;

/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;

/* Don't evict process queues on vm fault */
extern int amdgpu_no_queue_eviction_on_vm_fault;

/* Enable eviction debug messages */
extern bool debug_evictions;

enum cache_policy {
	cache_policy_coherent,
	cache_policy_noncoherent
};

#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
#define KFD_IS_SOC15(dev)   ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
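
/*
 * Example (illustrative): gating a code path on the GC IP version, e.g.
 * everything GFX9 ("SOC15") and newer:
 *
 *	if (KFD_IS_SOC15(dev))
 *		mqd_size = sizeof(struct v9_mqd);	// hypothetical caller
 */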

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
			bool *patched_flag);
	void (*interrupt_wq)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
	uint32_t gfx_target_version;
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	unsigned int doorbell_size;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
	bool supports_cwsr;
	bool needs_iommu_device;
	bool needs_pci_atomics;
	uint32_t no_atomic_fw_version;
	unsigned int num_sdma_queues_per_engine;
	unsigned int num_reserved_sdma_queues_per_engine;
	uint64_t reserved_sdma_queues_bitmap;
};

unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev);
unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev);

struct kfd_mem_obj {
	uint32_t range_start;
	uint32_t range_end;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	void *gtt_mem;
};

struct kfd_vmid_info {
	uint32_t first_vmid_kfd;
	uint32_t last_vmid_kfd;
	uint32_t vmid_num_kfd;
};

struct kfd_dev {
	struct amdgpu_device *adev;

	struct kfd_device_info device_info;
	struct pci_dev *pdev;
	struct drm_device *ddev;

	unsigned int id;		/* topology stub index */

	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
					 * KFD. It is aligned for mapping
					 * into user mode
					 */
	size_t doorbell_base_dw_offset;	/* Offset from the start of the PCI
					 * doorbell BAR to the first KFD
					 * doorbell in dwords. GFX reserves
					 * the segment before this offset.
					 */
	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
					   * page used by kernel queue
					   */

	struct kgd2kfd_shared_resources shared_resources;
	struct kfd_vmid_info vm_info;
	struct kfd_local_mem_info local_mem_info;

	const struct kfd2kgd_calls *kfd2kgd;
	struct mutex doorbell_mutex;
	DECLARE_BITMAP(doorbell_available_index,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	void *gtt_mem;
	uint64_t gtt_start_gpu_addr;
	void *gtt_start_cpu_ptr;
	void *gtt_sa_bitmap;
	struct mutex gtt_sa_lock;
	unsigned int gtt_sa_chunk_size;
	unsigned int gtt_sa_num_of_chunks;

	/* Interrupts */
	struct kfifo ih_fifo;
	struct workqueue_struct *ih_wq;
	struct work_struct interrupt_work;
	spinlock_t interrupt_lock;

	/* QCM Device instance */
	struct device_queue_manager *dqm;

	bool init_complete;
	/*
	 * Interrupts of interest to KFD are copied
	 * from the HW ring into a SW ring.
	 */
	bool interrupts_active;

	/* Firmware versions */
	uint16_t mec_fw_version;
	uint16_t mec2_fw_version;
	uint16_t sdma_fw_version;

	/* Maximum process number mapped to HW scheduler */
	unsigned int max_proc_per_quantum;

	/* CWSR */
	bool cwsr_enabled;
	const void *cwsr_isa;
	unsigned int cwsr_isa_size;

	/* xGMI */
	uint64_t hive_id;

	bool pci_atomic_requested;

	/* Use IOMMU v2 flag */
	bool use_iommu_v2;

	/* SRAM ECC flag */
	atomic_t sram_ecc_flag;

	/* Compute Profile ref. count */
	atomic_t compute_profile;

	/* Global GWS resource shared between processes */
	void *gws;

	/* Clients watching SMI events */
	struct list_head smi_clients;
	spinlock_t smi_lock;

	uint32_t reset_seq_num;

	struct ida doorbell_ida;
	unsigned int max_doorbell_slices;

	int noretry;

	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
	struct dev_pagemap pgmap;
};

enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
	KFD_MEMPOOL_FRAMEBUFFER = 3,
};

/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);

/**
 * enum kfd_unmap_queues_filter - Enum for queue filters.
 *
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 *						running queues list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all non-static queues
 *						in the run list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to
 *						a specific process.
 */
enum kfd_unmap_queues_filter {
	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES = 1,
	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES = 2,
	KFD_UNMAP_QUEUES_FILTER_BY_PASID = 3
};

/**
 * enum kfd_queue_type - Enum for various queue types.
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
 *
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type, a kernel-mode queue used by the
 *			packet manager to submit scheduler commands.
 *
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type, used for debug.
 *
 * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface.
 */
enum kfd_queue_type  {
	KFD_QUEUE_TYPE_COMPUTE,
	KFD_QUEUE_TYPE_SDMA,
	KFD_QUEUE_TYPE_HIQ,
	KFD_QUEUE_TYPE_DIQ,
	KFD_QUEUE_TYPE_SDMA_XGMI
};

enum kfd_queue_format {
	KFD_QUEUE_FORMAT_PM4,
	KFD_QUEUE_FORMAT_AQL
};

enum KFD_QUEUE_PRIORITY {
	KFD_QUEUE_PRIORITY_MINIMUM = 0,
	KFD_QUEUE_PRIORITY_MAXIMUM = 15
};

/**
 * struct queue_properties
 *
 * @type: The queue type.
 * @queue_id: Queue identifier.
 * @queue_address: Queue ring buffer address.
 * @queue_size: Queue ring buffer size.
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity is from 0 to 15, with 15 being the highest
 * priority. Currently all queues are initialized with the highest priority.
 * @queue_percent: This field is partially implemented; a zero in this field
 * marks the queue as non-active.
 * @read_ptr: User space address which points to the number of dwords the
 * CP has read from the ring buffer. This field is updated automatically
 * by the H/W.
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 * @doorbell_ptr: Notifies the H/W of new packets written to the queue ring
 * buffer. The user should update this field after updating @write_ptr.
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 * @is_interop: Defines if this is an interop queue. An interop queue can
 * access both graphics and compute resources.
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
 * @vmid: If the scheduling mode is no cp scheduling, this field defines the
 * vmid associated with the queue.
 *
 * This structure represents the queue properties for each queue, no matter
 * whether it's a user mode or kernel mode queue.
 *
 */

struct queue_properties {
	enum kfd_queue_type type;
	enum kfd_queue_format format;
	unsigned int queue_id;
	uint64_t queue_address;
	uint64_t queue_size;
	uint32_t priority;
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	void __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_evicted;
	bool is_active;
	bool is_gws;
	/* Not relevant for user mode queues in cp scheduling */
	unsigned int vmid;
	/* Relevant only for SDMA queues */
	uint32_t sdma_engine_id;
	uint32_t sdma_queue_id;
	uint32_t sdma_vm_addr;
	/* Relevant only for VI */
	uint64_t eop_ring_buffer_address;
	uint32_t eop_ring_buffer_size;
	uint64_t ctx_save_restore_area_address;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
};

#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 &&	\
			    (q).queue_address != 0 &&	\
			    (q).queue_percent > 0 &&	\
			    !(q).is_evicted)
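
/*
 * Example (illustrative): QUEUE_IS_ACTIVE() takes the properties struct by
 * value, so callers typically apply it to q->properties, e.g. when
 * recomputing the active state after a property update:
 *
 *	q->properties.is_active = QUEUE_IS_ACTIVE(q->properties);
 */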

enum mqd_update_flag {
	UPDATE_FLAG_CU_MASK = 0,
};

struct mqd_update_info {
	union {
		struct {
			uint32_t count; /* Must be a multiple of 32 */
			uint32_t *ptr;
		} cu_mask;
	};
	enum mqd_update_flag update_flag;
};
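
/*
 * Example (illustrative sketch, based on the CU-mask ioctl path): filling a
 * struct mqd_update_info before calling pqm_update_mqd() (declared further
 * down in this header); cu_mask_count and cu_mask_ptr stand in for
 * caller-provided data:
 *
 *	struct mqd_update_info minfo = {};
 *
 *	minfo.update_flag = UPDATE_FLAG_CU_MASK;
 *	minfo.cu_mask.count = cu_mask_count;
 *	minfo.cu_mask.ptr = cu_mask_ptr;
 *	retval = pqm_update_mqd(&p->pqm, queue_id, &minfo);
 */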

/**
 * struct queue
 *
 * @list: Queue linked list.
 * @mqd: The queue MQD (memory queue descriptor).
 * @mqd_mem_obj: The MQD local gpu memory object.
 * @gart_mqd_addr: The MQD gart mc address.
 * @properties: The queue properties.
 * @mec: Used only in no cp scheduling mode; identifies the micro engine the
 * queue is assigned to.
 * @pipe: Used only in no cp scheduling mode; identifies the queue's pipe.
 * @queue: Used only in no cp scheduling mode; identifies the queue's slot.
 * @process: The kfd process that created this queue.
 * @device: The kfd device that created this queue.
 * @gws: Pointing to gws kgd_mem if this queue is using gws, NULL otherwise.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 *
 */

struct queue {
	struct list_head list;
	void *mqd;
	struct kfd_mem_obj *mqd_mem_obj;
	uint64_t gart_mqd_addr;
	struct queue_properties properties;

	uint32_t mec;
	uint32_t pipe;
	uint32_t queue;

	unsigned int sdma_id;
	unsigned int doorbell_id;

	struct kfd_process	*process;
	struct kfd_dev		*device;
	void *gws;

	/* procfs */
	struct kobject kobj;

	void *gang_ctx_bo;
	uint64_t gang_ctx_gpu_addr;
	void *gang_ctx_cpu_ptr;

	struct amdgpu_bo *wptr_bo;
};

enum KFD_MQD_TYPE {
	KFD_MQD_TYPE_HIQ = 0,		/* for hiq */
	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
	KFD_MQD_TYPE_DIQ,		/* for diq */
	KFD_MQD_TYPE_MAX
};

enum KFD_PIPE_PRIORITY {
	KFD_PIPE_PRIORITY_CS_LOW = 0,
	KFD_PIPE_PRIORITY_CS_MEDIUM,
	KFD_PIPE_PRIORITY_CS_HIGH
};

struct scheduling_resources {
	unsigned int vmid_mask;
	enum kfd_queue_type type;
	uint64_t queue_mask;
	uint64_t gws_mask;
	uint32_t oac_mask;
	uint32_t gds_heap_base;
	uint32_t gds_heap_size;
};

struct process_queue_manager {
	/* data */
	struct kfd_process	*process;
	struct list_head	queues;
	unsigned long		*queue_slot_bitmap;
};

struct qcm_process_device {
	/* The Device Queue Manager that owns this data */
	struct device_queue_manager *dqm;
	struct process_queue_manager *pqm;
	/* Queues list */
	struct list_head queues_list;
	struct list_head priv_queue_list;

	unsigned int queue_count;
	unsigned int vmid;
	bool is_debug;
	unsigned int evicted; /* eviction counter, 0=active */

	/* This flag tells if we should reset all wavefronts on
	 * process termination
	 */
	bool reset_wavefronts;

	/* This flag tells us if this process has a GWS-capable
	 * queue that will be mapped into the runlist. It's possible
	 * to request a GWS BO, but not have the queue currently
	 * mapped, and this changes how the MAP_PROCESS PM4 packet
	 * is configured.
	 */
	bool mapped_gws_queue;

	/* All the memory management data should be here too */
	uint64_t gds_context_area;
	/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
	uint64_t page_table_base;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;
	uint32_t sh_mem_ape1_base;
	uint32_t sh_mem_ape1_limit;
	uint32_t gds_size;
	uint32_t num_gws;
	uint32_t num_oac;
	uint32_t sh_hidden_private_base;

	/* CWSR memory */
	struct kgd_mem *cwsr_mem;
	void *cwsr_kaddr;
	uint64_t cwsr_base;
	uint64_t tba_addr;
	uint64_t tma_addr;

	/* IB memory */
	struct kgd_mem *ib_mem;
	uint64_t ib_base;
	void *ib_kaddr;

	/* doorbell resources per process per device */
	unsigned long *doorbell_bitmap;
};

/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle) ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)
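
/*
 * Example (illustrative): a buffer handle round-trips through these macros;
 * the gpu_id lands in the upper 32 bits, the IDR handle in the lower 32:
 *
 *	uint64_t handle = MAKE_HANDLE(gpu_id, idr_handle);
 *
 *	WARN_ON(GET_GPU_ID(handle) != gpu_id);
 *	WARN_ON(GET_IDR_HANDLE(handle) != idr_handle);
 */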

enum kfd_pdd_bound {
	PDD_UNBOUND = 0,
	PDD_BOUND,
	PDD_BOUND_SUSPENDED,
};

/* Buffer size for per-device sysfs attribute names, e.g. "vram_<gpu_id>" */
#define MAX_SYSFS_FILENAME_LEN 15

/*
 * The SDMA activity counter runs at 100 MHz, so dividing the raw counter
 * by this value yields activity in microsecond granularity.
 */
#define SDMA_ACTIVITY_DIVISOR  100

/* Data that is per-process-per device. */
struct kfd_process_device {
	/* The device that owns this data. */
	struct kfd_dev *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process-per device QCM data structure */
	struct qcm_process_device qpd;

	/* Apertures */
	uint64_t lds_base;
	uint64_t lds_limit;
	uint64_t gpuvm_base;
	uint64_t gpuvm_limit;
	uint64_t scratch_base;
	uint64_t scratch_limit;

	/* VM context for GPUVM allocations */
	struct file *drm_file;
	void *drm_priv;
	atomic64_t tlb_seq;

	/* GPUVM allocations storage */
	struct idr alloc_idr;

	/* Flag used to tell the pdd has dequeued queues
	 * from the dqm and we can delete this process from
	 * the PASID lookup
	 */
	bool already_dequeued;
	bool runtime_inuse;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;

	/* VRAM usage */
	uint64_t vram_usage;
	struct attribute attr_vram;
	char vram_filename[MAX_SYSFS_FILENAME_LEN];

	/* SDMA activity tracking */
	uint64_t sdma_past_activity_counter;
	struct attribute attr_sdma;
	char sdma_filename[MAX_SYSFS_FILENAME_LEN];

	/* Eviction activity tracking */
	uint64_t last_evict_timestamp;
	atomic64_t evict_duration_counter;
	struct attribute attr_evict;

	struct kobject *kobj_stats;
	unsigned int doorbell_index;

	/*
	 * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
	 * that is associated with the device encoded by "this" struct
	 * instance. The value reflects CU usage by all of the waves launched
	 * by this process on this device. A process can have multiple devices
	 * and thus multiple instances of this attribute; the reported value
	 * is per device. Access to the attribute is not serialized, so the
	 * value is only a snapshot of the moment it was read.
	 */
	struct attribute attr_cu_occupancy;

	/* sysfs counters for GPU retry fault and page migration tracking */
	struct kobject *kobj_counters;
	struct attribute attr_faults;
	struct attribute attr_page_in;
	struct attribute attr_page_out;
	uint64_t faults;
	uint64_t page_in;
	uint64_t page_out;

	/*
	 * If this process has been checkpointed before, then the user
	 * application will use the original gpu_id on the
	 * checkpointed node to refer to this device.
	 */
	uint32_t user_gpu_id;

	void *proc_ctx_bo;
	uint64_t proc_ctx_gpu_addr;
	void *proc_ctx_cpu_ptr;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)

struct svm_range_list {
	struct mutex			lock;
	struct rb_root_cached		objects;
	struct list_head		list;
	struct work_struct		deferred_list_work;
	struct list_head		deferred_range_list;
	struct list_head		criu_svm_metadata_list;
	spinlock_t			deferred_list_lock;
	atomic_t			evicted_ranges;
	atomic_t			drain_pagefaults;
	struct delayed_work		restore_work;
	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
	struct task_struct		*faulting_task;
};

/* Process data */
struct kfd_process {
	/*
	 * kfd_process are stored in an mm_struct*->kfd_process*
	 * hash table (kfd_processes in kfd_process.c)
	 */
	struct hlist_node kfd_processes;

	/*
	 * Opaque pointer to mm_struct. We don't hold a reference to
	 * it so it should never be dereferenced from here. This is
	 * only used for looking up processes by their mm.
	 */
	void *mm;

	struct kref ref;
	struct work_struct release_work;

	struct mutex mutex;

	/*
	 * In any process, the thread that started main() is the lead
	 * thread and outlives the rest. It is here for amd_iommu_bind_pasid,
	 * which wants a task_struct. It can also be used for safely getting
	 * a reference to the mm_struct of the process.
	 */
	struct task_struct *lead_thread;

	/* We want to receive a notification when the mm_struct is destroyed */
	struct mmu_notifier mmu_notifier;

	u32 pasid;

	/*
	 * Array of kfd_process_device pointers,
	 * one for each device the process is using.
	 */
	struct kfd_process_device *pdds[MAX_GPU_INSTANCE];
	uint32_t n_pdds;

	struct process_queue_manager pqm;

	/* Is the user space process 32 bit? */
	bool is_32bit_user_mode;

	/* Event-related data */
	struct mutex event_mutex;
	/* Event ID allocator and lookup */
	struct idr event_idr;
	/* Event page */
	u64 signal_handle;
	struct kfd_signal_page *signal_page;
	size_t signal_mapped_size;
	size_t signal_event_count;
	bool signal_event_limit_reached;

	/* Information used for memory eviction */
	void *kgd_process_info;
	/*
	 * Eviction fence that is attached to all the BOs of this process.
	 * The fence will be triggered during eviction and a new one will
	 * be created during restore.
	 */
	struct dma_fence *ef;

	/* Work items for evicting and restoring BOs */
	struct delayed_work eviction_work;
	struct delayed_work restore_work;
	/* seqno of the last scheduled eviction */
	unsigned int last_eviction_seqno;
	/*
	 * Approx. the last timestamp (in jiffies) when the process was
	 * restored after an eviction
	 */
	unsigned long last_restore_timestamp;

	/* Kobj for our procfs */
	struct kobject *kobj;
	struct kobject *kobj_queues;
	struct attribute attr_pasid;

	/* Shared virtual memory registered by this process */
	struct svm_range_list svms;

	bool xnack_enabled;

	atomic_t poison;
	/* Queues are paused because a CRIU checkpoint is in progress */
	bool queues_paused;
};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;

/**
 * typedef amdkfd_ioctl_t - typedef for ioctl function pointer.
 *
 * @filep: pointer to file structure.
 * @p: amdkfd process pointer.
 * @data: pointer to arg that was copied from user.
 *
 * Return: returns ioctl completion code.
 */
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
				void *data);

struct amdkfd_ioctl_desc {
	unsigned int cmd;
	int flags;
	amdkfd_ioctl_t *func;
	unsigned int cmd_drv;
	const char *name;
};
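
/*
 * Example (illustrative): a handler with the amdkfd_ioctl_t signature; the
 * descriptor table in kfd_chardev.c wires such handlers up through the
 * AMDKFD_IOCTL_DEF() macro:
 *
 *	static int kfd_ioctl_get_version(struct file *filep,
 *					 struct kfd_process *p, void *data)
 *	{
 *		struct kfd_ioctl_get_version_args *args = data;
 *
 *		args->major_version = KFD_IOCTL_MAJOR_VERSION;
 *		args->minor_version = KFD_IOCTL_MINOR_VERSION;
 *		return 0;
 *	}
 */
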
bool kfd_dev_is_large_bar(struct kfd_dev *dev);

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);

int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_gpuid_from_adev(struct kfd_process *p,
			       struct amdgpu_device *adev, uint32_t *gpuid,
			       uint32_t *gpuidx);
static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
				uint32_t gpuidx, uint32_t *gpuid)
{
	if (gpuidx >= p->n_pdds)
		return -EINVAL;
	*gpuid = p->pdds[gpuidx]->dev->id;
	return 0;
}
static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
				struct kfd_process *p, uint32_t gpuidx)
{
	return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
}
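
/*
 * Example (illustrative): translating a user-visible gpu_id to a gpu index
 * and back using the helpers above:
 *
 *	uint32_t gpuid;
 *	int gpuidx = kfd_process_gpuidx_from_gpuid(p, args->gpu_id);
 *
 *	if (gpuidx < 0)
 *		return -EINVAL;
 *	if (kfd_process_gpuid_from_gpuidx(p, gpuidx, &gpuid))
 *		return -EINVAL;
 */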

void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *process,
							 uint32_t gpu_id);

int kfd_process_get_user_gpu_id(struct kfd_process *p, uint32_t actual_gpu_id);

int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						      struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
						       struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
						       struct kfd_process *p);

bool kfd_process_xnack_mode(struct kfd_process *p, bool supported);

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
					int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle);
struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);

/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
u32 kfd_pasid_alloc(void);
void kfd_pasid_free(u32 pasid);

/* Doorbells */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
		      struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
					struct kfd_process_device *pdd,
					unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
				unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
				unsigned int doorbell_index);

/* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj);

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);

extern struct device *kfd_device;

/* KFD's procfs */
void kfd_procfs_init(void);
void kfd_procfs_shutdown(void);
int kfd_procfs_add_queue(struct queue *q);
void kfd_procfs_del_queue(struct queue *q);

/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
						uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
						uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);

/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev,
			 const uint32_t *ih_ring_entry,
			 uint32_t *patched_ihre, bool *flag);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
				  uint64_t tba_addr,
				  uint64_t tma_addr);

/* CRIU */
/*
 * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of
 * the CRIU private structures:
 * kfd_criu_process_priv_data
 * kfd_criu_device_priv_data
 * kfd_criu_bo_priv_data
 * kfd_criu_queue_priv_data
 * kfd_criu_event_priv_data
 * kfd_criu_svm_range_priv_data
 */
#define KFD_CRIU_PRIV_VERSION 1

struct kfd_criu_process_priv_data {
	uint32_t version;
	uint32_t xnack_mode;
};

struct kfd_criu_device_priv_data {
	/* For future use */
	uint64_t reserved;
};

struct kfd_criu_bo_priv_data {
	uint64_t user_addr;
	uint32_t idr_handle;
	uint32_t mapped_gpuids[MAX_GPU_INSTANCE];
};

/*
 * The first 4 bytes of kfd_criu_queue_priv_data, kfd_criu_event_priv_data and
 * kfd_criu_svm_range_priv_data are the object type
 */
enum kfd_criu_object_type {
	KFD_CRIU_OBJECT_TYPE_QUEUE,
	KFD_CRIU_OBJECT_TYPE_EVENT,
	KFD_CRIU_OBJECT_TYPE_SVM_RANGE,
};

struct kfd_criu_svm_range_priv_data {
	uint32_t object_type;
	uint64_t start_addr;
	uint64_t size;
	/* Variable length array of attributes */
	struct kfd_ioctl_svm_attribute attrs[];
};
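
/*
 * Example (illustrative): because of the trailing flexible array, SVM-range
 * private data is sized with struct_size() at allocation time (nattr being
 * the number of attributes to checkpoint):
 *
 *	struct kfd_criu_svm_range_priv_data *svm_priv;
 *
 *	svm_priv = kzalloc(struct_size(svm_priv, attrs, nattr), GFP_KERNEL);
 *	if (!svm_priv)
 *		return -ENOMEM;
 */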

struct kfd_criu_queue_priv_data {
	uint32_t object_type;
	uint64_t q_address;
	uint64_t q_size;
	uint64_t read_ptr_addr;
	uint64_t write_ptr_addr;
	uint64_t doorbell_off;
	uint64_t eop_ring_buffer_address;
	uint64_t ctx_save_restore_area_address;
	uint32_t gpu_id;
	uint32_t type;
	uint32_t format;
	uint32_t q_id;
	uint32_t priority;
	uint32_t q_percent;
	uint32_t doorbell_id;
	uint32_t gws;
	uint32_t sdma_id;
	uint32_t eop_ring_buffer_size;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint32_t mqd_size;
};

struct kfd_criu_event_priv_data {
	uint32_t object_type;
	uint64_t user_handle;
	uint32_t event_id;
	uint32_t auto_reset;
	uint32_t type;
	uint32_t signaled;

	union {
		struct kfd_hsa_memory_exception_data memory_exception_data;
		struct kfd_hsa_hw_exception_data hw_exception_data;
	};
};

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes);

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset);

int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);

int kfd_criu_checkpoint_events(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset);

int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
		struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
					enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid);

/* Process Queue Manager */
struct process_queue_node {
	struct queue *q;
	struct kernel_queue *kq;
	struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
			struct kfd_dev *dev,
			struct file *f,
			struct queue_properties *properties,
			unsigned int *qid,
			struct amdgpu_bo *wptr_bo,
			const struct kfd_criu_queue_priv_data *q_data,
			const void *restore_mqd,
			const void *restore_ctl_stack,
			uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p);
int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
			struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
						unsigned int qid);
struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
						unsigned int qid);
int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size);

int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
			      uint64_t fence_value,
			      unsigned int timeout_ms);

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  u32 *mqd_size,
				  u32 *ctl_stack_size);

/* Packet Manager */

#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT   (10)

struct packet_manager {
	struct device_queue_manager *dqm;
	struct kernel_queue *priv_queue;
	struct mutex lock;
	bool allocated;
	struct kfd_mem_obj *ib_buffer_obj;
	unsigned int ib_size_bytes;
	bool is_over_subscription;

	const struct packet_manager_funcs *pmf;
};

struct packet_manager_funcs {
	/* Support ASIC-specific packet formats for PM4 packets */
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd);
	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain);
	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);
	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
			struct queue *q, bool is_static);
	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset);
	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint64_t fence_value);
	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

	/* Packet sizes */
	int map_process_size;
	int runlist_size;
	int set_resources_size;
	int map_queues_size;
	int unmap_queues_size;
	int query_status_size;
	int release_mem_size;
};
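
/*
 * Example (illustrative): packet building is dispatched through pmf so the
 * generic packet manager stays ASIC-agnostic, e.g. emitting a runlist packet
 * into a buffer sized by the matching *_size field (rl_gpu_addr and
 * rl_size_dwords stand in for caller-provided values):
 *
 *	retval = pm->pmf->runlist(pm, buffer, rl_gpu_addr,
 *				  rl_size_dwords, chain);
 */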

extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
extern const struct packet_manager_funcs kfd_aldebaran_pm_funcs;

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm, bool hanging);
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
				uint64_t fence_value);

int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset);

void pm_release_ib(struct packet_manager *pm);

/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
extern const struct kfd_event_interrupt_class event_interrupt_class_v11;

extern const struct kfd_device_global_init_class device_global_init_class_cik;

int kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
		       uint32_t num_events, void __user *data,
		       bool all, uint32_t *user_timeout_ms,
		       uint32_t *wait_result);
void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
				uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
			    u32 pasid, unsigned long address,
			    bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);

int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
		     uint64_t *event_page_offset, uint32_t *event_slot_index);

int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
			       struct kfd_vm_fault_info *info);

void kfd_signal_reset_event(struct kfd_dev *dev);

void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);

void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);

/* GPUs that require a TLB flush after memory is unmapped */
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
	return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
	       (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1) &&
		dev->adev->sdma.instance[0].fw_version >= 18) ||
	       KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
}
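
/*
 * Example (illustrative): callers pair the check with kfd_flush_tlb(), so
 * only the GPUs matched above pay for a heavyweight flush on unmap:
 *
 *	if (kfd_flush_tlb_after_unmap(pdd->dev))
 *		kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);
 */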

bool kfd_is_locked(void);

/* Compute profile */
void kfd_inc_compute_active(struct kfd_dev *dev);
void kfd_dec_compute_active(struct kfd_dev *dev);

/* Cgroup Support */
/* Check with device cgroup if @kfd device is accessible */
static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
{
#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
	struct drm_device *ddev = kfd->ddev;

	return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
					  ddev->render->index,
					  DEVCG_ACC_WRITE | DEVCG_ACC_READ);
#else
	return 0;
#endif
}

/* Debugfs */
#if defined(CONFIG_DEBUG_FS)

void kfd_debugfs_init(void);
void kfd_debugfs_fini(void);
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
int pqm_debugfs_mqds(struct seq_file *m, void *data);
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

int kfd_debugfs_hang_hws(struct kfd_dev *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm);

#else

static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}

#endif

#endif /* KFD_PRIV_H_INCLUDED */