#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#include "amdgpu.h"

#define MQD_SIZE_ALIGNED 768

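/*
 * kfd_locked is incremented while the driver is suspended or undergoing a
 * GPU reset. While it is non-zero, kfd_is_locked() reports the driver as
 * locked and further GPU work from user processes is held off.
 */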
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v11_kfd2kgd;

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

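/*
 * Derive SDMA queue properties from the SDMA IP version: the number of user
 * queues available per engine, and which queues (if any) are reserved for
 * kernel use and masked out of the user-visible bitmap.
 */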
static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
{
	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0];

	switch (sdma_version) {
	case IP_VERSION(4, 0, 0):
	case IP_VERSION(4, 0, 1):
	case IP_VERSION(4, 1, 0):
	case IP_VERSION(4, 1, 1):
	case IP_VERSION(4, 1, 2):
	case IP_VERSION(5, 2, 1):
	case IP_VERSION(5, 2, 3):
	case IP_VERSION(5, 2, 6):
	case IP_VERSION(5, 2, 7):
		kfd->device_info.num_sdma_queues_per_engine = 2;
		break;
	case IP_VERSION(4, 2, 0):
	case IP_VERSION(4, 2, 2):
	case IP_VERSION(4, 4, 0):
	case IP_VERSION(5, 0, 0):
	case IP_VERSION(5, 0, 1):
	case IP_VERSION(5, 0, 2):
	case IP_VERSION(5, 0, 5):
	case IP_VERSION(5, 2, 0):
	case IP_VERSION(5, 2, 2):
	case IP_VERSION(5, 2, 4):
	case IP_VERSION(5, 2, 5):
	case IP_VERSION(6, 0, 0):
	case IP_VERSION(6, 0, 1):
	case IP_VERSION(6, 0, 2):
		kfd->device_info.num_sdma_queues_per_engine = 8;
		break;
	default:
		dev_warn(kfd_device,
			 "Default sdma queue per engine(8) is set due to mismatch of sdma ip block(SDMA_HWIP:0x%x).\n",
			 sdma_version);
		kfd->device_info.num_sdma_queues_per_engine = 8;
	}

	switch (sdma_version) {
	case IP_VERSION(6, 0, 0):
	case IP_VERSION(6, 0, 2):
		/* Two queues per engine are reserved for kernel use */
		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
		/* Mask out the reserved queues (bits 0-3) */
		kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL;
		break;
	case IP_VERSION(6, 0, 1):
		/* Two queues per engine are reserved for kernel use */
		kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
		/* Mask out the reserved queues (bits 0-1) */
		kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL;
		break;
	default:
		break;
	}
}

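/*
 * Pick the event interrupt handling class that matches the GC IP version.
 * Unknown versions fall back to the v9 handler with a warning.
 */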
static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
{
	uint32_t gc_version = KFD_GC_VERSION(kfd);

	switch (gc_version) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 4, 2):
	case IP_VERSION(10, 3, 1):
	case IP_VERSION(10, 3, 3):
	case IP_VERSION(10, 3, 6):
	case IP_VERSION(10, 3, 7):
	case IP_VERSION(10, 1, 3):
	case IP_VERSION(10, 1, 4):
	case IP_VERSION(10, 1, 10):
	case IP_VERSION(10, 1, 2):
	case IP_VERSION(10, 1, 1):
	case IP_VERSION(10, 3, 0):
	case IP_VERSION(10, 3, 2):
	case IP_VERSION(10, 3, 4):
	case IP_VERSION(10, 3, 5):
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
		break;
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 2):
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
		break;
	default:
		dev_warn(kfd_device, "v9 event interrupt handler is set due to "
			"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version);
		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
	}
}

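/*
 * Fill in the per-device capability table: PASID/HQD limits, doorbell and
 * IH ring entry sizes, CWSR support, SDMA topology, and whether the part
 * depends on IOMMUv2 or PCIe atomics.
 */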
static void kfd_device_info_init(struct kfd_dev *kfd,
				 bool vf, uint32_t gfx_target_version)
{
	uint32_t gc_version = KFD_GC_VERSION(kfd);
	uint32_t asic_type = kfd->adev->asic_type;

	kfd->device_info.max_pasid_bits = 16;
	kfd->device_info.max_no_of_hqd = 24;
	kfd->device_info.num_of_watch_points = 4;
	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
	kfd->device_info.gfx_target_version = gfx_target_version;

	if (KFD_IS_SOC15(kfd)) {
		kfd->device_info.doorbell_size = 8;
		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
		kfd->device_info.supports_cwsr = true;

		kfd_device_info_set_sdma_info(kfd);

		kfd_device_info_set_event_interrupt_class(kfd);

		/* Raven-family APUs (GC 9.1.0 / 9.2.2) require the IOMMUv2 device */
		if (gc_version == IP_VERSION(9, 1, 0) ||
		    gc_version == IP_VERSION(9, 2, 2))
			kfd->device_info.needs_iommu_device = true;

		if (gc_version < IP_VERSION(11, 0, 0)) {
			/* First MEC firmware version that can work without PCIe atomics */
			if (gc_version == IP_VERSION(10, 3, 6))
				kfd->device_info.no_atomic_fw_version = 14;
			else if (gc_version == IP_VERSION(10, 3, 7))
				kfd->device_info.no_atomic_fw_version = 3;
			else if (gc_version >= IP_VERSION(10, 3, 0))
				kfd->device_info.no_atomic_fw_version = 92;
			else if (gc_version >= IP_VERSION(10, 1, 1))
				kfd->device_info.no_atomic_fw_version = 145;

			/* GFX 10.1.1 and later need PCIe atomics by default */
			if (gc_version >= IP_VERSION(10, 1, 1))
				kfd->device_info.needs_pci_atomics = true;
		}
	} else {
		kfd->device_info.doorbell_size = 4;
		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
		kfd->device_info.num_sdma_queues_per_engine = 2;

		if (asic_type != CHIP_KAVERI &&
		    asic_type != CHIP_HAWAII &&
		    asic_type != CHIP_TONGA)
			kfd->device_info.supports_cwsr = true;

		if (asic_type == CHIP_KAVERI ||
		    asic_type == CHIP_CARRIZO)
			kfd->device_info.needs_iommu_device = true;

		if (asic_type != CHIP_HAWAII && !vf)
			kfd->device_info.needs_pci_atomics = true;
	}
}

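/*
 * Map the ASIC (or its GC IP version on SOC15 and later parts) to the
 * matching kfd2kgd callback table and gfx_target_version, then allocate and
 * minimally initialize the kfd_dev. Returns NULL for unsupported devices.
 */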
struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
	struct kfd_dev *kfd = NULL;
	const struct kfd2kgd_calls *f2g = NULL;
	struct pci_dev *pdev = adev->pdev;
	uint32_t gfx_target_version = 0;

	switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		gfx_target_version = 70000;
		if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		gfx_target_version = 80001;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		gfx_target_version = 70001;
		if (!amdgpu_exp_hw_support)
			pr_info("KFD support on Hawaii is experimental. See modparam exp_hw_support\n");
		else if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		gfx_target_version = 80002;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
		gfx_target_version = 80003;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS10:
		gfx_target_version = 80003;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS12:
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_VEGAM:
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		switch (adev->ip_versions[GC_HWIP][0]) {
		/* Vega 10 */
		case IP_VERSION(9, 0, 1):
			gfx_target_version = 90000;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		/* Raven */
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			gfx_target_version = 90002;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		/* Vega 12 */
		case IP_VERSION(9, 2, 1):
			gfx_target_version = 90004;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Renoir */
		case IP_VERSION(9, 3, 0):
			gfx_target_version = 90012;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Vega 20 */
		case IP_VERSION(9, 4, 0):
			gfx_target_version = 90006;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		/* Arcturus */
		case IP_VERSION(9, 4, 1):
			gfx_target_version = 90008;
			f2g = &arcturus_kfd2kgd;
			break;
		/* Aldebaran */
		case IP_VERSION(9, 4, 2):
			gfx_target_version = 90010;
			f2g = &aldebaran_kfd2kgd;
			break;
		/* Navi10 */
		case IP_VERSION(10, 1, 10):
			gfx_target_version = 100100;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Navi12 */
		case IP_VERSION(10, 1, 2):
			gfx_target_version = 100101;
			f2g = &gfx_v10_kfd2kgd;
			break;
		/* Navi14 */
		case IP_VERSION(10, 1, 1):
			gfx_target_version = 100102;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Cyan Skillfish */
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			gfx_target_version = 100103;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		/* Sienna Cichlid */
		case IP_VERSION(10, 3, 0):
			gfx_target_version = 100300;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Navy Flounder */
		case IP_VERSION(10, 3, 2):
			gfx_target_version = 100301;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Van Gogh */
		case IP_VERSION(10, 3, 1):
			gfx_target_version = 100303;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Dimgrey Cavefish */
		case IP_VERSION(10, 3, 4):
			gfx_target_version = 100302;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Beige Goby */
		case IP_VERSION(10, 3, 5):
			gfx_target_version = 100304;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		/* Yellow Carp */
		case IP_VERSION(10, 3, 3):
			gfx_target_version = 100305;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 6):
		case IP_VERSION(10, 3, 7):
			gfx_target_version = 100306;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 0):
			gfx_target_version = 110000;
			f2g = &gfx_v11_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 1):
			gfx_target_version = 110003;
			f2g = &gfx_v11_kfd2kgd;
			break;
		case IP_VERSION(11, 0, 2):
			gfx_target_version = 110002;
			f2g = &gfx_v11_kfd2kgd;
			break;
		default:
			break;
		}
		break;
	}

	if (!f2g) {
		if (adev->ip_versions[GC_HWIP][0])
			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
		else
			dev_err(kfd_device, "%s %s not supported in kfd\n",
				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->adev = adev;
	kfd_device_info_init(kfd, vf, gfx_target_version);
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
	       sizeof(kfd->doorbell_available_index));

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}

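/*
 * Select the CWSR (compute wave save/restore) trap handler binary that
 * matches the GFX version. Each handler must fit within a single page,
 * which the BUILD_BUG_ON checks enforce at compile time.
 */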
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info.supports_cwsr) {
		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		} else {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
		}

		kfd->cwsr_enabled = true;
	}
}

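/*
 * GWS (global wave sync) is only usable with the HW scheduler and with MEC2
 * firmware recent enough for the given GFX version (or when forced via the
 * hws_gws_support module parameter).
 */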
static int kfd_gws_init(struct kfd_dev *kfd)
{
	int ret = 0;

	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

	if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
			&& kfd->mec2_fw_version >= 0x81b3) ||
		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
			&& kfd->mec2_fw_version >= 0x1b3) ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
			&& kfd->mec2_fw_version >= 0x30) ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
			&& kfd->mec2_fw_version >= 0x28))))
		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
				kfd->adev->gds.gws_size, &kfd->gws);

	return ret;
}

static void kfd_smi_init(struct kfd_dev *dev)
{
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

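/*
 * Bring the device fully up: query firmware versions, check the PCIe
 * atomics requirement, carve up the GTT buffer used for MQDs, runlists and
 * kernel queues, and initialize doorbells, interrupts, the device queue
 * manager, IOMMU, CWSR and topology. On any failure the device is not added.
 */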
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/*
	 * Skip the device if it needs PCIe atomics, the platform cannot
	 * provide them, and the firmware is not new enough to cope without.
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info.needs_pci_atomics &&
	    (!kfd->device_info.no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->pdev->vendor, kfd->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info.no_atomic_fw_version);
		return false;
	}

	/* Limit the number of concurrently mapped processes if requested */
	if (hws_max_conc_proc >= 0)
		kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd);
	else
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;

	/* Worst-case size needed for the MQDs of all user queues */
	size = max_num_of_queues_per_device *
			kfd->device_info.mqd_size_aligned;

	/*
	 * Worst-case size of the runlist packets; at most two runlists can
	 * be in flight at once.
	 */
	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
				sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add the size of the HIQ and DIQ kernel queues */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* Another 512KB for all other allocations on GART */
	size += 512 * 1024;

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->adev, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sub-allocator with 512-byte chunks */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	if (amdgpu_use_xgmi_p2p)
		kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;

	kfd->noretry = kfd->adev->gmc.noretry;

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	/* If supported, allocate global GWS that is shared by all KFD processes */
	if (kfd_gws_init(kfd)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			kfd->adev->gds.gws_size);
		goto gws_error;
	}

	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		kfd->use_iommu_v2 = false;
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	svm_migrate_init(kfd->adev);

	if (kgd2kfd_resume_iommu(kfd))
		goto device_iommu_error;

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info);

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		kfd->dqm->sched_policy);

	goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
	if (kfd->gws)
		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}

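/*
 * Tear down the resources set up in kgd2kfd_device_init(). Safe to call
 * even if device init never completed; only the kfd_dev itself is freed
 * in that case.
 */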
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
		if (kfd->gws)
			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	}

	kfree(kfd);
}

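/*
 * Called before a GPU reset: notify SMI listeners, let the DQM prepare,
 * suspend the device and signal reset events to user-space waiters.
 */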
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	kfd_smi_event_update_gpu_reset(kfd, false);

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
}

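/*
 * Called after a GPU reset: restart the device queue manager, drop the
 * global kfd_locked reference taken in kgd2kfd_pre_reset(), clear the SRAM
 * ECC flag and notify SMI listeners that the reset completed.
 */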
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	atomic_dec(&kfd_locked);

	atomic_set(&kfd->sram_ecc_flag, 0);

	kfd_smi_event_update_gpu_reset(kfd, true);

	return 0;
}

bool kfd_is_locked(void)
{
	return (atomic_read(&kfd_locked) > 0);
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* For runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);
	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;

	/* For runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd_iommu_resume(kfd);
	if (err)
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
	return err;
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err)
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);

	return err;
}

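/*
 * Queue work on a CPU that belongs to the current NUMA node, so that the
 * interrupt bottom half runs close to where the interrupt was taken.
 */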
static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}

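/* Called directly from the KGD interrupt handler (ISR context). */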
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;
	unsigned long flags;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock_irqsave(&kfd->interrupt_lock, flags);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p, trigger);

	kfd_unref_process(p);
	return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* See kgd2kfd_quiesce_mm(): the lookup takes a process reference so
	 * the process cannot exit while we are running.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	r = kfd_process_restore_queues(p);

	kfd_unref_process(p);
	return r;
}

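/*
 * kgd2kfd_schedule_evict_and_restore_process - schedule delayed eviction
 * work for the KFD process identified by @mm when its eviction @fence is
 * about to be signaled, rate-limited so a process is not evicted again
 * before it has been active for at least PROCESS_ACTIVE_TIME_MS.
 */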
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/*
	 * Avoid starving the process: let it run for at least
	 * PROCESS_ACTIVE_TIME_MS since the last restore before evicting it
	 * again.
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
	     p->lead_thread->pid, delay_jiffies);
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}

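/*
 * Simple sub-allocator for the GTT buffer allocated in
 * kgd2kfd_device_init(): the buffer is split into fixed-size chunks and a
 * bitmap tracks which chunks are in use.
 */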
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

	kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
					   GFP_KERNEL);
	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	bitmap_free(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						 unsigned int bit_num,
						 unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

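/*
 * Allocate a contiguous run of chunks large enough for @size bytes. The
 * search scans the bitmap for a free chunk, then tries to extend the run;
 * if a used chunk is hit before the run is long enough, the search restarts
 * from the chunk after the gap.
 */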
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if (!(*mem_obj))
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first free chunk */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there was no free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If only one chunk is needed, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		__set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to find enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous, restart the
		 * search from that chunk.
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/* If we reached the end of the buffer, bail out with error */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check whether another chunk is still needed */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
		   (*mem_obj)->range_end - (*mem_obj)->range_start + 1);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
		     mem_obj->range_end - mem_obj->range_start + 1);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
	if (kfd)
		atomic_inc(&kfd->sram_ecc_flag);
}

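/*
 * Track whether any compute queue is active on this device and switch the
 * GPU power profile between compute and idle accordingly.
 */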
void kfd_inc_compute_active(struct kfd_dev *kfd)
{
	if (atomic_inc_return(&kfd->compute_profile) == 1)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
	int count = atomic_dec_return(&kfd->compute_profile);

	if (count == 0)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
	WARN_ONCE(count < 0, "Compute profile ref. count error");
}

void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
	if (kfd && kfd->init_complete)
		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}

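/*
 * On devices without XGMI every SDMA engine is PCIe-optimized. On XGMI
 * capable devices at most two engines are used for PCIe traffic and the
 * remaining ones are dedicated to XGMI.
 */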
unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
{
	/* If XGMI is not supported, all SDMA engines are PCIe optimized */
	if (!kdev->adev->gmc.xgmi.supported)
		return kdev->adev->sdma.num_instances;

	return min(kdev->adev->sdma.num_instances, 2);
}

unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
{
	/* After reserving engines for PCIe, the rest are used for XGMI */
	return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
}

#if defined(CONFIG_DEBUG_FS)

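/*
 * Debugfs hook that deliberately hangs the hardware scheduler (HWS),
 * typically used to exercise GPU reset handling. Only meaningful when the
 * HWS scheduling policy is in use.
 */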
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		pr_err("HWS is not enabled");
		return -EINVAL;
	}

	return dqm_debugfs_hang_hws(dev->dqm);
}

#endif