#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

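/*
 * Advance the runlist IB write pointer (counted in dwords) by
 * increment_bytes, warning if the write would overflow the buffer.
 */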
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			    unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

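/*
 * Compute the runlist IB size needed for the current processes and queues,
 * and report whether the runlist will be over-subscribed.
 */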
static void pm_calc_rlib_size(struct packet_manager *pm,
			      unsigned int *rlib_size,
			      bool *over_subscription)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_dev *dev = pm->dqm->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

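	/*
	 * Check for over-subscription: more processes than can run in one
	 * scheduling quantum, more compute queues than the CP can map
	 * directly, or more than one queue using GWS.
	 */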
	*over_subscription = false;

	if (dev->max_proc_per_quantum > 1)
		max_proc_per_quantum = dev->max_proc_per_quantum;

	if ((process_count > max_proc_per_quantum) ||
	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
	    gws_queue_count > 1) {
		*over_subscription = true;
		pr_debug("Over subscribed runlist\n");
	}

	map_queue_size = pm->pmf->map_queues_size;

	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

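	/*
	 * Increase the allocation size in case a chained runlist packet is
	 * needed when the runlist is over-subscribed.
	 */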
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	pr_debug("runlist ib size %d\n", *rlib_size);
}

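/*
 * Allocate the runlist IB from the device GTT sub-allocator and return its
 * CPU and GPU addresses. The buffer is zeroed before use.
 */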
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				  unsigned int **rl_buffer,
				  uint64_t *rl_gpu_buffer,
				  unsigned int *rl_buffer_size,
				  bool *is_over_subscription)
{
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
				     &pm->ib_buffer_obj);

	if (retval) {
		pr_err("Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

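/*
 * Build the runlist IB: one map-process packet per process, followed by
 * map-queues packets for each of its active kernel and user queues. If the
 * runlist is over-subscribed, a runlist packet is appended that chains back
 * to the start of the IB.
 */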
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	bool is_over_subscription;

	rl_wptr = retval = processes_mapped = 0;

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	pr_debug("Building runlist ib process count: %d queues count %d\n",
		 pm->dqm->processes_count, pm->dqm->active_queue_count);

	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;

		if (processes_mapped >= pm->dqm->processes_count) {
			pr_debug("Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
			 alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
				 kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     kq->queue,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
				 q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     q,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}
	}

	pr_debug("Finished map process and queues to runlist\n");

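	/*
	 * When over-subscribed, append a runlist packet that chains back to
	 * the start of the IB so the hardware scheduler keeps cycling
	 * through it.
	 */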
	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					  *rl_gpu_addr,
					  alloc_size_bytes / sizeof(uint32_t),
					  true);
	}
	pm->is_over_subscription = is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

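/*
 * Select the per-ASIC packet writer functions and create the HIQ kernel
 * queue used to submit packets to the hardware scheduler.
 */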
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
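		/* PM4 packet formats on CIK (Kaveri/Hawaii) are the same as on VI */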
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

void pm_uninit(struct packet_manager *pm, bool hanging)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue, hanging);
	pm->priv_queue = NULL;
}

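/*
 * Submit a SET_RESOURCES packet on the HIQ to hand the scheduling resources
 * described by res over to the hardware scheduler.
 */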
int pm_send_set_resources(struct packet_manager *pm,
			  struct scheduling_resources *res)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

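/*
 * Build a runlist IB for the queues on dqm_queues and submit a runlist
 * packet on the HIQ that points the hardware scheduler at it.
 */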
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
				      &rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					  packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
				  rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

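/*
 * Submit a QUERY_STATUS packet carrying a fence; the hardware scheduler
 * writes fence_value to fence_address once the preceding packets have been
 * processed, so the driver can poll for completion.
 */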
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			 uint64_t fence_value)
{
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

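/*
 * Submit an UNMAP_QUEUES packet for the queues matching the given filter.
 * When reset is true the queues are reset rather than preempted gracefully.
 */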
int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

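/* Free the runlist IB allocation, if one is outstanding. */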
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

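/* debugfs: hex-dump the currently allocated runlist IB. */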
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, " No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

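/*
 * debugfs: submit a garbage packet on the HIQ to deliberately hang the
 * hardware scheduler, e.g. for testing GPU reset handling.
 */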
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		buffer[0], buffer[1], buffer[2], buffer[3],
		buffer[4], buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}

#endif