0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024 #include <linux/poll.h>
0025 #include <linux/wait.h>
0026 #include <linux/anon_inodes.h>
0027 #include <uapi/linux/kfd_ioctl.h>
0028 #include "amdgpu.h"
0029 #include "amdgpu_vm.h"
0030 #include "kfd_priv.h"
0031 #include "kfd_smi_events.h"
0032
/*
 * Per-open-fd state for one SMI (System Management Interface) event
 * client.  Created by kfd_smi_event_open(); unlinked and freed via RCU
 * when the fd is released.
 */
struct kfd_smi_client {
	struct list_head list;		/* entry in dev->smi_clients (RCU list) */
	struct kfifo fifo;		/* buffered event text, drained by read() */
	wait_queue_head_t wait_queue;	/* poll()/read() waiters, woken on new events */

	uint64_t events;		/* subscription bitmask, set via write() */
	struct kfd_dev *dev;		/* device this client receives events from */
	spinlock_t lock;		/* serializes fifo access (producer vs reader) */
	struct rcu_head rcu;		/* deferred free after list_del_rcu() */
	pid_t pid;			/* opener's tgid, used for per-process filtering */
	bool suser;			/* opener had CAP_SYS_ADMIN at open time */
};
0045
/* Capacity, in bytes, of each client's event kfifo. */
#define MAX_KFIFO_SIZE 1024

static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
				loff_t *);
static int kfd_smi_ev_release(struct inode *, struct file *);

/* Name shown for the anonymous inode backing an SMI event fd. */
static const char kfd_smi_name[] = "kfd_smi_ev";

/* File operations for the SMI event fd; no open/mmap/ioctl are provided. */
static const struct file_operations kfd_smi_ev_fops = {
	.owner = THIS_MODULE,
	.poll = kfd_smi_ev_poll,
	.read = kfd_smi_ev_read,
	.write = kfd_smi_ev_write,
	.release = kfd_smi_ev_release
};
0063
0064 static __poll_t kfd_smi_ev_poll(struct file *filep,
0065 struct poll_table_struct *wait)
0066 {
0067 struct kfd_smi_client *client = filep->private_data;
0068 __poll_t mask = 0;
0069
0070 poll_wait(filep, &client->wait_queue, wait);
0071
0072 spin_lock(&client->lock);
0073 if (!kfifo_is_empty(&client->fifo))
0074 mask = EPOLLIN | EPOLLRDNORM;
0075 spin_unlock(&client->lock);
0076
0077 return mask;
0078 }
0079
/*
 * read() handler: drain up to @size bytes of buffered event text from
 * the client's fifo into the user buffer.
 *
 * Returns the number of bytes copied, -EAGAIN when no event data is
 * pending (the fd behaves non-blocking; callers use poll() to wait),
 * -ENOMEM on allocation failure, or -EFAULT if the user copy fails.
 */
static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
			       size_t size, loff_t *offset)
{
	int ret;
	size_t to_copy;
	struct kfd_smi_client *client = filep->private_data;
	unsigned char *buf;

	/* The fifo never holds more than MAX_KFIFO_SIZE bytes. */
	size = min_t(size_t, size, MAX_KFIFO_SIZE);
	buf = kmalloc(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * Stage the data through a kernel bounce buffer: copy_to_user()
	 * may fault and sleep, so it must not run under client->lock.
	 */
	spin_lock(&client->lock);
	to_copy = kfifo_len(&client->fifo);
	if (!to_copy) {
		spin_unlock(&client->lock);
		ret = -EAGAIN;
		goto ret_err;
	}
	to_copy = min(size, to_copy);
	ret = kfifo_out(&client->fifo, buf, to_copy);
	spin_unlock(&client->lock);
	if (ret <= 0) {
		ret = -EAGAIN;
		goto ret_err;
	}

	/* NOTE: bytes already popped from the fifo are lost if this faults. */
	ret = copy_to_user(user, buf, to_copy);
	if (ret) {
		ret = -EFAULT;
		goto ret_err;
	}

	kfree(buf);
	return to_copy;

ret_err:
	kfree(buf);
	return ret;
}
0124
0125 static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
0126 size_t size, loff_t *offset)
0127 {
0128 struct kfd_smi_client *client = filep->private_data;
0129 uint64_t events;
0130
0131 if (!access_ok(user, size) || size < sizeof(events))
0132 return -EFAULT;
0133 if (copy_from_user(&events, user, sizeof(events)))
0134 return -EFAULT;
0135
0136 WRITE_ONCE(client->events, events);
0137
0138 return sizeof(events);
0139 }
0140
0141 static void kfd_smi_ev_client_free(struct rcu_head *p)
0142 {
0143 struct kfd_smi_client *ev = container_of(p, struct kfd_smi_client, rcu);
0144
0145 kfifo_free(&ev->fifo);
0146 kfree(ev);
0147 }
0148
/*
 * release() handler: unpublish the client so event producers stop
 * finding it, then free it after an RCU grace period — producers walk
 * dev->smi_clients under rcu_read_lock() only.
 */
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
	struct kfd_smi_client *client = filep->private_data;
	struct kfd_dev *dev = client->dev;

	/* Unlink under the device lock that writers of the list take. */
	spin_lock(&dev->smi_lock);
	list_del_rcu(&client->list);
	spin_unlock(&dev->smi_lock);

	/* Defer the actual free until all RCU readers are done with us. */
	call_rcu(&client->rcu, kfd_smi_ev_client_free);
	return 0;
}
0161
0162 static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
0163 unsigned int event)
0164 {
0165 uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
0166 uint64_t events = READ_ONCE(client->events);
0167
0168 if (pid && client->pid != pid && !(client->suser && (events & all)))
0169 return false;
0170
0171 return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
0172 }
0173
/*
 * Fan an already-formatted event string out to every registered client
 * that subscribed to @smi_event and is allowed to see @pid's activity.
 *
 * Walks dev->smi_clients under rcu_read_lock(), so no dev->smi_lock is
 * needed here; clients are freed via call_rcu() on release.
 */
static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
			       unsigned int smi_event, char *event_msg, int len)
{
	struct kfd_smi_client *client;

	rcu_read_lock();

	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
		if (!kfd_smi_ev_enabled(pid, client, smi_event))
			continue;
		spin_lock(&client->lock);
		/* All-or-nothing: drop the whole message if it won't fit. */
		if (kfifo_avail(&client->fifo) >= len) {
			kfifo_in(&client->fifo, event_msg, len);
			wake_up_all(&client->wait_queue);
		} else {
			pr_debug("smi_event(EventID: %u): no space left\n",
				 smi_event);
		}
		spin_unlock(&client->lock);
	}

	rcu_read_unlock();
}
0197
0198 __printf(4, 5)
0199 static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
0200 unsigned int event, char *fmt, ...)
0201 {
0202 char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
0203 int len;
0204 va_list args;
0205
0206 if (list_empty(&dev->smi_clients))
0207 return;
0208
0209 len = snprintf(fifo_in, sizeof(fifo_in), "%x ", event);
0210
0211 va_start(args, fmt);
0212 len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
0213 va_end(args);
0214
0215 add_event_to_kfifo(pid, dev, event, fifo_in, len);
0216 }
0217
0218 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
0219 {
0220 unsigned int event;
0221
0222 if (post_reset) {
0223 event = KFD_SMI_EVENT_GPU_POST_RESET;
0224 } else {
0225 event = KFD_SMI_EVENT_GPU_PRE_RESET;
0226 ++(dev->reset_seq_num);
0227 }
0228 kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
0229 }
0230
0231 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
0232 uint64_t throttle_bitmask)
0233 {
0234 kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
0235 throttle_bitmask,
0236 amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
0237 }
0238
0239 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
0240 {
0241 struct amdgpu_task_info task_info;
0242
0243 memset(&task_info, 0, sizeof(struct amdgpu_task_info));
0244 amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
0245
0246 if (!task_info.pid)
0247 return;
0248
0249 kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
0250 task_info.pid, task_info.task_name);
0251 }
0252
0253 void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
0254 unsigned long address, bool write_fault,
0255 ktime_t ts)
0256 {
0257 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_START,
0258 "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
0259 address, dev->id, write_fault ? 'W' : 'R');
0260 }
0261
0262 void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
0263 unsigned long address, bool migration)
0264 {
0265 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_PAGE_FAULT_END,
0266 "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
0267 pid, address, dev->id, migration ? 'M' : 'U');
0268 }
0269
0270 void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
0271 unsigned long start, unsigned long end,
0272 uint32_t from, uint32_t to,
0273 uint32_t prefetch_loc, uint32_t preferred_loc,
0274 uint32_t trigger)
0275 {
0276 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
0277 "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
0278 ktime_get_boottime_ns(), pid, start, end - start,
0279 from, to, prefetch_loc, preferred_loc, trigger);
0280 }
0281
0282 void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
0283 unsigned long start, unsigned long end,
0284 uint32_t from, uint32_t to, uint32_t trigger)
0285 {
0286 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
0287 "%lld -%d @%lx(%lx) %x->%x %d\n",
0288 ktime_get_boottime_ns(), pid, start, end - start,
0289 from, to, trigger);
0290 }
0291
0292 void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid,
0293 uint32_t trigger)
0294 {
0295 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION,
0296 "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
0297 dev->id, trigger);
0298 }
0299
0300 void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid)
0301 {
0302 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE,
0303 "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
0304 dev->id);
0305 }
0306
0307 void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
0308 {
0309 struct kfd_process *p;
0310 int i;
0311
0312 p = kfd_lookup_process_by_mm(mm);
0313 if (!p)
0314 return;
0315
0316 for (i = 0; i < p->n_pdds; i++) {
0317 struct kfd_process_device *pdd = p->pdds[i];
0318
0319 kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
0320 KFD_SMI_EVENT_QUEUE_RESTORE,
0321 "%lld -%d %x %c\n", ktime_get_boottime_ns(),
0322 p->lead_thread->pid, pdd->dev->id, 'R');
0323 }
0324 kfd_unref_process(p);
0325 }
0326
0327 void kfd_smi_event_unmap_from_gpu(struct kfd_dev *dev, pid_t pid,
0328 unsigned long address, unsigned long last,
0329 uint32_t trigger)
0330 {
0331 kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_UNMAP_FROM_GPU,
0332 "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
0333 pid, address, last - address + 1, dev->id, trigger);
0334 }
0335
/*
 * Create a new SMI event client for @dev and return an anonymous-inode
 * file descriptor for it in *@fd.
 *
 * The client is published on dev->smi_clients before the fd is
 * installed; if fd creation fails it is unpublished again and freed
 * only after synchronize_rcu(), so concurrent event producers walking
 * the list under rcu_read_lock() never touch freed memory.
 *
 * Returns 0 on success or a negative errno.
 */
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
	struct kfd_smi_client *client;
	int ret;

	client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
	if (!client)
		return -ENOMEM;
	INIT_LIST_HEAD(&client->list);

	ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
	if (ret) {
		kfree(client);
		return ret;
	}

	init_waitqueue_head(&client->wait_queue);
	spin_lock_init(&client->lock);
	client->events = 0;	/* no events delivered until write() sets a mask */
	client->dev = dev;
	client->pid = current->tgid;
	/* Privileged openers may subscribe to other processes' events. */
	client->suser = capable(CAP_SYS_ADMIN);

	spin_lock(&dev->smi_lock);
	list_add_rcu(&client->list, &dev->smi_clients);
	spin_unlock(&dev->smi_lock);

	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
			       O_RDWR);
	if (ret < 0) {
		spin_lock(&dev->smi_lock);
		list_del_rcu(&client->list);
		spin_unlock(&dev->smi_lock);

		/* Wait for RCU readers of the list before freeing the client. */
		synchronize_rcu();

		kfifo_free(&client->fifo);
		kfree(client);
		return ret;
	}
	*fd = ret;

	return 0;
}