0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023 #include "kfd_priv.h"
0024 #include <linux/mm.h>
0025 #include <linux/mman.h>
0026 #include <linux/slab.h>
0027 #include <linux/io.h>
0028 #include <linux/idr.h>
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
0051 {
0052 if (!kfd->shared_resources.enable_mes)
0053 return roundup(kfd->device_info.doorbell_size *
0054 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
0055 PAGE_SIZE);
0056 else
0057 return amdgpu_mes_doorbell_process_slice(
0058 (struct amdgpu_device *)kfd->adev);
0059 }
0060
0061
0062 int kfd_doorbell_init(struct kfd_dev *kfd)
0063 {
0064 size_t doorbell_start_offset;
0065 size_t doorbell_aperture_size;
0066 size_t doorbell_process_limit;
0067
0068
0069
0070
0071
0072 if (kfd->shared_resources.enable_mes) {
0073 kfd->doorbell_base =
0074 kfd->shared_resources.doorbell_physical_address;
0075 return 0;
0076 }
0077
0078
0079
0080
0081
0082
0083
0084 doorbell_start_offset =
0085 roundup(kfd->shared_resources.doorbell_start_offset,
0086 kfd_doorbell_process_slice(kfd));
0087
0088 doorbell_aperture_size =
0089 rounddown(kfd->shared_resources.doorbell_aperture_size,
0090 kfd_doorbell_process_slice(kfd));
0091
0092 if (doorbell_aperture_size > doorbell_start_offset)
0093 doorbell_process_limit =
0094 (doorbell_aperture_size - doorbell_start_offset) /
0095 kfd_doorbell_process_slice(kfd);
0096 else
0097 return -ENOSPC;
0098
0099 if (!kfd->max_doorbell_slices ||
0100 doorbell_process_limit < kfd->max_doorbell_slices)
0101 kfd->max_doorbell_slices = doorbell_process_limit;
0102
0103 kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
0104 doorbell_start_offset;
0105
0106 kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
0107
0108 kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
0109 kfd_doorbell_process_slice(kfd));
0110
0111 if (!kfd->doorbell_kernel_ptr)
0112 return -ENOMEM;
0113
0114 pr_debug("Doorbell initialization:\n");
0115 pr_debug("doorbell base == 0x%08lX\n",
0116 (uintptr_t)kfd->doorbell_base);
0117
0118 pr_debug("doorbell_base_dw_offset == 0x%08lX\n",
0119 kfd->doorbell_base_dw_offset);
0120
0121 pr_debug("doorbell_process_limit == 0x%08lX\n",
0122 doorbell_process_limit);
0123
0124 pr_debug("doorbell_kernel_offset == 0x%08lX\n",
0125 (uintptr_t)kfd->doorbell_base);
0126
0127 pr_debug("doorbell aperture size == 0x%08lX\n",
0128 kfd->shared_resources.doorbell_aperture_size);
0129
0130 pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
0131
0132 return 0;
0133 }
0134
0135 void kfd_doorbell_fini(struct kfd_dev *kfd)
0136 {
0137 if (kfd->doorbell_kernel_ptr)
0138 iounmap(kfd->doorbell_kernel_ptr);
0139 }
0140
0141 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
0142 struct vm_area_struct *vma)
0143 {
0144 phys_addr_t address;
0145 struct kfd_process_device *pdd;
0146
0147
0148
0149
0150
0151 if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
0152 return -EINVAL;
0153
0154 pdd = kfd_get_process_device_data(dev, process);
0155 if (!pdd)
0156 return -EINVAL;
0157
0158
0159 address = kfd_get_process_doorbells(pdd);
0160 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
0161 VM_DONTDUMP | VM_PFNMAP;
0162
0163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
0164
0165 pr_debug("Mapping doorbell page\n"
0166 " target user address == 0x%08llX\n"
0167 " physical address == 0x%08llX\n"
0168 " vm_flags == 0x%04lX\n"
0169 " size == 0x%04lX\n",
0170 (unsigned long long) vma->vm_start, address, vma->vm_flags,
0171 kfd_doorbell_process_slice(dev));
0172
0173
0174 return io_remap_pfn_range(vma,
0175 vma->vm_start,
0176 address >> PAGE_SHIFT,
0177 kfd_doorbell_process_slice(dev),
0178 vma->vm_page_prot);
0179 }
0180
0181
0182
0183 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
0184 unsigned int *doorbell_off)
0185 {
0186 u32 inx;
0187
0188 mutex_lock(&kfd->doorbell_mutex);
0189 inx = find_first_zero_bit(kfd->doorbell_available_index,
0190 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
0191
0192 __set_bit(inx, kfd->doorbell_available_index);
0193 mutex_unlock(&kfd->doorbell_mutex);
0194
0195 if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
0196 return NULL;
0197
0198 inx *= kfd->device_info.doorbell_size / sizeof(u32);
0199
0200
0201
0202
0203
0204 *doorbell_off = kfd->doorbell_base_dw_offset + inx;
0205
0206 pr_debug("Get kernel queue doorbell\n"
0207 " doorbell offset == 0x%08X\n"
0208 " doorbell index == 0x%x\n",
0209 *doorbell_off, inx);
0210
0211 return kfd->doorbell_kernel_ptr + inx;
0212 }
0213
0214 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
0215 {
0216 unsigned int inx;
0217
0218 inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
0219 * sizeof(u32) / kfd->device_info.doorbell_size;
0220
0221 mutex_lock(&kfd->doorbell_mutex);
0222 __clear_bit(inx, kfd->doorbell_available_index);
0223 mutex_unlock(&kfd->doorbell_mutex);
0224 }
0225
0226 void write_kernel_doorbell(void __iomem *db, u32 value)
0227 {
0228 if (db) {
0229 writel(value, db);
0230 pr_debug("Writing %d to doorbell address %p\n", value, db);
0231 }
0232 }
0233
0234 void write_kernel_doorbell64(void __iomem *db, u64 value)
0235 {
0236 if (db) {
0237 WARN(((unsigned long)db & 7) != 0,
0238 "Unaligned 64-bit doorbell");
0239 writeq(value, (u64 __iomem *)db);
0240 pr_debug("writing %llu to doorbell address %p\n", value, db);
0241 }
0242 }
0243
0244 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
0245 struct kfd_process_device *pdd,
0246 unsigned int doorbell_id)
0247 {
0248
0249
0250
0251
0252
0253
0254 if (!kfd->shared_resources.enable_mes)
0255 return kfd->doorbell_base_dw_offset +
0256 pdd->doorbell_index
0257 * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
0258 doorbell_id *
0259 kfd->device_info.doorbell_size / sizeof(u32);
0260 else
0261 return amdgpu_mes_get_doorbell_dw_offset_in_bar(
0262 (struct amdgpu_device *)kfd->adev,
0263 pdd->doorbell_index, doorbell_id);
0264 }
0265
0266 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
0267 {
0268 uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
0269 kfd->shared_resources.doorbell_start_offset) /
0270 kfd_doorbell_process_slice(kfd) + 1;
0271
0272 return num_of_elems;
0273
0274 }
0275
0276 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
0277 {
0278 return pdd->dev->doorbell_base +
0279 pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
0280 }
0281
0282 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
0283 {
0284 int r = 0;
0285
0286 if (!kfd->shared_resources.enable_mes)
0287 r = ida_simple_get(&kfd->doorbell_ida, 1,
0288 kfd->max_doorbell_slices, GFP_KERNEL);
0289 else
0290 r = amdgpu_mes_alloc_process_doorbells(
0291 (struct amdgpu_device *)kfd->adev,
0292 doorbell_index);
0293
0294 if (r > 0)
0295 *doorbell_index = r;
0296
0297 return r;
0298 }
0299
0300 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
0301 {
0302 if (doorbell_index) {
0303 if (!kfd->shared_resources.enable_mes)
0304 ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
0305 else
0306 amdgpu_mes_free_process_doorbells(
0307 (struct amdgpu_device *)kfd->adev,
0308 doorbell_index);
0309 }
0310 }