#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node node;
	struct dma_fence *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - initialize a sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the hash of remembered fences for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
}
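
/*
 * Illustrative sketch (not part of the driver): the typical lifecycle of an
 * amdgpu_sync object using only the functions in this file. The surrounding
 * context (adev, resv, owner, extra_fence) is assumed for illustration.
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *
 *	// collect the fences we have to wait for
 *	r = amdgpu_sync_resv(adev, &sync, resv, AMDGPU_SYNC_NE_OWNER, owner);
 *	if (!r)
 *		r = amdgpu_sync_fence(&sync, extra_fence);
 *
 *	// wait for all of them on the CPU ...
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, true);
 *
 *	// ... and always drop the remaining references again
 *	amdgpu_sync_free(&sync);
 */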

/**
 * amdgpu_sync_same_dev - test if a fence belongs to this device
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by one of our rings.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to an existing hash entry
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an
 * entry was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		if (unlikely(e->fence->context != f->context))
			continue;

		amdgpu_sync_keep_later(&e->fence, f);
		return true;
	}
	return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add the fence to
 * @f: fence to sync to
 *
 * Add the fence to the sync object. Only the later fence of each fence
 * context is kept.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f))
		return 0;

	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}
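
/*
 * Illustrative sketch (not part of the driver): entries are keyed by fence
 * context, so adding two fences from the same context keeps only the later
 * one. The fences fa and fb below are assumed to share a context, with fb
 * later than fa.
 *
 *	amdgpu_sync_fence(&sync, fa);
 *	amdgpu_sync_fence(&sync, fb);	// replaces fa via amdgpu_sync_keep_later()
 *	// the sync object now holds a single reference, to fb
 */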

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
				   enum amdgpu_sync_mode mode,
				   void *owner, struct dma_fence *f)
{
	void *fence_owner = amdgpu_sync_get_owner(f);

	/* Always sync to moves, no matter what */
	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
		return true;

	/* We only want to trigger KFD eviction fences on
	 * evict or move jobs. Skip KFD fences otherwise.
	 */
	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
		return false;

	/* Never sync to VM updates either. */
	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
		return false;

	/* Ignore fences depending on the sync mode */
	switch (mode) {
	case AMDGPU_SYNC_ALWAYS:
		return true;

	case AMDGPU_SYNC_NE_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner == owner)
			return false;
		break;

	case AMDGPU_SYNC_EQ_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner != owner)
			return false;
		break;

	case AMDGPU_SYNC_EXPLICIT:
		return false;
	}

	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
	     "Adding eviction fence to sync obj");
	return true;
}

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add the fences from the reservation object to
 * @resv: reservation object with embedded fences
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Add all fences from the reservation object that match the sync mode.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
		     void *owner)
{
	struct dma_resv_iter cursor;
	struct dma_fence *f;
	int r;

	if (resv == NULL)
		return -EINVAL;

	/* Walk all fences, unwrapping dma_fence_chain containers */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
		dma_fence_chain_for_each(f, f) {
			struct dma_fence *tmp = dma_fence_chain_contained(f);

			if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
				r = amdgpu_sync_fence(sync, f);
				dma_fence_put(f);
				if (r)
					return r;
				break;
			}
		}
	}
	return 0;
}
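
/*
 * Illustrative sketch (not part of the driver): syncing a planned submission
 * to the fences of a buffer object's reservation object. The bo and owner
 * variables, and the use of bo->tbo.base.resv, are assumptions made for
 * illustration; the sync mode decides which fences are actually added
 * (see amdgpu_sync_test_fence()).
 *
 *	int r;
 *
 *	r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *			     AMDGPU_SYNC_NE_OWNER, owner);
 *	if (r)
 *		goto error;
 */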

/**
 * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
 * @ring: optional ring to use for the test
 *
 * Returns the next fence not signaled yet without removing it from the sync
 * object.
 */
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}
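
/*
 * Illustrative sketch (not part of the driver): peeking is useful when the
 * caller only needs to know what to wait for next, for example when picking
 * the next dependency of a job that will run on a specific ring. The ring
 * and sync variables are assumed context.
 *
 *	struct dma_fence *fence;
 *
 *	fence = amdgpu_sync_peek_fence(&sync, ring);
 *	if (fence) {
 *		// wait for it or hand it to the scheduler as a dependency;
 *		// for fences from the same ring only the scheduled fence is
 *		// returned, since being scheduled is sufficient there.
 *	}
 */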

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to consume
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {

		f = e->fence;

		hash_del(&e->node);
		kmem_cache_free(amdgpu_sync_slab, e);

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to the destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f);
			if (r)
				return r;
		} else {
			hash_del(&e->node);
			dma_fence_put(f);
			kmem_cache_free(amdgpu_sync_slab, e);
		}
	}

	return 0;
}

/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: the sync object to wait on
 * @intr: whether the wait should be interruptible
 *
 * Waits for every fence in the sync object to signal, dropping the fences as
 * they complete. Returns 0 on success or a negative error code otherwise.
 */
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object by dropping all the fence references it still holds.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		hash_del(&e->node);
		dma_fence_put(e->fence);
		kmem_cache_free(amdgpu_sync_slab, e);
	}
}

/**
 * amdgpu_sync_init - init the sync object subsystem
 *
 * Allocate the slab allocator for the sync entries.
 */
int amdgpu_sync_init(void)
{
	amdgpu_sync_slab = kmem_cache_create(
		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
		SLAB_HWCACHE_ALIGN, NULL);
	if (!amdgpu_sync_slab)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_sync_fini - finish the sync object subsystem
 *
 * Free the slab allocator again.
 */
void amdgpu_sync_fini(void)
{
	kmem_cache_destroy(amdgpu_sync_slab);
}