/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */

#include <linux/atomic.h>
#include <linux/firmware.h>
#include <linux/kref.h>
#include <linux/sched/signal.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/wait.h>

#include <drm/drm_device.h>
#include <drm/drm_file.h>

#include "radeon.h"
#include "radeon_reg.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */
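
/*
 * Typical usage, as a rough sketch only (the real call sites live in the
 * command submission paths, not in this file): a fence is emitted under
 * the ring lock right after the commands it protects, and is waited on
 * later from process context before the buffers are reused.
 *
 *    struct radeon_fence *fence;
 *    int r;
 *
 *    r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *    if (!r) {
 *        r = radeon_fence_wait(fence, false);
 *        radeon_fence_unref(&fence);
 *    }
 */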

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
    struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
    if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
        if (drv->cpu_addr) {
            *drv->cpu_addr = cpu_to_le32(seq);
        }
    } else {
        WREG32(drv->scratch_reg, seq);
    }
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
    struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
    u32 seq = 0;

    if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
        if (drv->cpu_addr) {
            seq = le32_to_cpu(*drv->cpu_addr);
        } else {
            seq = lower_32_bits(atomic64_read(&drv->last_seq));
        }
    } else {
        seq = RREG32(drv->scratch_reg);
    }
    return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
    /*
     * Do not reset the timer here with mod_delayed_work,
     * this can livelock in an interaction with TTM delayed destroy.
     */
    queue_delayed_work(system_power_efficient_wq,
               &rdev->fence_drv[ring].lockup_work,
               RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
              struct radeon_fence **fence,
              int ring)
{
    u64 seq;

    /* we are protected by the ring emission mutex */
    *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
    if ((*fence) == NULL) {
        return -ENOMEM;
    }
    (*fence)->rdev = rdev;
    (*fence)->seq = seq = ++rdev->fence_drv[ring].sync_seq[ring];
    (*fence)->ring = ring;
    (*fence)->is_vm_update = false;
    dma_fence_init(&(*fence)->base, &radeon_fence_ops,
               &rdev->fence_queue.lock,
               rdev->fence_context + ring,
               seq);
    radeon_fence_ring_emit(rdev, ring, *fence);
    trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
    radeon_fence_schedule_check(rdev, ring);
    return 0;
}

/*
 * radeon_fence_check_signaled - callback from fence_queue
 *
 * This function is called with the fence_queue lock held, which is also
 * used for the fence locking itself, so the unlocked variants are used
 * for fence_signal and remove_wait_queue.
 */
static int radeon_fence_check_signaled(wait_queue_entry_t *wait, unsigned mode, int flags, void *key)
{
    struct radeon_fence *fence;
    u64 seq;

    fence = container_of(wait, struct radeon_fence, fence_wake);

    /*
     * We cannot use radeon_fence_process here because we're already
     * in the waitqueue, in a call from wake_up_all.
     */
    seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
    if (seq >= fence->seq) {
        dma_fence_signal_locked(&fence->base);
        radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
        __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
        dma_fence_put(&fence->base);
    }
    return 0;
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signaled fence value. Returns true if activity occurred
 * on the ring and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
    uint64_t seq, last_seq, last_emitted;
    unsigned count_loop = 0;
    bool wake = false;

    /* Note there is a scenario here for an infinite loop but it's
     * very unlikely to happen. For it to happen, the current polling
     * process needs to be interrupted by another process that updates
     * last_seq between the atomic read and the xchg of the current
     * process.
     *
     * Moreover, for this to become an infinite loop, new fences need
     * to keep signaling continuously, i.e. radeon_fence_read needs
     * to return a different value each time for both the currently
     * polling process and the other process that updates last_seq
     * between the atomic read and xchg of the current process. And
     * the value the other process sets as last_seq must be higher
     * than the seq value we just read, which means the current
     * process needs to be interrupted after radeon_fence_read and
     * before the atomic xchg.
     *
     * To be even safer we count the number of times we loop and
     * bail out after 10 iterations, accepting the fact that we might
     * have temporarily set last_seq not to the true last signaled
     * seq but to an older one.
     */
    last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
    do {
        last_emitted = rdev->fence_drv[ring].sync_seq[ring];
        seq = radeon_fence_read(rdev, ring);
        seq |= last_seq & 0xffffffff00000000LL;
        if (seq < last_seq) {
            seq &= 0xffffffff;
            seq |= last_emitted & 0xffffffff00000000LL;
        }
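        /*
         * Illustrative values for the 32->64 bit extension above: with
         * last_seq = 0x00000001fffffff0 and a 32-bit readback of 0x00000005,
         * the first OR gives 0x0000000100000005, which is below last_seq,
         * so the lower 32 bits must have wrapped; taking the upper bits
         * from last_emitted (e.g. 0x0000000200000010) yields the correct
         * 64-bit seq 0x0000000200000005.
         */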

        if (seq <= last_seq || seq > last_emitted) {
            break;
        }
        /* If we loop over we don't want to return without
         * checking if a fence is signaled as it means that the
         * seq we just read is different from the previous one.
         */
        wake = true;
        last_seq = seq;
        if ((count_loop++) > 10) {
            /* We looped too many times; leave, accepting the
             * fact that we might have set an older fence
             * seq than the current real last seq signaled
             * by the hw.
             */
            break;
        }
    } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

    if (seq < last_emitted)
        radeon_fence_schedule_check(rdev, ring);

    return wake;
}

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to check whether a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
    struct radeon_fence_driver *fence_drv;
    struct radeon_device *rdev;
    int ring;

    fence_drv = container_of(work, struct radeon_fence_driver,
                 lockup_work.work);
    rdev = fence_drv->rdev;
    ring = fence_drv - &rdev->fence_drv[0];

    if (!down_read_trylock(&rdev->exclusive_lock)) {
        /* just reschedule the check if a reset is going on */
        radeon_fence_schedule_check(rdev, ring);
        return;
    }

    if (fence_drv->delayed_irq && rdev->irq.installed) {
        unsigned long irqflags;

        fence_drv->delayed_irq = false;
        spin_lock_irqsave(&rdev->irq.lock, irqflags);
        radeon_irq_set(rdev);
        spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
    }

    if (radeon_fence_activity(rdev, ring))
        wake_up_all(&rdev->fence_queue);

    else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {

        /* good news we believe it's a lockup */
        dev_warn(rdev->dev, "GPU lockup (current fence id "
             "0x%016llx last fence id 0x%016llx on ring %d)\n",
             (uint64_t)atomic64_read(&fence_drv->last_seq),
             fence_drv->sync_seq[ring], ring);

        /* remember that we need a reset */
        rdev->needs_reset = true;
        wake_up_all(&rdev->fence_queue);
    }
    up_read(&rdev->exclusive_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
    if (radeon_fence_activity(rdev, ring))
        wake_up_all(&rdev->fence_queue);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
                      u64 seq, unsigned ring)
{
    if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
        return true;
    }
    /* poll new last sequence at least once */
    radeon_fence_process(rdev, ring);
    if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
        return true;
    }
    return false;
}

static bool radeon_fence_is_signaled(struct dma_fence *f)
{
    struct radeon_fence *fence = to_radeon_fence(f);
    struct radeon_device *rdev = fence->rdev;
    unsigned ring = fence->ring;
    u64 seq = fence->seq;

    if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
        return true;
    }

    if (down_read_trylock(&rdev->exclusive_lock)) {
        radeon_fence_process(rdev, ring);
        up_read(&rdev->exclusive_lock);

        if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
            return true;
        }
    }
    return false;
}

/**
 * radeon_fence_enable_signaling - enable signaling on a fence
 * @f: fence
 *
 * This function is called with the fence_queue lock held, and adds a callback
 * to fence_queue that checks if this fence is signaled, and if so it
 * signals the fence and removes itself.
 */
static bool radeon_fence_enable_signaling(struct dma_fence *f)
{
    struct radeon_fence *fence = to_radeon_fence(f);
    struct radeon_device *rdev = fence->rdev;

    if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
        return false;

    if (down_read_trylock(&rdev->exclusive_lock)) {
        radeon_irq_kms_sw_irq_get(rdev, fence->ring);

        if (radeon_fence_activity(rdev, fence->ring))
            wake_up_all_locked(&rdev->fence_queue);

        /* did fence get signaled after we enabled the sw irq? */
        if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
            radeon_irq_kms_sw_irq_put(rdev, fence->ring);
            up_read(&rdev->exclusive_lock);
            return false;
        }

        up_read(&rdev->exclusive_lock);
    } else {
        /* we're probably in a lockup, let's not fiddle too much */
        if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
            rdev->fence_drv[fence->ring].delayed_irq = true;
        radeon_fence_schedule_check(rdev, fence->ring);
    }

    fence->fence_wake.flags = 0;
    fence->fence_wake.private = NULL;
    fence->fence_wake.func = radeon_fence_check_signaled;
    __add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
    dma_fence_get(f);
    return true;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
    if (!fence)
        return true;

    if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
        dma_fence_signal(&fence->base);
        return true;
    }
    return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if any ring has signaled (current value is >= requested value)
 * or false if none have. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
    unsigned i;

    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
            return true;
    }
    return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
 * Returns remaining time if the sequence number has passed, 0 if the
 * wait timed out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
                      u64 *target_seq, bool intr,
                      long timeout)
{
    long r;
    int i;

    if (radeon_fence_any_seq_signaled(rdev, target_seq))
        return timeout;

    /* enable IRQs and tracing */
    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        if (!target_seq[i])
            continue;

        trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
        radeon_irq_kms_sw_irq_get(rdev, i);
    }

    if (intr) {
        r = wait_event_interruptible_timeout(rdev->fence_queue, (
            radeon_fence_any_seq_signaled(rdev, target_seq)
             || rdev->needs_reset), timeout);
    } else {
        r = wait_event_timeout(rdev->fence_queue, (
            radeon_fence_any_seq_signaled(rdev, target_seq)
             || rdev->needs_reset), timeout);
    }

    if (rdev->needs_reset)
        r = -EDEADLK;

    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        if (!target_seq[i])
            continue;

        radeon_irq_kms_sw_irq_put(rdev, i);
        trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
    }

    return r;
}

/**
 * radeon_fence_wait_timeout - wait for a fence to signal with timeout
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns remaining time if the sequence number has passed, 0 if the
 * wait timed out, or an error for all other cases.
 */
long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
{
    uint64_t seq[RADEON_NUM_RINGS] = {};
    long r;

    /*
     * This function should not be called on !radeon fences.
     * If this is the case, it would mean this function can
     * also be called on radeon fences belonging to another card.
     * exclusive_lock is not held in that case.
     */
    if (WARN_ON_ONCE(!to_radeon_fence(&fence->base)))
        return dma_fence_wait(&fence->base, intr);

    seq[fence->ring] = fence->seq;
    r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
    if (r <= 0) {
        return r;
    }

    dma_fence_signal(&fence->base);
    return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
    long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
    if (r > 0) {
        return 0;
    } else {
        return r;
    }
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  Fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
              struct radeon_fence **fences,
              bool intr)
{
    uint64_t seq[RADEON_NUM_RINGS];
    unsigned i, num_rings = 0;
    long r;

    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        seq[i] = 0;

        if (!fences[i]) {
            continue;
        }

        seq[i] = fences[i]->seq;
        ++num_rings;
    }

    /* nothing to wait for? */
    if (num_rings == 0)
        return -ENOENT;

    r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
    if (r < 0) {
        return r;
    }
    return 0;
}
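
/*
 * Illustrative caller sketch (hypothetical, not taken from this driver):
 * the array handed to radeon_fence_wait_any() is indexed by ring id and
 * unused slots are simply left NULL.
 *
 *    struct radeon_fence *fences[RADEON_NUM_RINGS] = {};
 *
 *    fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *    fences[R600_RING_TYPE_DMA_INDEX] = dma_fence;
 *    r = radeon_fence_wait_any(rdev, fences, false);
 */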

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
    uint64_t seq[RADEON_NUM_RINGS] = {};
    long r;

    seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
    if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
        /* nothing to wait for, last_seq is
           already the last emitted fence */
        return -ENOENT;
    }
    r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
    if (r < 0)
        return r;
    return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
    uint64_t seq[RADEON_NUM_RINGS] = {};
    long r;

    seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
    if (!seq[ring])
        return 0;

    r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
    if (r < 0) {
        if (r == -EDEADLK)
            return -EDEADLK;

        dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
            ring, r);
    }
    return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
    dma_fence_get(&fence->base);
    return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
    struct radeon_fence *tmp = *fence;

    *fence = NULL;
    if (tmp) {
        dma_fence_put(&tmp->base);
    }
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
    uint64_t emitted;

    /* We are not protected by ring lock when reading the last sequence
     * but it's ok to report slightly wrong fence count here.
     */
    radeon_fence_process(rdev, ring);
    emitted = rdev->fence_drv[ring].sync_seq[ring]
        - atomic64_read(&rdev->fence_drv[ring].last_seq);
    /* to avoid a 32-bit wraparound */
    if (emitted > 0x10000000) {
        emitted = 0x10000000;
    }
    return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
    struct radeon_fence_driver *fdrv;

    if (!fence) {
        return false;
    }

    if (fence->ring == dst_ring) {
        return false;
    }

    /* we are protected by the ring mutex */
    fdrv = &fence->rdev->fence_drv[dst_ring];
    if (fence->seq <= fdrv->sync_seq[fence->ring]) {
        return false;
    }

    return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
    struct radeon_fence_driver *dst, *src;
    unsigned i;

    if (!fence) {
        return;
    }

    if (fence->ring == dst_ring) {
        return;
    }

    /* we are protected by the ring mutex */
    src = &fence->rdev->fence_drv[fence->ring];
    dst = &fence->rdev->fence_drv[dst_ring];
    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        if (i == dst_ring) {
            continue;
        }
        dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
    }
}
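
/*
 * Assumed caller pattern (a sketch, not code from this file): before a
 * second ring consumes buffers protected by this fence, the submission
 * path checks radeon_fence_need_sync() and, once a semaphore wait has
 * been emitted on the destination ring, records it with
 * radeon_fence_note_sync() so the same sync is not emitted again:
 *
 *    if (radeon_fence_need_sync(fence, dst_ring)) {
 *        ... emit a semaphore wait on dst_ring ...
 *        radeon_fence_note_sync(fence, dst_ring);
 *    }
 */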

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
    uint64_t index;
    int r;

    radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
    if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
        rdev->fence_drv[ring].scratch_reg = 0;
        if (ring != R600_RING_TYPE_UVD_INDEX) {
            index = R600_WB_EVENT_OFFSET + ring * 4;
            rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
            rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
                             index;

        } else {
            /* put fence directly behind firmware */
            index = ALIGN(rdev->uvd_fw->size, 8);
            rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
            rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
        }

    } else {
        r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
        if (r) {
            dev_err(rdev->dev, "fence failed to get scratch register\n");
            return r;
        }
        index = RADEON_WB_SCRATCH_OFFSET +
            rdev->fence_drv[ring].scratch_reg -
            rdev->scratch.reg_base;
        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
    }
    radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
    rdev->fence_drv[ring].initialized = true;
    dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx\n",
         ring, rdev->fence_drv[ring].gpu_addr);
    return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
    int i;

    rdev->fence_drv[ring].scratch_reg = -1;
    rdev->fence_drv[ring].cpu_addr = NULL;
    rdev->fence_drv[ring].gpu_addr = 0;
    for (i = 0; i < RADEON_NUM_RINGS; ++i)
        rdev->fence_drv[ring].sync_seq[i] = 0;
    atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
    rdev->fence_drv[ring].initialized = false;
    INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
              radeon_fence_check_lockup);
    rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 */
void radeon_fence_driver_init(struct radeon_device *rdev)
{
    int ring;

    init_waitqueue_head(&rdev->fence_queue);
    for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
        radeon_fence_driver_init_ring(rdev, ring);
    }

    radeon_debugfs_fence_init(rdev);
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
    int ring, r;

    mutex_lock(&rdev->ring_lock);
    for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
        if (!rdev->fence_drv[ring].initialized)
            continue;
        r = radeon_fence_wait_empty(rdev, ring);
        if (r) {
            /* no need to trigger GPU reset as we are unloading */
            radeon_fence_driver_force_completion(rdev, ring);
        }
        cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
        wake_up_all(&rdev->fence_queue);
        radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
        rdev->fence_drv[ring].initialized = false;
    }
    mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure make sure no process keeps waiting on a fence
 * that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
    if (rdev->fence_drv[ring].initialized) {
        radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
        cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
    }
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info_show(struct seq_file *m, void *data)
{
    struct radeon_device *rdev = (struct radeon_device *)m->private;
    int i, j;

    for (i = 0; i < RADEON_NUM_RINGS; ++i) {
        if (!rdev->fence_drv[i].initialized)
            continue;

        radeon_fence_process(rdev, i);

        seq_printf(m, "--- ring %d ---\n", i);
        seq_printf(m, "Last signaled fence 0x%016llx\n",
               (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
        seq_printf(m, "Last emitted        0x%016llx\n",
               rdev->fence_drv[i].sync_seq[i]);

        for (j = 0; j < RADEON_NUM_RINGS; ++j) {
            if (i != j && rdev->fence_drv[j].initialized)
                seq_printf(m, "Last sync to ring %d 0x%016llx\n",
                       j, rdev->fence_drv[i].sync_seq[j]);
        }
    }
    return 0;
}

/*
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(void *data, u64 *val)
{
    struct radeon_device *rdev = (struct radeon_device *)data;

    down_read(&rdev->exclusive_lock);
    *val = rdev->needs_reset;
    rdev->needs_reset = true;
    wake_up_all(&rdev->fence_queue);
    up_read(&rdev->exclusive_lock);

    return 0;
}
DEFINE_SHOW_ATTRIBUTE(radeon_debugfs_fence_info);
DEFINE_DEBUGFS_ATTRIBUTE(radeon_debugfs_gpu_reset_fops,
             radeon_debugfs_gpu_reset, NULL, "%lld\n");
#endif

void radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
    struct dentry *root = rdev->ddev->primary->debugfs_root;

    debugfs_create_file("radeon_gpu_reset", 0444, root, rdev,
                &radeon_debugfs_gpu_reset_fops);
    debugfs_create_file("radeon_fence_info", 0444, root, rdev,
                &radeon_debugfs_fence_info_fops);

#endif
}

static const char *radeon_fence_get_driver_name(struct dma_fence *fence)
{
    return "radeon";
}

static const char *radeon_fence_get_timeline_name(struct dma_fence *f)
{
    struct radeon_fence *fence = to_radeon_fence(f);
    switch (fence->ring) {
    case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
    case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
    case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
    case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
    case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
    case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
    case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
    case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
    default: WARN_ON_ONCE(1); return "radeon.unk";
    }
}

static inline bool radeon_test_signaled(struct radeon_fence *fence)
{
    return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
}

struct radeon_wait_cb {
    struct dma_fence_cb base;
    struct task_struct *task;
};

static void
radeon_fence_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
    struct radeon_wait_cb *wait =
        container_of(cb, struct radeon_wait_cb, base);

    wake_up_process(wait->task);
}

static signed long radeon_fence_default_wait(struct dma_fence *f, bool intr,
                         signed long t)
{
    struct radeon_fence *fence = to_radeon_fence(f);
    struct radeon_device *rdev = fence->rdev;
    struct radeon_wait_cb cb;

    cb.task = current;

    if (dma_fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
        return t;

    while (t > 0) {
        if (intr)
            set_current_state(TASK_INTERRUPTIBLE);
        else
            set_current_state(TASK_UNINTERRUPTIBLE);

        /*
         * radeon_test_signaled must be called after
         * set_current_state to prevent a race with wake_up_process
         */
        if (radeon_test_signaled(fence))
            break;

        if (rdev->needs_reset) {
            t = -EDEADLK;
            break;
        }

        t = schedule_timeout(t);

        if (t > 0 && intr && signal_pending(current))
            t = -ERESTARTSYS;
    }

    __set_current_state(TASK_RUNNING);
    dma_fence_remove_callback(f, &cb.base);

    return t;
}

const struct dma_fence_ops radeon_fence_ops = {
    .get_driver_name = radeon_fence_get_driver_name,
    .get_timeline_name = radeon_fence_get_timeline_name,
    .enable_signaling = radeon_fence_enable_signaling,
    .signaled = radeon_fence_is_signaled,
    .wait = radeon_fence_default_wait,
    .release = NULL,
};