Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Tegra host1x Command DMA
0004  *
0005  * Copyright (c) 2010-2013, NVIDIA Corporation.
0006  */
0007 
0008 
0009 #include <asm/cacheflush.h>
0010 #include <linux/device.h>
0011 #include <linux/dma-mapping.h>
0012 #include <linux/host1x.h>
0013 #include <linux/interrupt.h>
0014 #include <linux/kernel.h>
0015 #include <linux/kfifo.h>
0016 #include <linux/slab.h>
0017 #include <trace/events/host1x.h>
0018 
0019 #include "cdma.h"
0020 #include "channel.h"
0021 #include "dev.h"
0022 #include "debug.h"
0023 #include "job.h"
0024 
0025 /*
0026  * push_buffer
0027  *
0028  * The push buffer is a circular array of words to be fetched by command DMA.
0029  * Note that it works slightly differently to the sync queue; fence == pos
0030  * means that the push buffer is full, not empty.
0031  */
0032 
0033 /*
0034  * Typically the commands written into the push buffer are a pair of words. We
0035  * use slots to represent each of these pairs and to simplify things. Note the
0036  * strange number of slots allocated here. 512 slots will fit exactly within a
0037  * single memory page. We also need one additional word at the end of the push
0038  * buffer for the RESTART opcode that will instruct the CDMA to jump back to
0039  * the beginning of the push buffer. With 512 slots, this means that we'll use
0040  * 2 memory pages and waste 4092 bytes of the second page that will never be
0041  * used.
0042  */
0043 #define HOST1X_PUSHBUFFER_SLOTS 511
0044 
/*
 * Clean up push buffer resources
 *
 * Undoes host1x_pushbuffer_init(): removes the IOMMU mapping and IOVA
 * reservation (when a domain was used) before releasing the
 * write-combined allocation. Safe to call on a push buffer that was
 * never initialized or has already been destroyed (pb->mapped == NULL).
 */
static void host1x_pushbuffer_destroy(struct push_buffer *pb)
{
	struct host1x_cdma *cdma = pb_to_cdma(pb);
	struct host1x *host1x = cdma_to_host1x(cdma);

	/* nothing allocated, or already torn down */
	if (!pb->mapped)
		return;

	if (host1x->domain) {
		/* unmap from the device before releasing the IOVA range */
		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
	}

	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);

	/* mark as freed so a repeated destroy is a no-op */
	pb->mapped = NULL;
	pb->phys = 0;
}
0066 
/*
 * Init push buffer resources
 *
 * Allocates the push buffer backing memory as write-combined (so CPU
 * opcode writes reach the device without explicit cache maintenance)
 * and, when the host1x sits behind an IOMMU domain, maps it into the
 * device's IOVA space with read-only access.
 *
 * Returns 0 on success or a negative error code on failure; on failure
 * all partially acquired resources are released.
 */
static int host1x_pushbuffer_init(struct push_buffer *pb)
{
	struct host1x_cdma *cdma = pb_to_cdma(pb);
	struct host1x *host1x = cdma_to_host1x(cdma);
	struct iova *alloc;
	u32 size;
	int err;

	pb->mapped = NULL;
	pb->phys = 0;
	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;

	/* one extra word for the RESTART opcode at the end of the buffer */
	size = pb->size + 4;

	/* initialize buffer pointers */
	pb->fence = pb->size - 8;
	pb->pos = 0;

	if (host1x->domain) {
		unsigned long shift;

		/* IOMMU mappings must cover whole IOVA granules */
		size = iova_align(&host1x->iova, size);

		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
					  GFP_KERNEL);
		if (!pb->mapped)
			return -ENOMEM;

		shift = iova_shift(&host1x->iova);
		alloc = alloc_iova(&host1x->iova, size >> shift,
				   host1x->iova_end >> shift, true);
		if (!alloc) {
			err = -ENOMEM;
			goto iommu_free_mem;
		}

		/* the device fetches via pb->dma; the CPU writes via pb->mapped */
		pb->dma = iova_dma_addr(&host1x->iova, alloc);
		err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
				IOMMU_READ);
		if (err)
			goto iommu_free_iova;
	} else {
		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
					  GFP_KERNEL);
		if (!pb->mapped)
			return -ENOMEM;

		/* no IOMMU: the device uses the DMA address directly */
		pb->dma = pb->phys;
	}

	pb->alloc_size = size;

	host1x_hw_pushbuffer_init(host1x, pb);

	return 0;

iommu_free_iova:
	__free_iova(&host1x->iova, alloc);
iommu_free_mem:
	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);

	return err;
}
0133 
0134 /*
0135  * Push two words to the push buffer
0136  * Caller must ensure push buffer is not full
0137  */
0138 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
0139 {
0140     u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
0141 
0142     WARN_ON(pb->pos == pb->fence);
0143     *(p++) = op1;
0144     *(p++) = op2;
0145     pb->pos += 8;
0146 
0147     if (pb->pos >= pb->size)
0148         pb->pos -= pb->size;
0149 }
0150 
0151 /*
0152  * Pop a number of two word slots from the push buffer
0153  * Caller must ensure push buffer is not empty
0154  */
0155 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
0156 {
0157     /* Advance the next write position */
0158     pb->fence += slots * 8;
0159 
0160     if (pb->fence >= pb->size)
0161         pb->fence -= pb->size;
0162 }
0163 
0164 /*
0165  * Return the number of two word slots free in the push buffer
0166  */
0167 static u32 host1x_pushbuffer_space(struct push_buffer *pb)
0168 {
0169     unsigned int fence = pb->fence;
0170 
0171     if (pb->fence < pb->pos)
0172         fence += pb->size;
0173 
0174     return (fence - pb->pos) / 8;
0175 }
0176 
/*
 * Sleep (if necessary) until the requested event happens
 *   - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
 *     - Returns 1
 *   - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
 *     - Return the amount of space (> 0)
 * Must be called with the cdma lock held.
 */
unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
				     enum cdma_event event)
{
	for (;;) {
		struct push_buffer *pb = &cdma->push_buffer;
		unsigned int space;

		switch (event) {
		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
			space = list_empty(&cdma->sync_queue) ? 1 : 0;
			break;

		case CDMA_EVENT_PUSH_BUFFER_SPACE:
			space = host1x_pushbuffer_space(pb);
			break;

		default:
			/*
			 * NOTE(review): the return type is unsigned, so
			 * -EINVAL is seen by callers as a large positive
			 * value; only reachable for an invalid event.
			 */
			WARN_ON(1);
			return -EINVAL;
		}

		/* condition already satisfied, no need to sleep */
		if (space)
			return space;

		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
				       event);

		/* If somebody has managed to already start waiting, yield */
		if (cdma->event != CDMA_EVENT_NONE) {
			mutex_unlock(&cdma->lock);
			schedule();
			mutex_lock(&cdma->lock);
			continue;
		}

		/*
		 * Record what we wait for; update_cdma_locked() resets
		 * cdma->event to CDMA_EVENT_NONE and completes us.
		 */
		cdma->event = event;

		/* drop the lock while sleeping so the completer can run */
		mutex_unlock(&cdma->lock);
		wait_for_completion(&cdma->complete);
		mutex_lock(&cdma->lock);
	}

	/* not reached; the loop above only exits via return */
	return 0;
}
0229 
/*
 * Sleep (if necessary) until the push buffer has enough free space.
 *
 * Must be called with the cdma lock held.
 *
 * Unlike host1x_cdma_wait_locked(), this waits for a specific number of
 * free slots (@needed) rather than for any space at all. Always returns 0.
 */
static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
					     struct host1x_cdma *cdma,
					     unsigned int needed)
{
	while (true) {
		struct push_buffer *pb = &cdma->push_buffer;
		unsigned int space;

		space = host1x_pushbuffer_space(pb);
		if (space >= needed)
			break;

		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
				       CDMA_EVENT_PUSH_BUFFER_SPACE);

		/* kick the engine so queued slots get consumed and freed */
		host1x_hw_cdma_flush(host1x, cdma);

		/* If somebody has managed to already start waiting, yield */
		if (cdma->event != CDMA_EVENT_NONE) {
			mutex_unlock(&cdma->lock);
			schedule();
			mutex_lock(&cdma->lock);
			continue;
		}

		cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE;

		/* drop the lock while sleeping; update_cdma_locked() wakes us */
		mutex_unlock(&cdma->lock);
		wait_for_completion(&cdma->complete);
		mutex_lock(&cdma->lock);
	}

	return 0;
}
0269 /*
0270  * Start timer that tracks the time spent by the job.
0271  * Must be called with the cdma lock held.
0272  */
0273 static void cdma_start_timer_locked(struct host1x_cdma *cdma,
0274                     struct host1x_job *job)
0275 {
0276     if (cdma->timeout.client) {
0277         /* timer already started */
0278         return;
0279     }
0280 
0281     cdma->timeout.client = job->client;
0282     cdma->timeout.syncpt = job->syncpt;
0283     cdma->timeout.syncpt_val = job->syncpt_end;
0284     cdma->timeout.start_ktime = ktime_get();
0285 
0286     schedule_delayed_work(&cdma->timeout.wq,
0287                   msecs_to_jiffies(job->timeout));
0288 }
0289 
/*
 * Stop timer when a buffer submission completes.
 * Must be called with the cdma lock held.
 */
static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
{
	/*
	 * NOTE(review): cancel_delayed_work() does not wait for a handler
	 * that is already executing; presumably the timeout handler checks
	 * timeout.client (cleared below) under the cdma lock — confirm in
	 * the hw timeout implementation.
	 */
	cancel_delayed_work(&cdma->timeout.wq);
	/* mark the timer idle so cdma_start_timer_locked() can re-arm it */
	cdma->timeout.client = NULL;
}
0299 
/*
 * For all sync queue entries that have already finished according to the
 * current sync point registers:
 *  - unpin & unref their mems
 *  - pop their push buffer slots
 *  - remove them from the sync queue
 * This is normally called from the host code's worker thread, but can be
 * called manually if necessary.
 * Must be called with the cdma lock held.
 */
static void update_cdma_locked(struct host1x_cdma *cdma)
{
	bool signal = false;
	struct host1x_job *job, *n;

	/*
	 * Walk the sync queue, reading the sync point registers as necessary,
	 * to consume as many sync queue entries as possible without blocking
	 */
	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
		struct host1x_syncpt *sp = job->syncpt;

		/* Check whether this syncpt has completed, and bail if not */
		if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
		    !job->cancelled) {
			/* Start timer on next pending syncpt */
			if (job->timeout)
				cdma_start_timer_locked(cdma, job);

			/*
			 * Jobs are queued in submission order (see the
			 * list_add_tail() in host1x_cdma_end()), so stop at
			 * the first unfinished one.
			 */
			break;
		}

		/* Cancel timeout, when a buffer completes */
		if (cdma->timeout.client)
			stop_cdma_timer_locked(cdma);

		/* Unpin the memory */
		host1x_job_unpin(job);

		/* Pop push buffer slots */
		if (job->num_slots) {
			struct push_buffer *pb = &cdma->push_buffer;

			host1x_pushbuffer_pop(pb, job->num_slots);

			/* a waiter for pushbuffer space can now make progress */
			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
				signal = true;
		}

		list_del(&job->list);
		/* drop the reference taken in host1x_cdma_end() */
		host1x_job_put(job);
	}

	if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
	    list_empty(&cdma->sync_queue))
		signal = true;

	/* wake the single waiter recorded in cdma->event */
	if (signal) {
		cdma->event = CDMA_EVENT_NONE;
		complete(&cdma->complete);
	}
}
0362 
/*
 * Timeout recovery: clean up the sync queue after a job has timed out.
 *
 * Skips jobs that completed anyway, then either CPU-increments the stuck
 * job's remaining syncpoint values (recovery mode) or cancels it and every
 * later job on the same syncpoint by patching their opcodes out of the
 * push buffer, and finally restarts the channel past the bad job.
 *
 * Called from the timeout path with the cdma lock held.
 */
void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
				   struct device *dev)
{
	struct host1x *host1x = cdma_to_host1x(cdma);
	u32 restart_addr, syncpt_incrs, syncpt_val;
	struct host1x_job *job, *next_job = NULL;

	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);

	dev_dbg(dev, "%s: starting cleanup (thresh %d)\n",
		__func__, syncpt_val);

	/*
	 * Move the sync_queue read pointer to the first entry that hasn't
	 * completed based on the current HW syncpt value. It's likely there
	 * won't be any (i.e. we're still at the head), but covers the case
	 * where a syncpt incr happens just prior/during the teardown.
	 */

	dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n",
		__func__);

	list_for_each_entry(job, &cdma->sync_queue, list) {
		if (syncpt_val < job->syncpt_end) {
			/* first unfinished job; remember where to restart */
			if (!list_is_last(&job->list, &cdma->sync_queue))
				next_job = list_next_entry(job, list);

			goto syncpt_incr;
		}

		host1x_job_dump(dev, job);
	}

	/* all jobs have been completed */
	job = NULL;

syncpt_incr:

	/*
	 * Increment with CPU the remaining syncpts of a partially executed job.
	 *
	 * CDMA will continue execution starting with the next job or will get
	 * into idle state.
	 */
	if (next_job)
		restart_addr = next_job->first_get;
	else
		restart_addr = cdma->last_pos;

	/* everything completed: nothing to fix up, just restart the channel */
	if (!job)
		goto resume;

	/* do CPU increments for the remaining syncpts */
	if (job->syncpt_recovery) {
		dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
			__func__);

		/* won't need a timeout when replayed */
		job->timeout = 0;

		syncpt_incrs = job->syncpt_end - syncpt_val;
		dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs);

		host1x_job_dump(dev, job);

		/* safe to use CPU to incr syncpts */
		host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
						syncpt_incrs, job->syncpt_end,
						job->num_slots);

		dev_dbg(dev, "%s: finished sync_queue modification\n",
			__func__);
	} else {
		struct host1x_job *failed_job = job;

		host1x_job_dump(dev, job);

		/*
		 * Lock the syncpoint so host1x_cdma_begin() rejects further
		 * submits on it (-EPERM), then cancel the offending job.
		 */
		host1x_syncpt_set_locked(job->syncpt);
		failed_job->cancelled = true;

		/* cancel all later jobs that use the same syncpoint */
		list_for_each_entry_continue(job, &cdma->sync_queue, list) {
			unsigned int i;

			if (job->syncpt != failed_job->syncpt)
				continue;

			/* patch every slot the job occupies in the pushbuffer */
			for (i = 0; i < job->num_slots; i++) {
				unsigned int slot = (job->first_get/8 + i) %
						    HOST1X_PUSHBUFFER_SLOTS;
				u32 *mapped = cdma->push_buffer.mapped;

				/*
				 * Overwrite opcodes with 0 word writes
				 * to offset 0xbad. This does nothing but
				 * has an easily detected signature in debug
				 * traces.
				 *
				 * On systems with MLOCK enforcement enabled,
				 * the above 0 word writes would fall foul of
				 * the enforcement. As such, in the first slot
				 * put a RESTART_W opcode to the beginning
				 * of the next job. We don't use this for older
				 * chips since those only support the RESTART
				 * opcode with inconvenient alignment requirements.
				 */
				if (i == 0 && host1x->info->has_wide_gather) {
					unsigned int next_job = (job->first_get/8 + job->num_slots)
						% HOST1X_PUSHBUFFER_SLOTS;
					mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
					mapped[2*slot+1] = 0x0;
				} else {
					mapped[2*slot+0] = 0x1bad0000;
					mapped[2*slot+1] = 0x1bad0000;
				}
			}

			job->cancelled = true;
		}

		/* make the patched opcodes visible before the HW refetches */
		wmb();

		/* reap the cancelled jobs immediately */
		update_cdma_locked(cdma);
	}

resume:
	/* roll back DMAGET and start up channel again */
	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
0492 
0493 /*
0494  * Create a cdma
0495  */
0496 int host1x_cdma_init(struct host1x_cdma *cdma)
0497 {
0498     int err;
0499 
0500     mutex_init(&cdma->lock);
0501     init_completion(&cdma->complete);
0502 
0503     INIT_LIST_HEAD(&cdma->sync_queue);
0504 
0505     cdma->event = CDMA_EVENT_NONE;
0506     cdma->running = false;
0507     cdma->torndown = false;
0508 
0509     err = host1x_pushbuffer_init(&cdma->push_buffer);
0510     if (err)
0511         return err;
0512 
0513     return 0;
0514 }
0515 
0516 /*
0517  * Destroy a cdma
0518  */
0519 int host1x_cdma_deinit(struct host1x_cdma *cdma)
0520 {
0521     struct push_buffer *pb = &cdma->push_buffer;
0522     struct host1x *host1x = cdma_to_host1x(cdma);
0523 
0524     if (cdma->running) {
0525         pr_warn("%s: CDMA still running\n", __func__);
0526         return -EBUSY;
0527     }
0528 
0529     host1x_pushbuffer_destroy(pb);
0530     host1x_hw_cdma_timeout_destroy(host1x, cdma);
0531 
0532     return 0;
0533 }
0534 
/*
 * Begin a cdma submit
 *
 * Takes the cdma lock (released later by host1x_cdma_end()), lazily
 * initializes timeout handling, starts the channel if it isn't running
 * yet and resets the per-submit slot accounting.
 *
 * Returns 0 on success, -EPERM if the job's syncpoint was locked by a
 * previous timeout, or a negative error code from timeout init.
 */
int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
{
	struct host1x *host1x = cdma_to_host1x(cdma);

	/* held across the whole submit; host1x_cdma_end() unlocks */
	mutex_lock(&cdma->lock);

	/*
	 * Check if syncpoint was locked due to previous job timeout.
	 * This needs to be done within the cdma lock to avoid a race
	 * with the timeout handler.
	 */
	if (job->syncpt->locked) {
		mutex_unlock(&cdma->lock);
		return -EPERM;
	}

	if (job->timeout) {
		/* init state on first submit with timeout value */
		if (!cdma->timeout.initialized) {
			int err;

			err = host1x_hw_cdma_timeout_init(host1x, cdma);
			if (err) {
				mutex_unlock(&cdma->lock);
				return err;
			}
		}
	}

	if (!cdma->running)
		host1x_hw_cdma_start(host1x, cdma);

	/* reset slot accounting for this submit */
	cdma->slots_free = 0;
	cdma->slots_used = 0;
	/* remember where this job's opcodes begin in the push buffer */
	cdma->first_get = cdma->push_buffer.pos;

	trace_host1x_cdma_begin(dev_name(job->channel->dev));
	return 0;
}
0577 
0578 /*
0579  * Push two words into a push buffer slot
0580  * Blocks as necessary if the push buffer is full.
0581  */
0582 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
0583 {
0584     struct host1x *host1x = cdma_to_host1x(cdma);
0585     struct push_buffer *pb = &cdma->push_buffer;
0586     u32 slots_free = cdma->slots_free;
0587 
0588     if (host1x_debug_trace_cmdbuf)
0589         trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
0590                        op1, op2);
0591 
0592     if (slots_free == 0) {
0593         host1x_hw_cdma_flush(host1x, cdma);
0594         slots_free = host1x_cdma_wait_locked(cdma,
0595                         CDMA_EVENT_PUSH_BUFFER_SPACE);
0596     }
0597 
0598     cdma->slots_free = slots_free - 1;
0599     cdma->slots_used++;
0600     host1x_pushbuffer_push(pb, op1, op2);
0601 }
0602 
/*
 * Push four words into two consecutive push buffer slots. Note that extra
 * care needs to be taken not to split the two slots across the end of the
 * push buffer. Otherwise the RESTART opcode at the end of the push buffer
 * that ensures processing will restart at the beginning will break up the
 * four words.
 *
 * Blocks as necessary if the push buffer is full.
 */
void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
			   u32 op3, u32 op4)
{
	struct host1x_channel *channel = cdma_to_channel(cdma);
	struct host1x *host1x = cdma_to_host1x(cdma);
	struct push_buffer *pb = &cdma->push_buffer;
	unsigned int space = cdma->slots_free;
	/* 2 slots for the four opcode words, plus padding when wrapping */
	unsigned int needed = 2, extra = 0;

	if (host1x_debug_trace_cmdbuf)
		trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
					    op3, op4);

	/* compute number of extra slots needed for padding */
	if (pb->pos + 16 > pb->size) {
		extra = (pb->size - pb->pos) / 8;
		needed += extra;
	}

	host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed);
	space = host1x_pushbuffer_space(pb);

	/*
	 * Account the padding slots as used too, so that the later
	 * host1x_pushbuffer_pop() of job->num_slots stays consistent.
	 */
	cdma->slots_free = space - needed;
	cdma->slots_used += needed;

	if (extra > 0) {
		/*
		 * If there isn't enough space at the tail of the pushbuffer,
		 * insert a RESTART(0) here to go back to the beginning.
		 * The code above adjusted the indexes appropriately.
		 */
		host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
	}

	host1x_pushbuffer_push(pb, op1, op2);
	host1x_pushbuffer_push(pb, op3, op4);
}
0649 
/*
 * End a cdma submit
 * Kick off DMA, add job to the sync queue, and a number of slots to be freed
 * from the pushbuffer. The handles for a submit must all be pinned at the same
 * time, but they can be unpinned in smaller chunks.
 *
 * Releases the cdma lock taken by host1x_cdma_begin().
 */
void host1x_cdma_end(struct host1x_cdma *cdma,
		     struct host1x_job *job)
{
	struct host1x *host1x = cdma_to_host1x(cdma);
	/* sample before queueing so the idle -> active edge is detectable */
	bool idle = list_empty(&cdma->sync_queue);

	host1x_hw_cdma_flush(host1x, cdma);

	/* record the job's pushbuffer span for later cleanup/recovery */
	job->first_get = cdma->first_get;
	job->num_slots = cdma->slots_used;
	/* reference dropped by update_cdma_locked() when the job completes */
	host1x_job_get(job);
	list_add_tail(&job->list, &cdma->sync_queue);

	/* start timer on idle -> active transitions */
	if (job->timeout && idle)
		cdma_start_timer_locked(cdma, job);

	trace_host1x_cdma_end(dev_name(job->channel->dev));
	mutex_unlock(&cdma->lock);
}
0676 
/*
 * Update cdma state according to current sync point values
 *
 * Locked wrapper around update_cdma_locked() for callers that do not
 * already hold the cdma lock.
 */
void host1x_cdma_update(struct host1x_cdma *cdma)
{
	mutex_lock(&cdma->lock);
	update_cdma_locked(cdma);
	mutex_unlock(&cdma->lock);
}