Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright 2014 Advanced Micro Devices, Inc.
0003  *
0004  * Permission is hereby granted, free of charge, to any person obtaining a
0005  * copy of this software and associated documentation files (the "Software"),
0006  * to deal in the Software without restriction, including without limitation
0007  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
0008  * and/or sell copies of the Software, and to permit persons to whom the
0009  * Software is furnished to do so, subject to the following conditions:
0010  *
0011  * The above copyright notice and this permission notice shall be included in
0012  * all copies or substantial portions of the Software.
0013  *
0014  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0015  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0016  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
0017  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
0018  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
0019  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0020  * OTHER DEALINGS IN THE SOFTWARE.
0021  *
0022  */
0023 
0024 #include <linux/dma-mapping.h>
0025 
0026 #include "amdgpu.h"
0027 #include "amdgpu_ih.h"
0028 
0029 /**
0030  * amdgpu_ih_ring_init - initialize the IH state
0031  *
0032  * @adev: amdgpu_device pointer
0033  * @ih: ih ring to initialize
0034  * @ring_size: ring size to allocate
0035  * @use_bus_addr: true when we can use dma_alloc_coherent
0036  *
0037  * Initializes the IH state and allocates a buffer
0038  * for the IH ring buffer.
0039  * Returns 0 for success, errors for failure.
0040  */
0041 int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
0042             unsigned ring_size, bool use_bus_addr)
0043 {
0044     u32 rb_bufsz;
0045     int r;
0046 
0047     /* Align ring size */
0048     rb_bufsz = order_base_2(ring_size / 4);
0049     ring_size = (1 << rb_bufsz) * 4;
0050     ih->ring_size = ring_size;
0051     ih->ptr_mask = ih->ring_size - 1;
0052     ih->rptr = 0;
0053     ih->use_bus_addr = use_bus_addr;
0054 
0055     if (use_bus_addr) {
0056         dma_addr_t dma_addr;
0057 
0058         if (ih->ring)
0059             return 0;
0060 
0061         /* add 8 bytes for the rptr/wptr shadows and
0062          * add them to the end of the ring allocation.
0063          */
0064         ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
0065                           &dma_addr, GFP_KERNEL);
0066         if (ih->ring == NULL)
0067             return -ENOMEM;
0068 
0069         ih->gpu_addr = dma_addr;
0070         ih->wptr_addr = dma_addr + ih->ring_size;
0071         ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
0072         ih->rptr_addr = dma_addr + ih->ring_size + 4;
0073         ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
0074     } else {
0075         unsigned wptr_offs, rptr_offs;
0076 
0077         r = amdgpu_device_wb_get(adev, &wptr_offs);
0078         if (r)
0079             return r;
0080 
0081         r = amdgpu_device_wb_get(adev, &rptr_offs);
0082         if (r) {
0083             amdgpu_device_wb_free(adev, wptr_offs);
0084             return r;
0085         }
0086 
0087         r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
0088                         AMDGPU_GEM_DOMAIN_GTT,
0089                         &ih->ring_obj, &ih->gpu_addr,
0090                         (void **)&ih->ring);
0091         if (r) {
0092             amdgpu_device_wb_free(adev, rptr_offs);
0093             amdgpu_device_wb_free(adev, wptr_offs);
0094             return r;
0095         }
0096 
0097         ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
0098         ih->wptr_cpu = &adev->wb.wb[wptr_offs];
0099         ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
0100         ih->rptr_cpu = &adev->wb.wb[rptr_offs];
0101     }
0102 
0103     init_waitqueue_head(&ih->wait_process);
0104     return 0;
0105 }
0106 
0107 /**
0108  * amdgpu_ih_ring_fini - tear down the IH state
0109  *
0110  * @adev: amdgpu_device pointer
0111  * @ih: ih ring to tear down
0112  *
0113  * Tears down the IH state and frees buffer
0114  * used for the IH ring buffer.
0115  */
0116 void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
0117 {
0118 
0119     if (!ih->ring)
0120         return;
0121 
0122     if (ih->use_bus_addr) {
0123 
0124         /* add 8 bytes for the rptr/wptr shadows and
0125          * add them to the end of the ring allocation.
0126          */
0127         dma_free_coherent(adev->dev, ih->ring_size + 8,
0128                   (void *)ih->ring, ih->gpu_addr);
0129         ih->ring = NULL;
0130     } else {
0131         amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
0132                       (void **)&ih->ring);
0133         amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
0134         amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
0135     }
0136 }
0137 
0138 /**
0139  * amdgpu_ih_ring_write - write IV to the ring buffer
0140  *
0141  * @ih: ih ring to write to
0142  * @iv: the iv to write
0143  * @num_dw: size of the iv in dw
0144  *
0145  * Writes an IV to the ring buffer using the CPU and increment the wptr.
0146  * Used for testing and delegating IVs to a software ring.
0147  */
0148 void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv,
0149               unsigned int num_dw)
0150 {
0151     uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
0152     unsigned int i;
0153 
0154     for (i = 0; i < num_dw; ++i)
0155             ih->ring[wptr++] = cpu_to_le32(iv[i]);
0156 
0157     wptr <<= 2;
0158     wptr &= ih->ptr_mask;
0159 
0160     /* Only commit the new wptr if we don't overflow */
0161     if (wptr != READ_ONCE(ih->rptr)) {
0162         wmb();
0163         WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
0164     }
0165 }
0166 
0167 /**
0168  * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
0169  *
0170  * @adev: amdgpu_device pointer
0171  * @ih: ih ring to process
0172  *
0173  * Used to ensure ring has processed IVs up to the checkpoint write pointer.
0174  */
0175 int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
0176                     struct amdgpu_ih_ring *ih)
0177 {
0178     uint32_t checkpoint_wptr;
0179     uint64_t checkpoint_ts;
0180     long timeout = HZ;
0181 
0182     if (!ih->enabled || adev->shutdown)
0183         return -ENODEV;
0184 
0185     checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
0186     /* Order wptr with ring data. */
0187     rmb();
0188     checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
0189 
0190     return wait_event_interruptible_timeout(ih->wait_process,
0191             amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
0192             ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
0193 }
0194 
0195 /**
0196  * amdgpu_ih_process - interrupt handler
0197  *
0198  * @adev: amdgpu_device pointer
0199  * @ih: ih ring to process
0200  *
0201  * Interrupt hander (VI), walk the IH ring.
0202  * Returns irq process return code.
0203  */
0204 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
0205 {
0206     unsigned int count;
0207     u32 wptr;
0208 
0209     if (!ih->enabled || adev->shutdown)
0210         return IRQ_NONE;
0211 
0212     wptr = amdgpu_ih_get_wptr(adev, ih);
0213 
0214 restart_ih:
0215     count  = AMDGPU_IH_MAX_NUM_IVS;
0216     DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
0217 
0218     /* Order reading of wptr vs. reading of IH ring data */
0219     rmb();
0220 
0221     while (ih->rptr != wptr && --count) {
0222         amdgpu_irq_dispatch(adev, ih);
0223         ih->rptr &= ih->ptr_mask;
0224     }
0225 
0226     amdgpu_ih_set_rptr(adev, ih);
0227     wake_up_all(&ih->wait_process);
0228 
0229     /* make sure wptr hasn't changed while processing */
0230     wptr = amdgpu_ih_get_wptr(adev, ih);
0231     if (wptr != ih->rptr)
0232         goto restart_ih;
0233 
0234     return IRQ_HANDLED;
0235 }
0236 
0237 /**
0238  * amdgpu_ih_decode_iv_helper - decode an interrupt vector
0239  *
0240  * @adev: amdgpu_device pointer
0241  * @ih: ih ring to process
0242  * @entry: IV entry
0243  *
0244  * Decodes the interrupt vector at the current rptr
0245  * position and also advance the position for Vega10
0246  * and later GPUs.
0247  */
0248 void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
0249                 struct amdgpu_ih_ring *ih,
0250                 struct amdgpu_iv_entry *entry)
0251 {
0252     /* wptr/rptr are in bytes! */
0253     u32 ring_index = ih->rptr >> 2;
0254     uint32_t dw[8];
0255 
0256     dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
0257     dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
0258     dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
0259     dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
0260     dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
0261     dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
0262     dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
0263     dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
0264 
0265     entry->client_id = dw[0] & 0xff;
0266     entry->src_id = (dw[0] >> 8) & 0xff;
0267     entry->ring_id = (dw[0] >> 16) & 0xff;
0268     entry->vmid = (dw[0] >> 24) & 0xf;
0269     entry->vmid_src = (dw[0] >> 31);
0270     entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
0271     entry->timestamp_src = dw[2] >> 31;
0272     entry->pasid = dw[3] & 0xffff;
0273     entry->pasid_src = dw[3] >> 31;
0274     entry->src_data[0] = dw[4];
0275     entry->src_data[1] = dw[5];
0276     entry->src_data[2] = dw[6];
0277     entry->src_data[3] = dw[7];
0278 
0279     /* wptr/rptr are in bytes! */
0280     ih->rptr += 32;
0281 }
0282 
0283 uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
0284                        signed int offset)
0285 {
0286     uint32_t iv_size = 32;
0287     uint32_t ring_index;
0288     uint32_t dw1, dw2;
0289 
0290     rptr += iv_size * offset;
0291     ring_index = (rptr & ih->ptr_mask) >> 2;
0292 
0293     dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
0294     dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
0295     return dw1 | ((u64)(dw2 & 0xffff) << 32);
0296 }