Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright 2019 Collabora Ltd */
0003 
0004 #include <linux/completion.h>
0005 #include <linux/iopoll.h>
0006 #include <linux/iosys-map.h>
0007 #include <linux/pm_runtime.h>
0008 #include <linux/slab.h>
0009 #include <linux/uaccess.h>
0010 
0011 #include <drm/drm_file.h>
0012 #include <drm/drm_gem_shmem_helper.h>
0013 #include <drm/panfrost_drm.h>
0014 
0015 #include "panfrost_device.h"
0016 #include "panfrost_features.h"
0017 #include "panfrost_gem.h"
0018 #include "panfrost_issues.h"
0019 #include "panfrost_job.h"
0020 #include "panfrost_mmu.h"
0021 #include "panfrost_perfcnt.h"
0022 #include "panfrost_regs.h"
0023 
0024 #define COUNTERS_PER_BLOCK      64 /* counters in one dump block; used to size the dump buffer */
0025 #define BYTES_PER_COUNTER       4 /* size in bytes of one counter in the dump buffer */
0026 #define BLOCKS_PER_COREGROUP        8 /* blocks accounted per core group when sizing the V4 dump buffer */
0027 #define V4_SHADERS_PER_COREGROUP    4 /* NOTE(review): not referenced anywhere in this file */
0028 
0029 struct panfrost_perfcnt { /* per-device performance counter state, hung off pfdev->perfcnt */
0030     struct panfrost_gem_mapping *mapping; /* mapping of the dump BO in the owner's address space; NULL when disabled */
0031     size_t bosize; /* dump buffer size in bytes, computed once in panfrost_perfcnt_init() */
0032     void *buf; /* kernel virtual address of the vmap'ed dump BO; NULL when disabled */
0033     struct panfrost_file_priv *user; /* file that currently owns the counters; NULL when nobody does */
0034     struct mutex lock; /* taken by the enable/dump ioctls and by panfrost_perfcnt_close() */
0035     struct completion dump_comp; /* completed by panfrost_perfcnt_clean_cache_done() */
0036 };
0037 
0038 void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
0039 {
0040     complete(&pfdev->perfcnt->dump_comp);
0041 }
0042 
0043 void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
0044 {
0045     gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
0046 }
0047 
0048 static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
0049 {
0050     u64 gpuva;
0051     int ret;
0052 
0053     reinit_completion(&pfdev->perfcnt->dump_comp);
0054     gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT;
0055     gpu_write(pfdev, GPU_PERFCNT_BASE_LO, lower_32_bits(gpuva));
0056     gpu_write(pfdev, GPU_PERFCNT_BASE_HI, upper_32_bits(gpuva));
0057     gpu_write(pfdev, GPU_INT_CLEAR,
0058           GPU_IRQ_CLEAN_CACHES_COMPLETED |
0059           GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
0060     gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
0061     ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
0062                             msecs_to_jiffies(1000));
0063     if (!ret)
0064         ret = -ETIMEDOUT;
0065     else if (ret > 0)
0066         ret = 0;
0067 
0068     return ret;
0069 }
0070 
0071 static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
0072                       struct drm_file *file_priv,
0073                       unsigned int counterset)
0074 {
0075     struct panfrost_file_priv *user = file_priv->driver_priv;
0076     struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
0077     struct iosys_map map;
0078     struct drm_gem_shmem_object *bo;
0079     u32 cfg, as;
0080     int ret;
0081 
0082     if (user == perfcnt->user)
0083         return 0;
0084     else if (perfcnt->user)
0085         return -EBUSY;
0086 
0087     ret = pm_runtime_get_sync(pfdev->dev);
0088     if (ret < 0)
0089         goto err_put_pm;
0090 
0091     bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
0092     if (IS_ERR(bo)) {
0093         ret = PTR_ERR(bo);
0094         goto err_put_pm;
0095     }
0096 
0097     /* Map the perfcnt buf in the address space attached to file_priv. */
0098     ret = panfrost_gem_open(&bo->base, file_priv);
0099     if (ret)
0100         goto err_put_bo;
0101 
0102     perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base),
0103                             user);
0104     if (!perfcnt->mapping) {
0105         ret = -EINVAL;
0106         goto err_close_bo;
0107     }
0108 
0109     ret = drm_gem_shmem_vmap(bo, &map);
0110     if (ret)
0111         goto err_put_mapping;
0112     perfcnt->buf = map.vaddr;
0113 
0114     /*
0115      * Invalidate the cache and clear the counters to start from a fresh
0116      * state.
0117      */
0118     reinit_completion(&pfdev->perfcnt->dump_comp);
0119     gpu_write(pfdev, GPU_INT_CLEAR,
0120           GPU_IRQ_CLEAN_CACHES_COMPLETED |
0121           GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
0122     gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
0123     gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
0124     ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
0125                       msecs_to_jiffies(1000));
0126     if (!ret) {
0127         ret = -ETIMEDOUT;
0128         goto err_vunmap;
0129     }
0130 
0131     perfcnt->user = user;
0132 
0133     as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu);
0134     cfg = GPU_PERFCNT_CFG_AS(as) |
0135           GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
0136 
0137     /*
0138      * Bifrost GPUs have 2 set of counters, but we're only interested by
0139      * the first one for now.
0140      */
0141     if (panfrost_model_is_bifrost(pfdev))
0142         cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
0143 
0144     gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
0145     gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
0146     gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
0147 
0148     /*
0149      * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
0150      * counters.
0151      */
0152     if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
0153         gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
0154     else
0155         gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
0156 
0157     gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
0158 
0159     if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
0160         gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
0161 
0162     /* The BO ref is retained by the mapping. */
0163     drm_gem_object_put(&bo->base);
0164 
0165     return 0;
0166 
0167 err_vunmap:
0168     drm_gem_shmem_vunmap(bo, &map);
0169 err_put_mapping:
0170     panfrost_gem_mapping_put(perfcnt->mapping);
0171 err_close_bo:
0172     panfrost_gem_close(&bo->base, file_priv);
0173 err_put_bo:
0174     drm_gem_object_put(&bo->base);
0175 err_put_pm:
0176     pm_runtime_put(pfdev->dev);
0177     return ret;
0178 }
0179 
0180 static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
0181                        struct drm_file *file_priv)
0182 {
0183     struct panfrost_file_priv *user = file_priv->driver_priv;
0184     struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
0185     struct iosys_map map = IOSYS_MAP_INIT_VADDR(perfcnt->buf);
0186 
0187     if (user != perfcnt->user)
0188         return -EINVAL;
0189 
0190     gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
0191     gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
0192     gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
0193     gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
0194     gpu_write(pfdev, GPU_PERFCNT_CFG,
0195           GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
0196 
0197     perfcnt->user = NULL;
0198     drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base, &map);
0199     perfcnt->buf = NULL;
0200     panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv);
0201     panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu);
0202     panfrost_gem_mapping_put(perfcnt->mapping);
0203     perfcnt->mapping = NULL;
0204     pm_runtime_mark_last_busy(pfdev->dev);
0205     pm_runtime_put_autosuspend(pfdev->dev);
0206 
0207     return 0;
0208 }
0209 
0210 int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
0211                   struct drm_file *file_priv)
0212 {
0213     struct panfrost_device *pfdev = dev->dev_private;
0214     struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
0215     struct drm_panfrost_perfcnt_enable *req = data;
0216     int ret;
0217 
0218     ret = panfrost_unstable_ioctl_check();
0219     if (ret)
0220         return ret;
0221 
0222     /* Only Bifrost GPUs have 2 set of counters. */
0223     if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
0224         return -EINVAL;
0225 
0226     mutex_lock(&perfcnt->lock);
0227     if (req->enable)
0228         ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
0229                              req->counterset);
0230     else
0231         ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
0232     mutex_unlock(&perfcnt->lock);
0233 
0234     return ret;
0235 }
0236 
0237 int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
0238                 struct drm_file *file_priv)
0239 {
0240     struct panfrost_device *pfdev = dev->dev_private;
0241     struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
0242     struct drm_panfrost_perfcnt_dump *req = data;
0243     void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
0244     int ret;
0245 
0246     ret = panfrost_unstable_ioctl_check();
0247     if (ret)
0248         return ret;
0249 
0250     mutex_lock(&perfcnt->lock);
0251     if (perfcnt->user != file_priv->driver_priv) {
0252         ret = -EINVAL;
0253         goto out;
0254     }
0255 
0256     ret = panfrost_perfcnt_dump_locked(pfdev);
0257     if (ret)
0258         goto out;
0259 
0260     if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
0261         ret = -EFAULT;
0262 
0263 out:
0264     mutex_unlock(&perfcnt->lock);
0265 
0266     return ret;
0267 }
0268 
0269 void panfrost_perfcnt_close(struct drm_file *file_priv)
0270 {
0271     struct panfrost_file_priv *pfile = file_priv->driver_priv;
0272     struct panfrost_device *pfdev = pfile->pfdev;
0273     struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
0274 
0275     pm_runtime_get_sync(pfdev->dev);
0276     mutex_lock(&perfcnt->lock);
0277     if (perfcnt->user == pfile)
0278         panfrost_perfcnt_disable_locked(pfdev, file_priv);
0279     mutex_unlock(&perfcnt->lock);
0280     pm_runtime_mark_last_busy(pfdev->dev);
0281     pm_runtime_put_autosuspend(pfdev->dev);
0282 }
0283 
0284 int panfrost_perfcnt_init(struct panfrost_device *pfdev)
0285 {
0286     struct panfrost_perfcnt *perfcnt;
0287     size_t size;
0288 
0289     if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
0290         unsigned int ncoregroups;
0291 
0292         ncoregroups = hweight64(pfdev->features.l2_present);
0293         size = ncoregroups * BLOCKS_PER_COREGROUP *
0294                COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
0295     } else {
0296         unsigned int nl2c, ncores;
0297 
0298         /*
0299          * TODO: define a macro to extract the number of l2 caches from
0300          * mem_features.
0301          */
0302         nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
0303 
0304         /*
0305          * shader_present might be sparse, but the counters layout
0306          * forces to dump unused regions too, hence the fls64() call
0307          * instead of hweight64().
0308          */
0309         ncores = fls64(pfdev->features.shader_present);
0310 
0311         /*
0312          * There's always one JM and one Tiler block, hence the '+ 2'
0313          * here.
0314          */
0315         size = (nl2c + ncores + 2) *
0316                COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
0317     }
0318 
0319     perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
0320     if (!perfcnt)
0321         return -ENOMEM;
0322 
0323     perfcnt->bosize = size;
0324 
0325     /* Start with everything disabled. */
0326     gpu_write(pfdev, GPU_PERFCNT_CFG,
0327           GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
0328     gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
0329     gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
0330     gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
0331     gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
0332 
0333     init_completion(&perfcnt->dump_comp);
0334     mutex_init(&perfcnt->lock);
0335     pfdev->perfcnt = perfcnt;
0336 
0337     return 0;
0338 }
0339 
0340 void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
0341 {
0342     /* Disable everything before leaving. */
0343     gpu_write(pfdev, GPU_PERFCNT_CFG,
0344           GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
0345     gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
0346     gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
0347     gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
0348     gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
0349 }