/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "r600.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))
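
/*
 * Like most function-style min/max macros, these may evaluate their
 * arguments more than once, so they must not be used with expressions
 * that have side effects (e.g. MAX(i++, j)).
 */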

#define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm)

struct evergreen_cs_track {
    u32         group_size;
    u32         nbanks;
    u32         npipes;
    u32         row_size;
    /* value we track */
    u32         nsamples;       /* unused */
    struct radeon_bo    *cb_color_bo[12];
    u32         cb_color_bo_offset[12];
    struct radeon_bo    *cb_color_fmask_bo[8];  /* unused */
    struct radeon_bo    *cb_color_cmask_bo[8];  /* unused */
    u32         cb_color_info[12];
    u32         cb_color_view[12];
    u32         cb_color_pitch[12];
    u32         cb_color_slice[12];
    u32         cb_color_slice_idx[12];
    u32         cb_color_attrib[12];
    u32         cb_color_cmask_slice[8];/* unused */
    u32         cb_color_fmask_slice[8];/* unused */
    u32         cb_target_mask;
    u32         cb_shader_mask; /* unused */
    u32         vgt_strmout_config;
    u32         vgt_strmout_buffer_config;
    struct radeon_bo    *vgt_strmout_bo[4];
    u32         vgt_strmout_bo_offset[4];
    u32         vgt_strmout_size[4];
    u32         db_depth_control;
    u32         db_depth_view;
    u32         db_depth_slice;
    u32         db_depth_size;
    u32         db_z_info;
    u32         db_z_read_offset;
    u32         db_z_write_offset;
    struct radeon_bo    *db_z_read_bo;
    struct radeon_bo    *db_z_write_bo;
    u32         db_s_info;
    u32         db_s_read_offset;
    u32         db_s_write_offset;
    struct radeon_bo    *db_s_read_bo;
    struct radeon_bo    *db_s_write_bo;
    bool            sx_misc_kill_all_prims;
    bool            cb_dirty;
    bool            db_dirty;
    bool            streamout_dirty;
    u32         htile_offset;
    u32         htile_surface;
    struct radeon_bo    *htile_bo;
    unsigned long       indirect_draw_buffer_size;
    const unsigned      *reg_safe_bm;
};

static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
    if (tiling_flags & RADEON_TILING_MACRO)
        return ARRAY_2D_TILED_THIN1;
    else if (tiling_flags & RADEON_TILING_MICRO)
        return ARRAY_1D_TILED_THIN1;
    else
        return ARRAY_LINEAR_GENERAL;
}
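
/*
 * RADEON_TILING_MACRO is tested before RADEON_TILING_MICRO, so a bo
 * flagged with both macro and micro tiling resolves to
 * ARRAY_2D_TILED_THIN1, never ARRAY_1D_TILED_THIN1.
 */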

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
    switch (nbanks) {
    case 2:
        return ADDR_SURF_2_BANK;
    case 4:
        return ADDR_SURF_4_BANK;
    case 8:
    default:
        return ADDR_SURF_8_BANK;
    case 16:
        return ADDR_SURF_16_BANK;
    }
}

static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
    int i;

    for (i = 0; i < 8; i++) {
        track->cb_color_fmask_bo[i] = NULL;
        track->cb_color_cmask_bo[i] = NULL;
        track->cb_color_cmask_slice[i] = 0;
        track->cb_color_fmask_slice[i] = 0;
    }

    for (i = 0; i < 12; i++) {
        track->cb_color_bo[i] = NULL;
        track->cb_color_bo_offset[i] = 0xFFFFFFFF;
        track->cb_color_info[i] = 0;
        track->cb_color_view[i] = 0xFFFFFFFF;
        track->cb_color_pitch[i] = 0;
        track->cb_color_slice[i] = 0xfffffff;
        track->cb_color_slice_idx[i] = 0;
    }
    track->cb_target_mask = 0xFFFFFFFF;
    track->cb_shader_mask = 0xFFFFFFFF;
    track->cb_dirty = true;

    track->db_depth_slice = 0xffffffff;
    track->db_depth_view = 0xFFFFC000;
    track->db_depth_size = 0xFFFFFFFF;
    track->db_depth_control = 0xFFFFFFFF;
    track->db_z_info = 0xFFFFFFFF;
    track->db_z_read_offset = 0xFFFFFFFF;
    track->db_z_write_offset = 0xFFFFFFFF;
    track->db_z_read_bo = NULL;
    track->db_z_write_bo = NULL;
    track->db_s_info = 0xFFFFFFFF;
    track->db_s_read_offset = 0xFFFFFFFF;
    track->db_s_write_offset = 0xFFFFFFFF;
    track->db_s_read_bo = NULL;
    track->db_s_write_bo = NULL;
    track->db_dirty = true;
    track->htile_bo = NULL;
    track->htile_offset = 0xFFFFFFFF;
    track->htile_surface = 0;

    for (i = 0; i < 4; i++) {
        track->vgt_strmout_size[i] = 0;
        track->vgt_strmout_bo[i] = NULL;
        track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
    }
    track->streamout_dirty = true;
    track->sx_misc_kill_all_prims = false;
}

struct eg_surface {
    /* value gathered from cs */
    unsigned    nbx;
    unsigned    nby;
    unsigned    format;
    unsigned    mode;
    unsigned    nbanks;
    unsigned    bankw;
    unsigned    bankh;
    unsigned    tsplit;
    unsigned    mtilea;
    unsigned    nsamples;
    /* output value */
    unsigned    bpe;
    unsigned    layer_size;
    unsigned    palign;
    unsigned    halign;
    unsigned long   base_align;
};

static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
                      struct eg_surface *surf,
                      const char *prefix)
{
    surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
    surf->base_align = surf->bpe;
    surf->palign = 1;
    surf->halign = 1;
    return 0;
}
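
/*
 * For ARRAY_LINEAR_GENERAL no pitch or height alignment is enforced
 * (palign = halign = 1) and the base address only needs element size
 * alignment, e.g. 4 bytes for a 32bpp surface.
 */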

static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
                          struct eg_surface *surf,
                          const char *prefix)
{
    struct evergreen_cs_track *track = p->track;
    unsigned palign;

    palign = MAX(64, track->group_size / surf->bpe);
    surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
    surf->base_align = track->group_size;
    surf->palign = palign;
    surf->halign = 1;
    if (surf->nbx & (palign - 1)) {
        if (prefix) {
            dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                 __func__, __LINE__, prefix, surf->nbx, palign);
        }
        return -EINVAL;
    }
    return 0;
}
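
/*
 * Illustrative numbers: with a 256 byte group size and bpe = 4 (32bpp),
 * palign = MAX(64, 256 / 4) = 64, so the pitch must be a multiple of
 * 64 pixels and the base address a multiple of 256 bytes.
 */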

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
                      struct eg_surface *surf,
                      const char *prefix)
{
    struct evergreen_cs_track *track = p->track;
    unsigned palign;

    palign = track->group_size / (8 * surf->bpe * surf->nsamples);
    palign = MAX(8, palign);
    surf->layer_size = surf->nbx * surf->nby * surf->bpe;
    surf->base_align = track->group_size;
    surf->palign = palign;
    surf->halign = 8;
    if ((surf->nbx & (palign - 1))) {
        if (prefix) {
            dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
                 __func__, __LINE__, prefix, surf->nbx, palign,
                 track->group_size, surf->bpe, surf->nsamples);
        }
        return -EINVAL;
    }
    if ((surf->nby & (8 - 1))) {
        if (prefix) {
            dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
                 __func__, __LINE__, prefix, surf->nby);
        }
        return -EINVAL;
    }
    return 0;
}
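
/*
 * Illustrative numbers: with a 256 byte group size, bpe = 4 and one
 * sample, palign = MAX(8, 256 / (8 * 4 * 1)) = 8, so the pitch must
 * cover whole 8x8 micro tiles; the height is always aligned to 8 rows
 * (one micro tile).
 */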

static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
                      struct eg_surface *surf,
                      const char *prefix)
{
    struct evergreen_cs_track *track = p->track;
    unsigned palign, halign, tileb, slice_pt;
    unsigned mtile_pr, mtile_ps, mtileb;

    tileb = 64 * surf->bpe * surf->nsamples;
    slice_pt = 1;
    if (tileb > surf->tsplit) {
        slice_pt = tileb / surf->tsplit;
    }
    tileb = tileb / slice_pt;
    /* macro tile width & height */
    palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
    halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
    mtileb = (palign / 8) * (halign / 8) * tileb;
    mtile_pr = surf->nbx / palign;
    mtile_ps = (mtile_pr * surf->nby) / halign;
    surf->layer_size = mtile_ps * mtileb * slice_pt;
    surf->base_align = (palign / 8) * (halign / 8) * tileb;
    surf->palign = palign;
    surf->halign = halign;

    if ((surf->nbx & (palign - 1))) {
        if (prefix) {
            dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
                 __func__, __LINE__, prefix, surf->nbx, palign);
        }
        return -EINVAL;
    }
    if ((surf->nby & (halign - 1))) {
        if (prefix) {
            dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
                 __func__, __LINE__, prefix, surf->nby, halign);
        }
        return -EINVAL;
    }

    return 0;
}
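
/*
 * Illustrative numbers: with npipes = 2, nbanks = 8, bankw = bankh = 1
 * and mtilea = 2, the macro tile is palign = 8 * 1 * 2 * 2 = 32 pixels
 * wide and halign = (8 * 1 * 8) / 2 = 32 rows high; with bpe = 4, one
 * sample and no tile split (tileb = 256 bytes), the base alignment is
 * (32 / 8) * (32 / 8) * 256 = 4096 bytes.
 */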

static int evergreen_surface_check(struct radeon_cs_parser *p,
                   struct eg_surface *surf,
                   const char *prefix)
{
    /* some common value computed here */
    surf->bpe = r600_fmt_get_blocksize(surf->format);

    switch (surf->mode) {
    case ARRAY_LINEAR_GENERAL:
        return evergreen_surface_check_linear(p, surf, prefix);
    case ARRAY_LINEAR_ALIGNED:
        return evergreen_surface_check_linear_aligned(p, surf, prefix);
    case ARRAY_1D_TILED_THIN1:
        return evergreen_surface_check_1d(p, surf, prefix);
    case ARRAY_2D_TILED_THIN1:
        return evergreen_surface_check_2d(p, surf, prefix);
    default:
        dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                __func__, __LINE__, prefix, surf->mode);
        return -EINVAL;
    }
    return -EINVAL;
}

static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
                          struct eg_surface *surf,
                          const char *prefix)
{
    switch (surf->mode) {
    case ARRAY_2D_TILED_THIN1:
        break;
    case ARRAY_LINEAR_GENERAL:
    case ARRAY_LINEAR_ALIGNED:
    case ARRAY_1D_TILED_THIN1:
        return 0;
    default:
        dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
                __func__, __LINE__, prefix, surf->mode);
        return -EINVAL;
    }

    switch (surf->nbanks) {
    case 0: surf->nbanks = 2; break;
    case 1: surf->nbanks = 4; break;
    case 2: surf->nbanks = 8; break;
    case 3: surf->nbanks = 16; break;
    default:
        dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
             __func__, __LINE__, prefix, surf->nbanks);
        return -EINVAL;
    }
    switch (surf->bankw) {
    case 0: surf->bankw = 1; break;
    case 1: surf->bankw = 2; break;
    case 2: surf->bankw = 4; break;
    case 3: surf->bankw = 8; break;
    default:
        dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
             __func__, __LINE__, prefix, surf->bankw);
        return -EINVAL;
    }
    switch (surf->bankh) {
    case 0: surf->bankh = 1; break;
    case 1: surf->bankh = 2; break;
    case 2: surf->bankh = 4; break;
    case 3: surf->bankh = 8; break;
    default:
        dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
             __func__, __LINE__, prefix, surf->bankh);
        return -EINVAL;
    }
    switch (surf->mtilea) {
    case 0: surf->mtilea = 1; break;
    case 1: surf->mtilea = 2; break;
    case 2: surf->mtilea = 4; break;
    case 3: surf->mtilea = 8; break;
    default:
        dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
             __func__, __LINE__, prefix, surf->mtilea);
        return -EINVAL;
    }
    switch (surf->tsplit) {
    case 0: surf->tsplit = 64; break;
    case 1: surf->tsplit = 128; break;
    case 2: surf->tsplit = 256; break;
    case 3: surf->tsplit = 512; break;
    case 4: surf->tsplit = 1024; break;
    case 5: surf->tsplit = 2048; break;
    case 6: surf->tsplit = 4096; break;
    default:
        dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
             __func__, __LINE__, prefix, surf->tsplit);
        return -EINVAL;
    }
    return 0;
}
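
/*
 * The raw register fields converted above are log2-style encodings:
 * nbanks = 2 << field, bankw/bankh/mtilea = 1 << field and
 * tsplit = 64 << field, e.g. a NUM_BANKS field of 2 selects 8 banks
 * and a TILE_SPLIT field of 2 a 256 byte tile split.
 */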

static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
    struct evergreen_cs_track *track = p->track;
    struct eg_surface surf;
    unsigned pitch, slice, mslice;
    unsigned long offset;
    int r;

    mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
    pitch = track->cb_color_pitch[id];
    slice = track->cb_color_slice[id];
    surf.nbx = (pitch + 1) * 8;
    surf.nby = ((slice + 1) * 64) / surf.nbx;
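    /*
     * The pitch register holds (pitch in pixels / 8) - 1 and the slice
     * register (pitch * height / 64) - 1, so e.g. a 1024x768 target is
     * programmed with pitch = 127 and slice = 12287, decoding back to
     * nbx = 1024 and nby = 768 here.
     */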
    surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
    surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
    surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
    surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
    surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
    surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
    surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
    surf.nsamples = 1;

    if (!r600_fmt_is_valid_color(surf.format)) {
        dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
             __func__, __LINE__, surf.format,
            id, track->cb_color_info[id]);
        return -EINVAL;
    }

    r = evergreen_surface_value_conv_check(p, &surf, "cb");
    if (r) {
        return r;
    }

    r = evergreen_surface_check(p, &surf, "cb");
    if (r) {
        dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
             __func__, __LINE__, id, track->cb_color_pitch[id],
             track->cb_color_slice[id], track->cb_color_attrib[id],
             track->cb_color_info[id]);
        return r;
    }

    offset = track->cb_color_bo_offset[id] << 8;
    if (offset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
             __func__, __LINE__, id, offset, surf.base_align);
        return -EINVAL;
    }

    offset += surf.layer_size * mslice;
    if (offset > radeon_bo_size(track->cb_color_bo[id])) {
        /* old ddx versions are broken: they allocate the bo with
         * w*h*bpp but program the slice with ALIGN(h, 8); catch this
         * and patch the command stream.
         */
        if (!surf.mode) {
            uint32_t *ib = p->ib.ptr;
            unsigned long tmp, nby, bsize, size, min = 0;

            /* find the height the ddx wants */
            if (surf.nby > 8) {
                min = surf.nby - 8;
            }
            bsize = radeon_bo_size(track->cb_color_bo[id]);
            tmp = track->cb_color_bo_offset[id] << 8;
            for (nby = surf.nby; nby > min; nby--) {
                size = nby * surf.nbx * surf.bpe * surf.nsamples;
                if ((tmp + size * mslice) <= bsize) {
                    break;
                }
            }
            if (nby > min) {
                surf.nby = nby;
                slice = ((nby * surf.nbx) / 64) - 1;
                if (!evergreen_surface_check(p, &surf, "cb")) {
                    /* check if this one works */
                    tmp += surf.layer_size * mslice;
                    if (tmp <= bsize) {
                        ib[track->cb_color_slice_idx[id]] = slice;
                        goto old_ddx_ok;
                    }
                }
            }
        }
        dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
             "offset %d, max layer %d, bo size %ld, slice %d)\n",
             __func__, __LINE__, id, surf.layer_size,
            track->cb_color_bo_offset[id] << 8, mslice,
            radeon_bo_size(track->cb_color_bo[id]), slice);
        dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
             __func__, __LINE__, surf.nbx, surf.nby,
            surf.mode, surf.bpe, surf.nsamples,
            surf.bankw, surf.bankh,
            surf.tsplit, surf.mtilea);
        return -EINVAL;
    }
old_ddx_ok:

    return 0;
}

static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
                        unsigned nbx, unsigned nby)
{
    struct evergreen_cs_track *track = p->track;
    unsigned long size;

    if (track->htile_bo == NULL) {
        dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
                __func__, __LINE__, track->db_z_info);
        return -EINVAL;
    }

    if (G_028ABC_LINEAR(track->htile_surface)) {
        /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
        nbx = round_up(nbx, 16 * 8);
        /* height is npipes htiles aligned == npipes * 8 pixel aligned */
        nby = round_up(nby, track->npipes * 8);
    } else {
        /* always assume 8x8 htile */
        /* the alignment is the htile alignment * 8; the htile
         * alignment varies with the number of pipes, the tile width
         * and nby
         */
        switch (track->npipes) {
        case 8:
            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
            nbx = round_up(nbx, 64 * 8);
            nby = round_up(nby, 64 * 8);
            break;
        case 4:
            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
            nbx = round_up(nbx, 64 * 8);
            nby = round_up(nby, 32 * 8);
            break;
        case 2:
            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
            nbx = round_up(nbx, 32 * 8);
            nby = round_up(nby, 32 * 8);
            break;
        case 1:
            /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8 */
            nbx = round_up(nbx, 32 * 8);
            nby = round_up(nby, 16 * 8);
            break;
        default:
            dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
                    __func__, __LINE__, track->npipes);
            return -EINVAL;
        }
    }
    /* compute number of htile */
    nbx = nbx >> 3;
    nby = nby >> 3;
    /* size must be aligned on npipes * 2K boundary */
    size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
    size += track->htile_offset;

    if (size > radeon_bo_size(track->htile_bo)) {
        dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
                __func__, __LINE__, radeon_bo_size(track->htile_bo),
                size, nbx, nby);
        return -EINVAL;
    }
    return 0;
}
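
/*
 * Illustrative numbers: a linear htile surface for a 1024x1024 depth
 * buffer on a 2 pipe part keeps nbx = 1024 (already a multiple of
 * 16 * 8) and nby = 1024 (a multiple of 2 * 8), giving 128 * 128
 * htiles at 4 bytes each; the resulting 64KB already sits on the
 * npipes * 2K (4KB) boundary.
 */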

static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
    struct evergreen_cs_track *track = p->track;
    struct eg_surface surf;
    unsigned pitch, slice, mslice;
    unsigned long offset;
    int r;

    mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
    pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
    slice = track->db_depth_slice;
    surf.nbx = (pitch + 1) * 8;
    surf.nby = ((slice + 1) * 64) / surf.nbx;
    surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
    surf.format = G_028044_FORMAT(track->db_s_info);
    surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
    surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
    surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
    surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
    surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
    surf.nsamples = 1;

    if (surf.format != 1) {
        dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
             __func__, __LINE__, surf.format);
        return -EINVAL;
    }
    /* replace with a color format so we can reuse the same code */
    surf.format = V_028C70_COLOR_8;

    r = evergreen_surface_value_conv_check(p, &surf, "stencil");
    if (r) {
        return r;
    }

    r = evergreen_surface_check(p, &surf, NULL);
    if (r) {
        /* old userspace doesn't compute proper depth/stencil alignment;
         * check the alignment against a larger bytes-per-element value
         * and only report an error if that alignment is wrong too.
         */
        surf.format = V_028C70_COLOR_8_8_8_8;
        r = evergreen_surface_check(p, &surf, "stencil");
        if (r) {
            dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
                 __func__, __LINE__, track->db_depth_size,
                 track->db_depth_slice, track->db_s_info, track->db_z_info);
        }
        return r;
    }

    offset = track->db_s_read_offset << 8;
    if (offset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
             __func__, __LINE__, offset, surf.base_align);
        return -EINVAL;
    }
    offset += surf.layer_size * mslice;
    if (offset > radeon_bo_size(track->db_s_read_bo)) {
        dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
             "offset %ld, max layer %d, bo size %ld)\n",
             __func__, __LINE__, surf.layer_size,
            (unsigned long)track->db_s_read_offset << 8, mslice,
            radeon_bo_size(track->db_s_read_bo));
        dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
             __func__, __LINE__, track->db_depth_size,
             track->db_depth_slice, track->db_s_info, track->db_z_info);
        return -EINVAL;
    }

    offset = track->db_s_write_offset << 8;
    if (offset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
             __func__, __LINE__, offset, surf.base_align);
        return -EINVAL;
    }
    offset += surf.layer_size * mslice;
    if (offset > radeon_bo_size(track->db_s_write_bo)) {
        dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
             "offset %ld, max layer %d, bo size %ld)\n",
             __func__, __LINE__, surf.layer_size,
            (unsigned long)track->db_s_write_offset << 8, mslice,
            radeon_bo_size(track->db_s_write_bo));
        return -EINVAL;
    }

    /* hyperz */
    if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
        r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
        if (r) {
            return r;
        }
    }

    return 0;
}

static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
    struct evergreen_cs_track *track = p->track;
    struct eg_surface surf;
    unsigned pitch, slice, mslice;
    unsigned long offset;
    int r;

    mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
    pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
    slice = track->db_depth_slice;
    surf.nbx = (pitch + 1) * 8;
    surf.nby = ((slice + 1) * 64) / surf.nbx;
    surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
    surf.format = G_028040_FORMAT(track->db_z_info);
    surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
    surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
    surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
    surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
    surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
    surf.nsamples = 1;

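    /*
     * Depth formats are mapped below onto color formats with the same
     * element size (Z_16 -> COLOR_16, 2 bytes; Z_24 and Z_32_FLOAT ->
     * COLOR_8_8_8_8, 4 bytes) so the shared surface checker can derive
     * bpe and alignment for them.
     */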
    switch (surf.format) {
    case V_028040_Z_16:
        surf.format = V_028C70_COLOR_16;
        break;
    case V_028040_Z_24:
    case V_028040_Z_32_FLOAT:
        surf.format = V_028C70_COLOR_8_8_8_8;
        break;
    default:
        dev_warn(p->dev, "%s:%d depth invalid format %d\n",
             __func__, __LINE__, surf.format);
        return -EINVAL;
    }

    r = evergreen_surface_value_conv_check(p, &surf, "depth");
    if (r) {
        dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
             __func__, __LINE__, track->db_depth_size,
             track->db_depth_slice, track->db_z_info);
        return r;
    }

    r = evergreen_surface_check(p, &surf, "depth");
    if (r) {
        dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
             __func__, __LINE__, track->db_depth_size,
             track->db_depth_slice, track->db_z_info);
        return r;
    }

    offset = track->db_z_read_offset << 8;
    if (offset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
             __func__, __LINE__, offset, surf.base_align);
        return -EINVAL;
    }
    offset += surf.layer_size * mslice;
    if (offset > radeon_bo_size(track->db_z_read_bo)) {
        dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
             "offset %ld, max layer %d, bo size %ld)\n",
             __func__, __LINE__, surf.layer_size,
            (unsigned long)track->db_z_read_offset << 8, mslice,
            radeon_bo_size(track->db_z_read_bo));
        return -EINVAL;
    }

    offset = track->db_z_write_offset << 8;
    if (offset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
             __func__, __LINE__, offset, surf.base_align);
        return -EINVAL;
    }
    offset += surf.layer_size * mslice;
    if (offset > radeon_bo_size(track->db_z_write_bo)) {
        dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
             "offset %ld, max layer %d, bo size %ld)\n",
             __func__, __LINE__, surf.layer_size,
            (unsigned long)track->db_z_write_offset << 8, mslice,
            radeon_bo_size(track->db_z_write_bo));
        return -EINVAL;
    }

    /* hyperz */
    if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
        r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
        if (r) {
            return r;
        }
    }

    return 0;
}

static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
                           struct radeon_bo *texture,
                           struct radeon_bo *mipmap,
                           unsigned idx)
{
    struct eg_surface surf;
    unsigned long toffset, moffset;
    unsigned dim, llevel, mslice, width, height, depth, i;
    u32 texdw[8];
    int r;

    texdw[0] = radeon_get_ib_value(p, idx + 0);
    texdw[1] = radeon_get_ib_value(p, idx + 1);
    texdw[2] = radeon_get_ib_value(p, idx + 2);
    texdw[3] = radeon_get_ib_value(p, idx + 3);
    texdw[4] = radeon_get_ib_value(p, idx + 4);
    texdw[5] = radeon_get_ib_value(p, idx + 5);
    texdw[6] = radeon_get_ib_value(p, idx + 6);
    texdw[7] = radeon_get_ib_value(p, idx + 7);
    dim = G_030000_DIM(texdw[0]);
    llevel = G_030014_LAST_LEVEL(texdw[5]);
    mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
    width = G_030000_TEX_WIDTH(texdw[0]) + 1;
    height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
    depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
    surf.format = G_03001C_DATA_FORMAT(texdw[7]);
    surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
    surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
    surf.nby = r600_fmt_get_nblocksy(surf.format, height);
    surf.mode = G_030004_ARRAY_MODE(texdw[1]);
    surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
    surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
    surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
    surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
    surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
    surf.nsamples = 1;
    toffset = texdw[2] << 8;
    moffset = texdw[3] << 8;

    if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
        dev_warn(p->dev, "%s:%d texture invalid format %d\n",
             __func__, __LINE__, surf.format);
        return -EINVAL;
    }
    switch (dim) {
    case V_030000_SQ_TEX_DIM_1D:
    case V_030000_SQ_TEX_DIM_2D:
    case V_030000_SQ_TEX_DIM_CUBEMAP:
    case V_030000_SQ_TEX_DIM_1D_ARRAY:
    case V_030000_SQ_TEX_DIM_2D_ARRAY:
        depth = 1;
        break;
    case V_030000_SQ_TEX_DIM_2D_MSAA:
    case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
        surf.nsamples = 1 << llevel;
        llevel = 0;
        depth = 1;
        break;
    case V_030000_SQ_TEX_DIM_3D:
        break;
    default:
        dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
             __func__, __LINE__, dim);
        return -EINVAL;
    }

    r = evergreen_surface_value_conv_check(p, &surf, "texture");
    if (r) {
        return r;
    }

    /* align height */
    evergreen_surface_check(p, &surf, NULL);
    surf.nby = ALIGN(surf.nby, surf.halign);

    r = evergreen_surface_check(p, &surf, "texture");
    if (r) {
        dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
             __func__, __LINE__, texdw[0], texdw[1], texdw[4],
             texdw[5], texdw[6], texdw[7]);
        return r;
    }

    /* check texture size */
    if (toffset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
             __func__, __LINE__, toffset, surf.base_align);
        return -EINVAL;
    }
    if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
        dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
             __func__, __LINE__, moffset, surf.base_align);
        return -EINVAL;
    }
    if (dim == SQ_TEX_DIM_3D) {
        toffset += surf.layer_size * depth;
    } else {
        toffset += surf.layer_size * mslice;
    }
    if (toffset > radeon_bo_size(texture)) {
        dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
             "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
             __func__, __LINE__, surf.layer_size,
            (unsigned long)texdw[2] << 8, mslice,
            depth, radeon_bo_size(texture),
            surf.nbx, surf.nby);
        return -EINVAL;
    }

    if (!mipmap) {
        if (llevel) {
            dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
                 __func__, __LINE__);
            return -EINVAL;
        } else {
            return 0; /* everything's ok */
        }
    }

    /* check mipmap size */
    for (i = 1; i <= llevel; i++) {
        unsigned w, h, d;

        w = r600_mip_minify(width, i);
        h = r600_mip_minify(height, i);
        d = r600_mip_minify(depth, i);
        surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
        surf.nby = r600_fmt_get_nblocksy(surf.format, h);

        switch (surf.mode) {
        case ARRAY_2D_TILED_THIN1:
            if (surf.nbx < surf.palign || surf.nby < surf.halign) {
                surf.mode = ARRAY_1D_TILED_THIN1;
            }
            /* recompute alignment */
            evergreen_surface_check(p, &surf, NULL);
            break;
        case ARRAY_LINEAR_GENERAL:
        case ARRAY_LINEAR_ALIGNED:
        case ARRAY_1D_TILED_THIN1:
            break;
        default:
            dev_warn(p->dev, "%s:%d invalid array mode %d\n",
                 __func__, __LINE__, surf.mode);
            return -EINVAL;
        }
        surf.nbx = ALIGN(surf.nbx, surf.palign);
        surf.nby = ALIGN(surf.nby, surf.halign);

        r = evergreen_surface_check(p, &surf, "mipmap");
        if (r) {
            return r;
        }

        if (dim == SQ_TEX_DIM_3D) {
            moffset += surf.layer_size * d;
        } else {
            moffset += surf.layer_size * mslice;
        }
        if (moffset > radeon_bo_size(mipmap)) {
            dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
                    "offset %ld, coffset %ld, max layer %d, depth %d, "
                    "bo size %ld) level0 (%d %d %d)\n",
                    __func__, __LINE__, i, surf.layer_size,
                    (unsigned long)texdw[3] << 8, moffset, mslice,
                    d, radeon_bo_size(mipmap),
                    width, height, depth);
            dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
                 __func__, __LINE__, surf.nbx, surf.nby,
                surf.mode, surf.bpe, surf.nsamples,
                surf.bankw, surf.bankh,
                surf.tsplit, surf.mtilea);
            return -EINVAL;
        }
    }

    return 0;
}
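
/*
 * r600_mip_minify() shrinks a dimension to roughly size >> level
 * (clamped at 1), so e.g. a 256x256 level 0 yields a 32x32 level 3;
 * each level is then re-aligned to palign/halign before its layer size
 * is added to the running mipmap offset above.
 */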

static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
    struct evergreen_cs_track *track = p->track;
    unsigned tmp, i;
    int r;
    unsigned buffer_mask = 0;

    /* check streamout */
    if (track->streamout_dirty && track->vgt_strmout_config) {
        for (i = 0; i < 4; i++) {
            if (track->vgt_strmout_config & (1 << i)) {
                buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
            }
        }

        for (i = 0; i < 4; i++) {
            if (buffer_mask & (1 << i)) {
                if (track->vgt_strmout_bo[i]) {
                    u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
                            (u64)track->vgt_strmout_size[i];
                    if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
                        DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
                              i, offset,
                              radeon_bo_size(track->vgt_strmout_bo[i]));
                        return -EINVAL;
                    }
                } else {
                    dev_warn(p->dev, "No buffer for streamout %d\n", i);
                    return -EINVAL;
                }
            }
        }
        track->streamout_dirty = false;
    }

    if (track->sx_misc_kill_all_prims)
        return 0;

    /* check that we have a cb for each enabled target */
    if (track->cb_dirty) {
        tmp = track->cb_target_mask;
        for (i = 0; i < 8; i++) {
            u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

            if (format != V_028C70_COLOR_INVALID &&
                (tmp >> (i * 4)) & 0xF) {
                /* at least one component is enabled */
                if (track->cb_color_bo[i] == NULL) {
                    dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
                        __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
                    return -EINVAL;
                }
                /* check cb */
                r = evergreen_cs_track_validate_cb(p, i);
                if (r) {
                    return r;
                }
            }
        }
        track->cb_dirty = false;
    }

    if (track->db_dirty) {
        /* Check stencil buffer */
        if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
            G_028800_STENCIL_ENABLE(track->db_depth_control)) {
            r = evergreen_cs_track_validate_stencil(p);
            if (r)
                return r;
        }
        /* Check depth buffer */
        if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
            G_028800_Z_ENABLE(track->db_depth_control)) {
            r = evergreen_cs_track_validate_depth(p);
            if (r)
                return r;
        }
        track->db_dirty = false;
    }

    return 0;
}
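
/*
 * In the streamout check above, bit i of VGT_STRMOUT_CONFIG enables
 * stream i and each 4 bit nibble of VGT_STRMOUT_BUFFER_CONFIG lists
 * the buffers bound to that stream; e.g. config = 0x1 with
 * buffer_config = 0x3 means stream 0 writes buffers 0 and 1, so both
 * bos must be present and large enough.
 */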

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:      parser structure holding parsing context.
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * The real work is done by r600_cs_common_vline_parse(); here we just
 * set up the ASIC-specific register tables and call that common
 * implementation.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
    static uint32_t vline_start_end[6] = {
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
        EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
    };
    static uint32_t vline_status[6] = {
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
        EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
    };

    return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

static int evergreen_packet0_check(struct radeon_cs_parser *p,
                   struct radeon_cs_packet *pkt,
                   unsigned idx, unsigned reg)
{
    int r;

    switch (reg) {
    case EVERGREEN_VLINE_START_END:
        r = evergreen_cs_packet_parse_vline(p);
        if (r) {
            DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
                    idx, reg);
            return r;
        }
        break;
    default:
        pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
        return -EINVAL;
    }
    return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
                      struct radeon_cs_packet *pkt)
{
    unsigned reg, i;
    unsigned idx;
    int r;

    idx = pkt->idx + 1;
    reg = pkt->reg;
    for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
        r = evergreen_packet0_check(p, pkt, idx, reg);
        if (r) {
            return r;
        }
    }
    return 0;
}

/**
 * evergreen_cs_handle_reg() - process registers that need special handling.
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 */
static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
    struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
    struct radeon_bo_list *reloc;
    u32 tmp, *ib;
    int r;

    ib = p->ib.ptr;
    switch (reg) {
    /* force the following regs to 0 in an attempt to disable the out
     * buffer; we would need to understand how it works better before
     * we can perform a real security check on it (Jerome)
     */
    case SQ_ESGS_RING_SIZE:
    case SQ_GSVS_RING_SIZE:
    case SQ_ESTMP_RING_SIZE:
    case SQ_GSTMP_RING_SIZE:
    case SQ_HSTMP_RING_SIZE:
    case SQ_LSTMP_RING_SIZE:
    case SQ_PSTMP_RING_SIZE:
    case SQ_VSTMP_RING_SIZE:
    case SQ_ESGS_RING_ITEMSIZE:
    case SQ_ESTMP_RING_ITEMSIZE:
    case SQ_GSTMP_RING_ITEMSIZE:
    case SQ_GSVS_RING_ITEMSIZE:
    case SQ_GS_VERT_ITEMSIZE:
    case SQ_GS_VERT_ITEMSIZE_1:
    case SQ_GS_VERT_ITEMSIZE_2:
    case SQ_GS_VERT_ITEMSIZE_3:
    case SQ_GSVS_RING_OFFSET_1:
    case SQ_GSVS_RING_OFFSET_2:
    case SQ_GSVS_RING_OFFSET_3:
    case SQ_HSTMP_RING_ITEMSIZE:
    case SQ_LSTMP_RING_ITEMSIZE:
    case SQ_PSTMP_RING_ITEMSIZE:
    case SQ_VSTMP_RING_ITEMSIZE:
    case VGT_TF_RING_SIZE:
        /* get value to populate the IB, don't remove */
        /* tmp = radeon_get_ib_value(p, idx);
           ib[idx] = 0; */
        break;
    case SQ_ESGS_RING_BASE:
    case SQ_GSVS_RING_BASE:
    case SQ_ESTMP_RING_BASE:
    case SQ_GSTMP_RING_BASE:
    case SQ_HSTMP_RING_BASE:
    case SQ_LSTMP_RING_BASE:
    case SQ_PSTMP_RING_BASE:
    case SQ_VSTMP_RING_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        break;
    case DB_DEPTH_CONTROL:
        track->db_depth_control = radeon_get_ib_value(p, idx);
        track->db_dirty = true;
        break;
    case CAYMAN_DB_EQAA:
        if (p->rdev->family < CHIP_CAYMAN) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                 "0x%04X\n", reg);
            return -EINVAL;
        }
        break;
    case CAYMAN_DB_DEPTH_INFO:
        if (p->rdev->family < CHIP_CAYMAN) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                 "0x%04X\n", reg);
            return -EINVAL;
        }
        break;
    case DB_Z_INFO:
        track->db_z_info = radeon_get_ib_value(p, idx);
        if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
            r = radeon_cs_packet_next_reloc(p, &reloc, 0);
            if (r) {
                dev_warn(p->dev, "bad SET_CONTEXT_REG "
                        "0x%04X\n", reg);
                return -EINVAL;
            }
            ib[idx] &= ~Z_ARRAY_MODE(0xf);
            track->db_z_info &= ~Z_ARRAY_MODE(0xf);
            ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
            track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
            if (reloc->tiling_flags & RADEON_TILING_MACRO) {
                unsigned bankw, bankh, mtaspect, tile_split;

                evergreen_tiling_fields(reloc->tiling_flags,
                            &bankw, &bankh, &mtaspect,
                            &tile_split);
                ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
                ib[idx] |= DB_TILE_SPLIT(tile_split) |
                        DB_BANK_WIDTH(bankw) |
                        DB_BANK_HEIGHT(bankh) |
                        DB_MACRO_TILE_ASPECT(mtaspect);
            }
        }
        track->db_dirty = true;
        break;
    case DB_STENCIL_INFO:
        track->db_s_info = radeon_get_ib_value(p, idx);
        track->db_dirty = true;
        break;
    case DB_DEPTH_VIEW:
        track->db_depth_view = radeon_get_ib_value(p, idx);
        track->db_dirty = true;
        break;
    case DB_DEPTH_SIZE:
        track->db_depth_size = radeon_get_ib_value(p, idx);
        track->db_dirty = true;
        break;
    case R_02805C_DB_DEPTH_SLICE:
        track->db_depth_slice = radeon_get_ib_value(p, idx);
        track->db_dirty = true;
        break;
    case DB_Z_READ_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        track->db_z_read_offset = radeon_get_ib_value(p, idx);
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        track->db_z_read_bo = reloc->robj;
        track->db_dirty = true;
        break;
    case DB_Z_WRITE_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        track->db_z_write_offset = radeon_get_ib_value(p, idx);
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        track->db_z_write_bo = reloc->robj;
        track->db_dirty = true;
        break;
    case DB_STENCIL_READ_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        track->db_s_read_offset = radeon_get_ib_value(p, idx);
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        track->db_s_read_bo = reloc->robj;
        track->db_dirty = true;
        break;
    case DB_STENCIL_WRITE_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        track->db_s_write_offset = radeon_get_ib_value(p, idx);
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        track->db_s_write_bo = reloc->robj;
        track->db_dirty = true;
        break;
    case VGT_STRMOUT_CONFIG:
        track->vgt_strmout_config = radeon_get_ib_value(p, idx);
        track->streamout_dirty = true;
        break;
    case VGT_STRMOUT_BUFFER_CONFIG:
        track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
        track->streamout_dirty = true;
        break;
    case VGT_STRMOUT_BUFFER_BASE_0:
    case VGT_STRMOUT_BUFFER_BASE_1:
    case VGT_STRMOUT_BUFFER_BASE_2:
    case VGT_STRMOUT_BUFFER_BASE_3:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
        track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        track->vgt_strmout_bo[tmp] = reloc->robj;
        track->streamout_dirty = true;
        break;
    case VGT_STRMOUT_BUFFER_SIZE_0:
    case VGT_STRMOUT_BUFFER_SIZE_1:
    case VGT_STRMOUT_BUFFER_SIZE_2:
    case VGT_STRMOUT_BUFFER_SIZE_3:
        tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
        /* size in register is DWs, convert to bytes */
        track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
        track->streamout_dirty = true;
        break;
    case CP_COHER_BASE:
        r = radeon_cs_packet_next_reloc(p, &reloc, 0);
        if (r) {
            dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
                    "0x%04X\n", reg);
            return -EINVAL;
        }
        ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
        break;
    case CB_TARGET_MASK:
        track->cb_target_mask = radeon_get_ib_value(p, idx);
        track->cb_dirty = true;
        break;
    case CB_SHADER_MASK:
        track->cb_shader_mask = radeon_get_ib_value(p, idx);
        track->cb_dirty = true;
        break;
    case PA_SC_AA_CONFIG:
        if (p->rdev->family >= CHIP_CAYMAN) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                 "0x%04X\n", reg);
            return -EINVAL;
        }
        tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
        track->nsamples = 1 << tmp;
        break;
    case CAYMAN_PA_SC_AA_CONFIG:
        if (p->rdev->family < CHIP_CAYMAN) {
            dev_warn(p->dev, "bad SET_CONTEXT_REG "
                 "0x%04X\n", reg);
            return -EINVAL;
        }
        tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
        track->nsamples = 1 << tmp;
        break;
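    /*
     * CB_COLOR0..7 register blocks are spaced 0x3c bytes apart while
     * the CB_COLOR8..11 blocks are 0x1c apart, which is why the cases
     * below recover the target index as (reg - base) / 0x3c or
     * (reg - base) / 0x1c + 8.
     */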
1328     case CB_COLOR0_VIEW:
1329     case CB_COLOR1_VIEW:
1330     case CB_COLOR2_VIEW:
1331     case CB_COLOR3_VIEW:
1332     case CB_COLOR4_VIEW:
1333     case CB_COLOR5_VIEW:
1334     case CB_COLOR6_VIEW:
1335     case CB_COLOR7_VIEW:
1336         tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1337         track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1338         track->cb_dirty = true;
1339         break;
1340     case CB_COLOR8_VIEW:
1341     case CB_COLOR9_VIEW:
1342     case CB_COLOR10_VIEW:
1343     case CB_COLOR11_VIEW:
1344         tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1345         track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1346         track->cb_dirty = true;
1347         break;
1348     case CB_COLOR0_INFO:
1349     case CB_COLOR1_INFO:
1350     case CB_COLOR2_INFO:
1351     case CB_COLOR3_INFO:
1352     case CB_COLOR4_INFO:
1353     case CB_COLOR5_INFO:
1354     case CB_COLOR6_INFO:
1355     case CB_COLOR7_INFO:
1356         tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1357         track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1358         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1359             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1360             if (r) {
1361                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1362                         "0x%04X\n", reg);
1363                 return -EINVAL;
1364             }
1365             ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1366             track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1367         }
1368         track->cb_dirty = true;
1369         break;
1370     case CB_COLOR8_INFO:
1371     case CB_COLOR9_INFO:
1372     case CB_COLOR10_INFO:
1373     case CB_COLOR11_INFO:
1374         tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1375         track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1376         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1377             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1378             if (r) {
1379                 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1380                         "0x%04X\n", reg);
1381                 return -EINVAL;
1382             }
1383             ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1384             track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1385         }
1386         track->cb_dirty = true;
1387         break;
1388     case CB_COLOR0_PITCH:
1389     case CB_COLOR1_PITCH:
1390     case CB_COLOR2_PITCH:
1391     case CB_COLOR3_PITCH:
1392     case CB_COLOR4_PITCH:
1393     case CB_COLOR5_PITCH:
1394     case CB_COLOR6_PITCH:
1395     case CB_COLOR7_PITCH:
1396         tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1397         track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1398         track->cb_dirty = true;
1399         break;
1400     case CB_COLOR8_PITCH:
1401     case CB_COLOR9_PITCH:
1402     case CB_COLOR10_PITCH:
1403     case CB_COLOR11_PITCH:
1404         tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1405         track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1406         track->cb_dirty = true;
1407         break;
1408     case CB_COLOR0_SLICE:
1409     case CB_COLOR1_SLICE:
1410     case CB_COLOR2_SLICE:
1411     case CB_COLOR3_SLICE:
1412     case CB_COLOR4_SLICE:
1413     case CB_COLOR5_SLICE:
1414     case CB_COLOR6_SLICE:
1415     case CB_COLOR7_SLICE:
1416         tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1417         track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1418         track->cb_color_slice_idx[tmp] = idx;
1419         track->cb_dirty = true;
1420         break;
1421     case CB_COLOR8_SLICE:
1422     case CB_COLOR9_SLICE:
1423     case CB_COLOR10_SLICE:
1424     case CB_COLOR11_SLICE:
1425         tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1426         track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1427         track->cb_color_slice_idx[tmp] = idx;
1428         track->cb_dirty = true;
1429         break;
1430     case CB_COLOR0_ATTRIB:
1431     case CB_COLOR1_ATTRIB:
1432     case CB_COLOR2_ATTRIB:
1433     case CB_COLOR3_ATTRIB:
1434     case CB_COLOR4_ATTRIB:
1435     case CB_COLOR5_ATTRIB:
1436     case CB_COLOR6_ATTRIB:
1437     case CB_COLOR7_ATTRIB:
1438         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1439         if (r) {
1440             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1442             return -EINVAL;
1443         }
1444         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1445             if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1446                 unsigned bankw, bankh, mtaspect, tile_split;
1447 
1448                 evergreen_tiling_fields(reloc->tiling_flags,
1449                             &bankw, &bankh, &mtaspect,
1450                             &tile_split);
1451                 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1452                 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1453                        CB_BANK_WIDTH(bankw) |
1454                        CB_BANK_HEIGHT(bankh) |
1455                        CB_MACRO_TILE_ASPECT(mtaspect);
1456             }
1457         }
1458         tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1459         track->cb_color_attrib[tmp] = ib[idx];
1460         track->cb_dirty = true;
1461         break;
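        /*
         * For macro-tiled surfaces the bank geometry and tile split are
         * recomputed from the BO's tiling flags and ORed into the IB, so
         * the tracked attrib value is taken from the patched IB dword
         * rather than from the raw packet.
         */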
1462     case CB_COLOR8_ATTRIB:
1463     case CB_COLOR9_ATTRIB:
1464     case CB_COLOR10_ATTRIB:
1465     case CB_COLOR11_ATTRIB:
1466         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1467         if (r) {
1468             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1470             return -EINVAL;
1471         }
1472         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1473             if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1474                 unsigned bankw, bankh, mtaspect, tile_split;
1475 
1476                 evergreen_tiling_fields(reloc->tiling_flags,
1477                             &bankw, &bankh, &mtaspect,
1478                             &tile_split);
1479                 ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1480                 ib[idx] |= CB_TILE_SPLIT(tile_split) |
1481                        CB_BANK_WIDTH(bankw) |
1482                        CB_BANK_HEIGHT(bankh) |
1483                        CB_MACRO_TILE_ASPECT(mtaspect);
1484             }
1485         }
1486         tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1487         track->cb_color_attrib[tmp] = ib[idx];
1488         track->cb_dirty = true;
1489         break;
1490     case CB_COLOR0_FMASK:
1491     case CB_COLOR1_FMASK:
1492     case CB_COLOR2_FMASK:
1493     case CB_COLOR3_FMASK:
1494     case CB_COLOR4_FMASK:
1495     case CB_COLOR5_FMASK:
1496     case CB_COLOR6_FMASK:
1497     case CB_COLOR7_FMASK:
1498         tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1499         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1500         if (r) {
1501             dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1502             return -EINVAL;
1503         }
1504         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1505         track->cb_color_fmask_bo[tmp] = reloc->robj;
1506         break;
1507     case CB_COLOR0_CMASK:
1508     case CB_COLOR1_CMASK:
1509     case CB_COLOR2_CMASK:
1510     case CB_COLOR3_CMASK:
1511     case CB_COLOR4_CMASK:
1512     case CB_COLOR5_CMASK:
1513     case CB_COLOR6_CMASK:
1514     case CB_COLOR7_CMASK:
1515         tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1516         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1517         if (r) {
1518             dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1519             return -EINVAL;
1520         }
1521         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1522         track->cb_color_cmask_bo[tmp] = reloc->robj;
1523         break;
1524     case CB_COLOR0_FMASK_SLICE:
1525     case CB_COLOR1_FMASK_SLICE:
1526     case CB_COLOR2_FMASK_SLICE:
1527     case CB_COLOR3_FMASK_SLICE:
1528     case CB_COLOR4_FMASK_SLICE:
1529     case CB_COLOR5_FMASK_SLICE:
1530     case CB_COLOR6_FMASK_SLICE:
1531     case CB_COLOR7_FMASK_SLICE:
1532         tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1533         track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1534         break;
1535     case CB_COLOR0_CMASK_SLICE:
1536     case CB_COLOR1_CMASK_SLICE:
1537     case CB_COLOR2_CMASK_SLICE:
1538     case CB_COLOR3_CMASK_SLICE:
1539     case CB_COLOR4_CMASK_SLICE:
1540     case CB_COLOR5_CMASK_SLICE:
1541     case CB_COLOR6_CMASK_SLICE:
1542     case CB_COLOR7_CMASK_SLICE:
1543         tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1544         track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1545         break;
1546     case CB_COLOR0_BASE:
1547     case CB_COLOR1_BASE:
1548     case CB_COLOR2_BASE:
1549     case CB_COLOR3_BASE:
1550     case CB_COLOR4_BASE:
1551     case CB_COLOR5_BASE:
1552     case CB_COLOR6_BASE:
1553     case CB_COLOR7_BASE:
1554         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1555         if (r) {
1556             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1558             return -EINVAL;
1559         }
1560         tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1561         track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1562         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1563         track->cb_color_bo[tmp] = reloc->robj;
1564         track->cb_dirty = true;
1565         break;
1566     case CB_COLOR8_BASE:
1567     case CB_COLOR9_BASE:
1568     case CB_COLOR10_BASE:
1569     case CB_COLOR11_BASE:
1570         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1571         if (r) {
1572             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1574             return -EINVAL;
1575         }
1576         tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1577         track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1578         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1579         track->cb_color_bo[tmp] = reloc->robj;
1580         track->cb_dirty = true;
1581         break;
1582     case DB_HTILE_DATA_BASE:
1583         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1584         if (r) {
1585             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1587             return -EINVAL;
1588         }
1589         track->htile_offset = radeon_get_ib_value(p, idx);
1590         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1591         track->htile_bo = reloc->robj;
1592         track->db_dirty = true;
1593         break;
1594     case DB_HTILE_SURFACE:
1595         /* 8x8 only */
1596         track->htile_surface = radeon_get_ib_value(p, idx);
1597         /* force 8x8 htile width and height */
1598         ib[idx] |= 3;
1599         track->db_dirty = true;
1600         break;
1601     case CB_IMMED0_BASE:
1602     case CB_IMMED1_BASE:
1603     case CB_IMMED2_BASE:
1604     case CB_IMMED3_BASE:
1605     case CB_IMMED4_BASE:
1606     case CB_IMMED5_BASE:
1607     case CB_IMMED6_BASE:
1608     case CB_IMMED7_BASE:
1609     case CB_IMMED8_BASE:
1610     case CB_IMMED9_BASE:
1611     case CB_IMMED10_BASE:
1612     case CB_IMMED11_BASE:
1613     case SQ_PGM_START_FS:
1614     case SQ_PGM_START_ES:
1615     case SQ_PGM_START_VS:
1616     case SQ_PGM_START_GS:
1617     case SQ_PGM_START_PS:
1618     case SQ_PGM_START_HS:
1619     case SQ_PGM_START_LS:
1620     case SQ_CONST_MEM_BASE:
1621     case SQ_ALU_CONST_CACHE_GS_0:
1622     case SQ_ALU_CONST_CACHE_GS_1:
1623     case SQ_ALU_CONST_CACHE_GS_2:
1624     case SQ_ALU_CONST_CACHE_GS_3:
1625     case SQ_ALU_CONST_CACHE_GS_4:
1626     case SQ_ALU_CONST_CACHE_GS_5:
1627     case SQ_ALU_CONST_CACHE_GS_6:
1628     case SQ_ALU_CONST_CACHE_GS_7:
1629     case SQ_ALU_CONST_CACHE_GS_8:
1630     case SQ_ALU_CONST_CACHE_GS_9:
1631     case SQ_ALU_CONST_CACHE_GS_10:
1632     case SQ_ALU_CONST_CACHE_GS_11:
1633     case SQ_ALU_CONST_CACHE_GS_12:
1634     case SQ_ALU_CONST_CACHE_GS_13:
1635     case SQ_ALU_CONST_CACHE_GS_14:
1636     case SQ_ALU_CONST_CACHE_GS_15:
1637     case SQ_ALU_CONST_CACHE_PS_0:
1638     case SQ_ALU_CONST_CACHE_PS_1:
1639     case SQ_ALU_CONST_CACHE_PS_2:
1640     case SQ_ALU_CONST_CACHE_PS_3:
1641     case SQ_ALU_CONST_CACHE_PS_4:
1642     case SQ_ALU_CONST_CACHE_PS_5:
1643     case SQ_ALU_CONST_CACHE_PS_6:
1644     case SQ_ALU_CONST_CACHE_PS_7:
1645     case SQ_ALU_CONST_CACHE_PS_8:
1646     case SQ_ALU_CONST_CACHE_PS_9:
1647     case SQ_ALU_CONST_CACHE_PS_10:
1648     case SQ_ALU_CONST_CACHE_PS_11:
1649     case SQ_ALU_CONST_CACHE_PS_12:
1650     case SQ_ALU_CONST_CACHE_PS_13:
1651     case SQ_ALU_CONST_CACHE_PS_14:
1652     case SQ_ALU_CONST_CACHE_PS_15:
1653     case SQ_ALU_CONST_CACHE_VS_0:
1654     case SQ_ALU_CONST_CACHE_VS_1:
1655     case SQ_ALU_CONST_CACHE_VS_2:
1656     case SQ_ALU_CONST_CACHE_VS_3:
1657     case SQ_ALU_CONST_CACHE_VS_4:
1658     case SQ_ALU_CONST_CACHE_VS_5:
1659     case SQ_ALU_CONST_CACHE_VS_6:
1660     case SQ_ALU_CONST_CACHE_VS_7:
1661     case SQ_ALU_CONST_CACHE_VS_8:
1662     case SQ_ALU_CONST_CACHE_VS_9:
1663     case SQ_ALU_CONST_CACHE_VS_10:
1664     case SQ_ALU_CONST_CACHE_VS_11:
1665     case SQ_ALU_CONST_CACHE_VS_12:
1666     case SQ_ALU_CONST_CACHE_VS_13:
1667     case SQ_ALU_CONST_CACHE_VS_14:
1668     case SQ_ALU_CONST_CACHE_VS_15:
1669     case SQ_ALU_CONST_CACHE_HS_0:
1670     case SQ_ALU_CONST_CACHE_HS_1:
1671     case SQ_ALU_CONST_CACHE_HS_2:
1672     case SQ_ALU_CONST_CACHE_HS_3:
1673     case SQ_ALU_CONST_CACHE_HS_4:
1674     case SQ_ALU_CONST_CACHE_HS_5:
1675     case SQ_ALU_CONST_CACHE_HS_6:
1676     case SQ_ALU_CONST_CACHE_HS_7:
1677     case SQ_ALU_CONST_CACHE_HS_8:
1678     case SQ_ALU_CONST_CACHE_HS_9:
1679     case SQ_ALU_CONST_CACHE_HS_10:
1680     case SQ_ALU_CONST_CACHE_HS_11:
1681     case SQ_ALU_CONST_CACHE_HS_12:
1682     case SQ_ALU_CONST_CACHE_HS_13:
1683     case SQ_ALU_CONST_CACHE_HS_14:
1684     case SQ_ALU_CONST_CACHE_HS_15:
1685     case SQ_ALU_CONST_CACHE_LS_0:
1686     case SQ_ALU_CONST_CACHE_LS_1:
1687     case SQ_ALU_CONST_CACHE_LS_2:
1688     case SQ_ALU_CONST_CACHE_LS_3:
1689     case SQ_ALU_CONST_CACHE_LS_4:
1690     case SQ_ALU_CONST_CACHE_LS_5:
1691     case SQ_ALU_CONST_CACHE_LS_6:
1692     case SQ_ALU_CONST_CACHE_LS_7:
1693     case SQ_ALU_CONST_CACHE_LS_8:
1694     case SQ_ALU_CONST_CACHE_LS_9:
1695     case SQ_ALU_CONST_CACHE_LS_10:
1696     case SQ_ALU_CONST_CACHE_LS_11:
1697     case SQ_ALU_CONST_CACHE_LS_12:
1698     case SQ_ALU_CONST_CACHE_LS_13:
1699     case SQ_ALU_CONST_CACHE_LS_14:
1700     case SQ_ALU_CONST_CACHE_LS_15:
1701         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1702         if (r) {
1703             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1705             return -EINVAL;
1706         }
1707         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1708         break;
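        /*
         * All of the register blocks above hold 256-byte-aligned base
         * addresses, so only address bits [39:8] of the relocated GPU
         * offset are added back into the IB (hence the ">> 8").
         */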
1709     case SX_MEMORY_EXPORT_BASE:
1710         if (p->rdev->family >= CHIP_CAYMAN) {
1711             dev_warn(p->dev, "bad SET_CONFIG_REG 0x%04X\n", reg);
1713             return -EINVAL;
1714         }
1715         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1716         if (r) {
1717             dev_warn(p->dev, "bad SET_CONFIG_REG 0x%04X\n", reg);
1719             return -EINVAL;
1720         }
1721         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1722         break;
1723     case CAYMAN_SX_SCATTER_EXPORT_BASE:
1724         if (p->rdev->family < CHIP_CAYMAN) {
1725             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1727             return -EINVAL;
1728         }
1729         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1730         if (r) {
1731             dev_warn(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1733             return -EINVAL;
1734         }
1735         ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1736         break;
1737     case SX_MISC:
1738         track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1739         break;
1740     default:
1741         dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1742         return -EINVAL;
1743     }
1744     return 0;
1745 }
1746 
1747 /**
1748  * evergreen_is_safe_reg() - check whether a register is authorized
1749  * @p: parser structure holding parsing context
1750  * @reg: register we are testing
1751  *
1752  * Test the register against reg_safe_bm and return true if the
1753  * register is safe to write, false otherwise.
1754  */
1755 static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1756 {
1757     struct evergreen_cs_track *track = p->track;
1758     u32 m, i;
1759 
1760     i = (reg >> 7);
1761     if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1762         return false;
1763     }
1764     m = 1 << ((reg >> 2) & 31);
1765     if (!(track->reg_safe_bm[i] & m))
1766         return true;
1767 
1768     return false;
1769 }
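/*
 * Illustrative sketch of the bitmap lookup above (not part of the
 * driver): each 32-bit word of reg_safe_bm covers a 128-byte window of
 * register space (32 dword registers), so for e.g. reg = 0x8040:
 *
 *     i = 0x8040 >> 7               = 0x100   (word index)
 *     m = 1 << ((0x8040 >> 2) & 31) = 1 << 16 (bit within the word)
 *
 * A clear bit means the register may be written freely; a set bit
 * forces the write through evergreen_cs_handle_reg() instead.
 */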
1770 
1771 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1772                    struct radeon_cs_packet *pkt)
1773 {
1774     struct radeon_bo_list *reloc;
1775     struct evergreen_cs_track *track;
1776     uint32_t *ib;
1777     unsigned idx;
1778     unsigned i;
1779     unsigned start_reg, end_reg, reg;
1780     int r;
1781     u32 idx_value;
1782 
1783     track = (struct evergreen_cs_track *)p->track;
1784     ib = p->ib.ptr;
1785     idx = pkt->idx + 1;
1786     idx_value = radeon_get_ib_value(p, idx);
1787 
1788     switch (pkt->opcode) {
1789     case PACKET3_SET_PREDICATION:
1790     {
1791         int pred_op;
1792         int tmp;
1793         uint64_t offset;
1794 
1795         if (pkt->count != 1) {
1796             DRM_ERROR("bad SET PREDICATION\n");
1797             return -EINVAL;
1798         }
1799 
1800         tmp = radeon_get_ib_value(p, idx + 1);
1801         pred_op = (tmp >> 16) & 0x7;
1802 
1803         /* for the clear predicate operation */
1804         if (pred_op == 0)
1805             return 0;
1806 
1807         if (pred_op > 2) {
1808             DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1809             return -EINVAL;
1810         }
1811 
1812         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1813         if (r) {
1814             DRM_ERROR("bad SET PREDICATION\n");
1815             return -EINVAL;
1816         }
1817 
1818         offset = reloc->gpu_offset +
1819              (idx_value & 0xfffffff0) +
1820              ((u64)(tmp & 0xff) << 32);
1821 
1822         ib[idx + 0] = offset;
1823         ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1824     }
1825     break;
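    /*
     * The predication source address is 40 bits wide: the low dword
     * comes from the packet (16-byte aligned, hence the 0xfffffff0
     * mask) and bits [39:32] live in the low byte of the next dword,
     * which is why only those eight bits are rewritten above.
     */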
1826     case PACKET3_CONTEXT_CONTROL:
1827         if (pkt->count != 1) {
1828             DRM_ERROR("bad CONTEXT_CONTROL\n");
1829             return -EINVAL;
1830         }
1831         break;
1832     case PACKET3_INDEX_TYPE:
1833     case PACKET3_NUM_INSTANCES:
1834     case PACKET3_CLEAR_STATE:
1835         if (pkt->count) {
1836             DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1837             return -EINVAL;
1838         }
1839         break;
1840     case CAYMAN_PACKET3_DEALLOC_STATE:
1841         if (p->rdev->family < CHIP_CAYMAN) {
1842             DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1843             return -EINVAL;
1844         }
1845         if (pkt->count) {
1846             DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1847             return -EINVAL;
1848         }
1849         break;
1850     case PACKET3_INDEX_BASE:
1851     {
1852         uint64_t offset;
1853 
1854         if (pkt->count != 1) {
1855             DRM_ERROR("bad INDEX_BASE\n");
1856             return -EINVAL;
1857         }
1858         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1859         if (r) {
1860             DRM_ERROR("bad INDEX_BASE\n");
1861             return -EINVAL;
1862         }
1863 
1864         offset = reloc->gpu_offset +
1865              idx_value +
1866              ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1867 
1868         ib[idx+0] = offset;
1869         ib[idx+1] = upper_32_bits(offset) & 0xff;
1870 
1871         r = evergreen_cs_track_check(p);
1872         if (r) {
1873             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1874             return r;
1875         }
1876         break;
1877     }
1878     case PACKET3_INDEX_BUFFER_SIZE:
1879     {
1880         if (pkt->count != 0) {
1881             DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1882             return -EINVAL;
1883         }
1884         break;
1885     }
1886     case PACKET3_DRAW_INDEX:
1887     {
1888         uint64_t offset;
1889         if (pkt->count != 3) {
1890             DRM_ERROR("bad DRAW_INDEX\n");
1891             return -EINVAL;
1892         }
1893         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1894         if (r) {
1895             DRM_ERROR("bad DRAW_INDEX\n");
1896             return -EINVAL;
1897         }
1898 
1899         offset = reloc->gpu_offset +
1900              idx_value +
1901              ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1902 
1903         ib[idx+0] = offset;
1904         ib[idx+1] = upper_32_bits(offset) & 0xff;
1905 
1906         r = evergreen_cs_track_check(p);
1907         if (r) {
1908             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1909             return r;
1910         }
1911         break;
1912     }
1913     case PACKET3_DRAW_INDEX_2:
1914     {
1915         uint64_t offset;
1916 
1917         if (pkt->count != 4) {
1918             DRM_ERROR("bad DRAW_INDEX_2\n");
1919             return -EINVAL;
1920         }
1921         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1922         if (r) {
1923             DRM_ERROR("bad DRAW_INDEX_2\n");
1924             return -EINVAL;
1925         }
1926 
1927         offset = reloc->gpu_offset +
1928              radeon_get_ib_value(p, idx+1) +
1929              ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1930 
1931         ib[idx+1] = offset;
1932         ib[idx+2] = upper_32_bits(offset) & 0xff;
1933 
1934         r = evergreen_cs_track_check(p);
1935         if (r) {
1936             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1937             return r;
1938         }
1939         break;
1940     }
1941     case PACKET3_DRAW_INDEX_AUTO:
1942         if (pkt->count != 1) {
1943             DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1944             return -EINVAL;
1945         }
1946         r = evergreen_cs_track_check(p);
1947         if (r) {
1948             dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1949             return r;
1950         }
1951         break;
1952     case PACKET3_DRAW_INDEX_MULTI_AUTO:
1953         if (pkt->count != 2) {
1954             DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1955             return -EINVAL;
1956         }
1957         r = evergreen_cs_track_check(p);
1958         if (r) {
1959             dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1960             return r;
1961         }
1962         break;
1963     case PACKET3_DRAW_INDEX_IMMD:
1964         if (pkt->count < 2) {
1965             DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1966             return -EINVAL;
1967         }
1968         r = evergreen_cs_track_check(p);
1969         if (r) {
1970             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1971             return r;
1972         }
1973         break;
1974     case PACKET3_DRAW_INDEX_OFFSET:
1975         if (pkt->count != 2) {
1976             DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1977             return -EINVAL;
1978         }
1979         r = evergreen_cs_track_check(p);
1980         if (r) {
1981             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1982             return r;
1983         }
1984         break;
1985     case PACKET3_DRAW_INDEX_OFFSET_2:
1986         if (pkt->count != 3) {
1987             DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1988             return -EINVAL;
1989         }
1990         r = evergreen_cs_track_check(p);
1991         if (r) {
1992             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1993             return r;
1994         }
1995         break;
1996     case PACKET3_SET_BASE:
1997     {
1998         /*
1999          * DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2000          *    2 BASE_INDEX Bits [3:0] - Base Index specifies which base address is specified in the last two DWs.
2001          *      0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2002          *    3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2003          *    4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2004          */
2005         if (pkt->count != 2) {
2006             DRM_ERROR("bad SET_BASE\n");
2007             return -EINVAL;
2008         }
2009 
2010         /* currently we only support setting the indirect draw buffer base address */
2011         if (idx_value != 1) {
2012             DRM_ERROR("bad SET_BASE\n");
2013             return -EINVAL;
2014         }
2015 
2016         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2017         if (r) {
2018             DRM_ERROR("bad SET_BASE\n");
2019             return -EINVAL;
2020         }
2021 
2022         track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2023 
2024         ib[idx+1] = reloc->gpu_offset;
2025         ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2026 
2027         break;
2028     }
2029     case PACKET3_DRAW_INDIRECT:
2030     case PACKET3_DRAW_INDEX_INDIRECT:
2031     {
2032         u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2033 
2034         /*
2035          * DW 1 HEADER
2036          *    2 DATA_OFFSET Bits [31:0] - byte aligned offset where the required data structure starts. Bits [1:0] are zero
2037          *    3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2038          */
2039         if (pkt->count != 1) {
2040             DRM_ERROR("bad DRAW_INDIRECT\n");
2041             return -EINVAL;
2042         }
2043 
2044         if (idx_value + size > track->indirect_draw_buffer_size) {
2045             dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2046                 idx_value, size, track->indirect_draw_buffer_size);
2047             return -EINVAL;
2048         }
2049 
2050         r = evergreen_cs_track_check(p);
2051         if (r) {
2052             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2053             return r;
2054         }
2055         break;
2056     }
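    /*
     * The indirect argument structure is 16 bytes for DRAW_INDIRECT and
     * 20 bytes for DRAW_INDEX_INDIRECT ("size" above), so the packet's
     * byte offset plus that size must fit inside the buffer registered
     * earlier via PACKET3_SET_BASE.
     */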
2057     case PACKET3_DISPATCH_DIRECT:
2058         if (pkt->count != 3) {
2059             DRM_ERROR("bad DISPATCH_DIRECT\n");
2060             return -EINVAL;
2061         }
2062         r = evergreen_cs_track_check(p);
2063         if (r) {
2064             dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2065             return r;
2066         }
2067         break;
2068     case PACKET3_DISPATCH_INDIRECT:
2069         if (pkt->count != 1) {
2070             DRM_ERROR("bad DISPATCH_INDIRECT\n");
2071             return -EINVAL;
2072         }
2073         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2074         if (r) {
2075             DRM_ERROR("bad DISPATCH_INDIRECT\n");
2076             return -EINVAL;
2077         }
2078         ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2079         r = evergreen_cs_track_check(p);
2080         if (r) {
2081             dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2082             return r;
2083         }
2084         break;
2085     case PACKET3_WAIT_REG_MEM:
2086         if (pkt->count != 5) {
2087             DRM_ERROR("bad WAIT_REG_MEM\n");
2088             return -EINVAL;
2089         }
2090         /* bit 4 is reg (0) or mem (1) */
2091         if (idx_value & 0x10) {
2092             uint64_t offset;
2093 
2094             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2095             if (r) {
2096                 DRM_ERROR("bad WAIT_REG_MEM\n");
2097                 return -EINVAL;
2098             }
2099 
2100             offset = reloc->gpu_offset +
2101                  (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2102                  ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2103 
2104             ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2105             ib[idx+2] = upper_32_bits(offset) & 0xff;
2106         } else if (idx_value & 0x100) {
2107             DRM_ERROR("cannot use PFP on REG wait\n");
2108             return -EINVAL;
2109         }
2110         break;
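        /*
         * Only the memory flavour (bit 4 set) carries a reloc to patch,
         * and the poll address must stay dword aligned (0xfffffffc).
         * Register waits may not use the PFP engine (bit 8), hence the
         * rejection above.
         */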
2111     case PACKET3_CP_DMA:
2112     {
2113         u32 command, size, info;
2114         u64 offset, tmp;
2115         if (pkt->count != 4) {
2116             DRM_ERROR("bad CP DMA\n");
2117             return -EINVAL;
2118         }
2119         command = radeon_get_ib_value(p, idx+4);
2120         size = command & 0x1fffff;
2121         info = radeon_get_ib_value(p, idx+1);
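        /*
         * Bits [30:29] of "info" select the source address space
         * (0 = memory, 1 = GDS, 2 = DATA) and bits [21:20] the
         * destination space (0 = memory, 1 = GDS); those are the fields
         * the 0x60000000 and 0x00300000 masks below extract.
         */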
2122         if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2123             (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2124             ((((info & 0x00300000) >> 20) == 0) &&
2125              (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2126             ((((info & 0x60000000) >> 29) == 0) &&
2127              (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2128             /* non mem-to-mem copies require a dw-aligned count */
2129             if (size % 4) {
2130                 DRM_ERROR("CP DMA command requires dw count alignment\n");
2131                 return -EINVAL;
2132             }
2133         }
2134         if (command & PACKET3_CP_DMA_CMD_SAS) {
2135             /* src address space is register */
2136             /* GDS is ok */
2137             if (((info & 0x60000000) >> 29) != 1) {
2138                 DRM_ERROR("CP DMA SAS not supported\n");
2139                 return -EINVAL;
2140             }
2141         } else {
2142             if (command & PACKET3_CP_DMA_CMD_SAIC) {
2143                 DRM_ERROR("CP DMA SAIC only supported for registers\n");
2144                 return -EINVAL;
2145             }
2146             /* src address space is memory */
2147             if (((info & 0x60000000) >> 29) == 0) {
2148                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2149                 if (r) {
2150                     DRM_ERROR("bad CP DMA SRC\n");
2151                     return -EINVAL;
2152                 }
2153 
2154                 tmp = radeon_get_ib_value(p, idx) +
2155                     ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2156 
2157                 offset = reloc->gpu_offset + tmp;
2158 
2159                 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2160                     dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2161                          tmp + size, radeon_bo_size(reloc->robj));
2162                     return -EINVAL;
2163                 }
2164 
2165                 ib[idx] = offset;
2166                 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2167             } else if (((info & 0x60000000) >> 29) != 2) {
2168                 DRM_ERROR("bad CP DMA SRC_SEL\n");
2169                 return -EINVAL;
2170             }
2171         }
2172         if (command & PACKET3_CP_DMA_CMD_DAS) {
2173             /* dst address space is register */
2174             /* GDS is ok */
2175             if (((info & 0x00300000) >> 20) != 1) {
2176                 DRM_ERROR("CP DMA DAS not supported\n");
2177                 return -EINVAL;
2178             }
2179         } else {
2180             /* dst address space is memory */
2181             if (command & PACKET3_CP_DMA_CMD_DAIC) {
2182                 DRM_ERROR("CP DMA DAIC only supported for registers\n");
2183                 return -EINVAL;
2184             }
2185             if (((info & 0x00300000) >> 20) == 0) {
2186                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2187                 if (r) {
2188                     DRM_ERROR("bad CP DMA DST\n");
2189                     return -EINVAL;
2190                 }
2191 
2192                 tmp = radeon_get_ib_value(p, idx+2) +
2193                     ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2194 
2195                 offset = reloc->gpu_offset + tmp;
2196 
2197                 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2198                     dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2199                          tmp + size, radeon_bo_size(reloc->robj));
2200                     return -EINVAL;
2201                 }
2202 
2203                 ib[idx+2] = offset;
2204                 ib[idx+3] = upper_32_bits(offset) & 0xff;
2205             } else {
2206                 DRM_ERROR("bad CP DMA DST_SEL\n");
2207                 return -EINVAL;
2208             }
2209         }
2210         break;
2211     }
2212     case PACKET3_PFP_SYNC_ME:
2213         if (pkt->count) {
2214             DRM_ERROR("bad PFP_SYNC_ME\n");
2215             return -EINVAL;
2216         }
2217         break;
2218     case PACKET3_SURFACE_SYNC:
2219         if (pkt->count != 3) {
2220             DRM_ERROR("bad SURFACE_SYNC\n");
2221             return -EINVAL;
2222         }
2223         /* 0xffffffff/0x0 is the "flush all caches" flag */
2224         if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2225             radeon_get_ib_value(p, idx + 2) != 0) {
2226             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2227             if (r) {
2228                 DRM_ERROR("bad SURFACE_SYNC\n");
2229                 return -EINVAL;
2230             }
2231             ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2232         }
2233         break;
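    /*
     * A size/base pair of 0xffffffff/0x0 flushes everything and needs
     * no reloc; any other range is rebased onto the supplied BO, with
     * the base register held in 256-byte units (hence the ">> 8").
     */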
2234     case PACKET3_EVENT_WRITE:
2235         if (pkt->count != 2 && pkt->count != 0) {
2236             DRM_ERROR("bad EVENT_WRITE\n");
2237             return -EINVAL;
2238         }
2239         if (pkt->count) {
2240             uint64_t offset;
2241 
2242             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2243             if (r) {
2244                 DRM_ERROR("bad EVENT_WRITE\n");
2245                 return -EINVAL;
2246             }
2247             offset = reloc->gpu_offset +
2248                  (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2249                  ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2250 
2251             ib[idx+1] = offset & 0xfffffff8;
2252             ib[idx+2] = upper_32_bits(offset) & 0xff;
2253         }
2254         break;
2255     case PACKET3_EVENT_WRITE_EOP:
2256     {
2257         uint64_t offset;
2258 
2259         if (pkt->count != 4) {
2260             DRM_ERROR("bad EVENT_WRITE_EOP\n");
2261             return -EINVAL;
2262         }
2263         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2264         if (r) {
2265             DRM_ERROR("bad EVENT_WRITE_EOP\n");
2266             return -EINVAL;
2267         }
2268 
2269         offset = reloc->gpu_offset +
2270              (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2271              ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2272 
2273         ib[idx+1] = offset & 0xfffffffc;
2274         ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2275         break;
2276     }
2277     case PACKET3_EVENT_WRITE_EOS:
2278     {
2279         uint64_t offset;
2280 
2281         if (pkt->count != 3) {
2282             DRM_ERROR("bad EVENT_WRITE_EOS\n");
2283             return -EINVAL;
2284         }
2285         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2286         if (r) {
2287             DRM_ERROR("bad EVENT_WRITE_EOS\n");
2288             return -EINVAL;
2289         }
2290 
2291         offset = reloc->gpu_offset +
2292              (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2293              ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2294 
2295         ib[idx+1] = offset & 0xfffffffc;
2296         ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2297         break;
2298     }
2299     case PACKET3_SET_CONFIG_REG:
2300         start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2301         end_reg = 4 * pkt->count + start_reg - 4;
2302         if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2303             (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2304             (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2305             DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2306             return -EINVAL;
2307         }
2308         for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2309             if (evergreen_is_safe_reg(p, reg))
2310                 continue;
2311             r = evergreen_cs_handle_reg(p, reg, idx);
2312             if (r)
2313                 return r;
2314         }
2315         break;
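    /*
     * The first payload dword is a dword offset into the config
     * register block ("idx_value << 2" converts it to bytes); the
     * remaining pkt->count dwords are register values, so the last
     * register touched is start_reg + 4 * pkt->count - 4.  The same
     * decode is used for the context, bool/loop/ctl const and sampler
     * blocks below.
     */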
2316     case PACKET3_SET_CONTEXT_REG:
2317         start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2318         end_reg = 4 * pkt->count + start_reg - 4;
2319         if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2320             (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2321             (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2322             DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2323             return -EINVAL;
2324         }
2325         for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2326             if (evergreen_is_safe_reg(p, reg))
2327                 continue;
2328             r = evergreen_cs_handle_reg(p, reg, idx);
2329             if (r)
2330                 return r;
2331         }
2332         break;
2333     case PACKET3_SET_RESOURCE:
2334         if (pkt->count % 8) {
2335             DRM_ERROR("bad SET_RESOURCE\n");
2336             return -EINVAL;
2337         }
2338         start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2339         end_reg = 4 * pkt->count + start_reg - 4;
2340         if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2341             (start_reg >= PACKET3_SET_RESOURCE_END) ||
2342             (end_reg >= PACKET3_SET_RESOURCE_END)) {
2343             DRM_ERROR("bad SET_RESOURCE\n");
2344             return -EINVAL;
2345         }
2346         for (i = 0; i < (pkt->count / 8); i++) {
2347             struct radeon_bo *texture, *mipmap;
2348             u32 toffset, moffset;
2349             u32 size, offset, mip_address, tex_dim;
2350 
2351             switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2352             case SQ_TEX_VTX_VALID_TEXTURE:
2353                 /* tex base */
2354                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2355                 if (r) {
2356                     DRM_ERROR("bad SET_RESOURCE (tex)\n");
2357                     return -EINVAL;
2358                 }
2359                 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2360                     ib[idx+1+(i*8)+1] |=
2361                         TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2362                     if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2363                         unsigned bankw, bankh, mtaspect, tile_split;
2364 
2365                         evergreen_tiling_fields(reloc->tiling_flags,
2366                                     &bankw, &bankh, &mtaspect,
2367                                     &tile_split);
2368                         ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2369                         ib[idx+1+(i*8)+7] |=
2370                             TEX_BANK_WIDTH(bankw) |
2371                             TEX_BANK_HEIGHT(bankh) |
2372                             MACRO_TILE_ASPECT(mtaspect) |
2373                             TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2374                     }
2375                 }
2376                 texture = reloc->robj;
2377                 toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2378 
2379                 /* tex mip base */
2380                 tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2381                 mip_address = ib[idx+1+(i*8)+3];
2382 
2383                 if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2384                     !mip_address &&
2385                     !radeon_cs_packet_next_is_pkt3_nop(p)) {
2386                     /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2387                      * It should be 0 if FMASK is disabled. */
2388                     moffset = 0;
2389                     mipmap = NULL;
2390                 } else {
2391                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2392                     if (r) {
2393                         DRM_ERROR("bad SET_RESOURCE (tex)\n");
2394                         return -EINVAL;
2395                     }
2396                     moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2397                     mipmap = reloc->robj;
2398                 }
2399 
2400                 r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2401                 if (r)
2402                     return r;
2403                 ib[idx+1+(i*8)+2] += toffset;
2404                 ib[idx+1+(i*8)+3] += moffset;
2405                 break;
2406             case SQ_TEX_VTX_VALID_BUFFER:
2407             {
2408                 uint64_t offset64;
2409                 /* vtx base */
2410                 r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2411                 if (r) {
2412                     DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2413                     return -EINVAL;
2414                 }
2415                 offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2416                 size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2417                 if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2418                     /* force size to size of the buffer */
2419                     dev_warn_ratelimited(p->dev, "vbo resource seems too big for the bo\n");
2420                     ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2421                 }
2422 
2423                 offset64 = reloc->gpu_offset + offset;
2424                 ib[idx+1+(i*8)+0] = offset64;
2425                 ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2426                             (upper_32_bits(offset64) & 0xff);
2427                 break;
2428             }
2429             case SQ_TEX_VTX_INVALID_TEXTURE:
2430             case SQ_TEX_VTX_INVALID_BUFFER:
2431             default:
2432                 DRM_ERROR("bad SET_RESOURCE\n");
2433                 return -EINVAL;
2434             }
2435         }
2436         break;
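    /*
     * Each resource descriptor in the payload is 8 dwords (hence the
     * "% 8" count check).  For textures, dwords 2 and 3 receive the
     * 256-byte-aligned texture and mipmap base addresses; for vertex
     * buffers, dword 0 takes the low address bits and the low byte of
     * dword 2 takes address bits [39:32].
     */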
2437     case PACKET3_SET_ALU_CONST:
2438         /* XXX fix me ALU const buffers only */
2439         break;
2440     case PACKET3_SET_BOOL_CONST:
2441         start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2442         end_reg = 4 * pkt->count + start_reg - 4;
2443         if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2444             (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2445             (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2446             DRM_ERROR("bad SET_BOOL_CONST\n");
2447             return -EINVAL;
2448         }
2449         break;
2450     case PACKET3_SET_LOOP_CONST:
2451         start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2452         end_reg = 4 * pkt->count + start_reg - 4;
2453         if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2454             (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2455             (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2456             DRM_ERROR("bad SET_LOOP_CONST\n");
2457             return -EINVAL;
2458         }
2459         break;
2460     case PACKET3_SET_CTL_CONST:
2461         start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2462         end_reg = 4 * pkt->count + start_reg - 4;
2463         if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2464             (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2465             (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2466             DRM_ERROR("bad SET_CTL_CONST\n");
2467             return -EINVAL;
2468         }
2469         break;
2470     case PACKET3_SET_SAMPLER:
2471         if (pkt->count % 3) {
2472             DRM_ERROR("bad SET_SAMPLER\n");
2473             return -EINVAL;
2474         }
2475         start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2476         end_reg = 4 * pkt->count + start_reg - 4;
2477         if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2478             (start_reg >= PACKET3_SET_SAMPLER_END) ||
2479             (end_reg >= PACKET3_SET_SAMPLER_END)) {
2480             DRM_ERROR("bad SET_SAMPLER\n");
2481             return -EINVAL;
2482         }
2483         break;
2484     case PACKET3_STRMOUT_BUFFER_UPDATE:
2485         if (pkt->count != 4) {
2486             DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2487             return -EINVAL;
2488         }
2489         /* Updating memory at DST_ADDRESS. */
2490         if (idx_value & 0x1) {
2491             u64 offset;
2492             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2493             if (r) {
2494                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2495                 return -EINVAL;
2496             }
2497             offset = radeon_get_ib_value(p, idx+1);
2498             offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2499             if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2500                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2501                       offset + 4, radeon_bo_size(reloc->robj));
2502                 return -EINVAL;
2503             }
2504             offset += reloc->gpu_offset;
2505             ib[idx+1] = offset;
2506             ib[idx+2] = upper_32_bits(offset) & 0xff;
2507         }
2508         /* Reading data from SRC_ADDRESS. */
2509         if (((idx_value >> 1) & 0x3) == 2) {
2510             u64 offset;
2511             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2512             if (r) {
2513                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2514                 return -EINVAL;
2515             }
2516             offset = radeon_get_ib_value(p, idx+3);
2517             offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2518             if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2519                 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2520                       offset + 4, radeon_bo_size(reloc->robj));
2521                 return -EINVAL;
2522             }
2523             offset += reloc->gpu_offset;
2524             ib[idx+3] = offset;
2525             ib[idx+4] = upper_32_bits(offset) & 0xff;
2526         }
2527         break;
2528     case PACKET3_MEM_WRITE:
2529     {
2530         u64 offset;
2531 
2532         if (pkt->count != 3) {
2533             DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2534             return -EINVAL;
2535         }
2536         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2537         if (r) {
2538             DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2539             return -EINVAL;
2540         }
2541         offset = radeon_get_ib_value(p, idx+0);
2542         offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2543         if (offset & 0x7) {
2544             DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2545             return -EINVAL;
2546         }
2547         if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2548             DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2549                   offset + 8, radeon_bo_size(reloc->robj));
2550             return -EINVAL;
2551         }
2552         offset += reloc->gpu_offset;
2553         ib[idx+0] = offset;
2554         ib[idx+1] = upper_32_bits(offset) & 0xff;
2555         break;
2556     }
2557     case PACKET3_COPY_DW:
2558         if (pkt->count != 4) {
2559             DRM_ERROR("bad COPY_DW (invalid count)\n");
2560             return -EINVAL;
2561         }
2562         if (idx_value & 0x1) {
2563             u64 offset;
2564             /* SRC is memory. */
2565             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2566             if (r) {
2567                 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2568                 return -EINVAL;
2569             }
2570             offset = radeon_get_ib_value(p, idx+1);
2571             offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2572             if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2573                 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2574                       offset + 4, radeon_bo_size(reloc->robj));
2575                 return -EINVAL;
2576             }
2577             offset += reloc->gpu_offset;
2578             ib[idx+1] = offset;
2579             ib[idx+2] = upper_32_bits(offset) & 0xff;
2580         } else {
2581             /* SRC is a reg. */
2582             reg = radeon_get_ib_value(p, idx+1) << 2;
2583             if (!evergreen_is_safe_reg(p, reg)) {
2584                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2585                      reg, idx + 1);
2586                 return -EINVAL;
2587             }
2588         }
2589         if (idx_value & 0x2) {
2590             u64 offset;
2591             /* DST is memory. */
2592             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2593             if (r) {
2594                 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2595                 return -EINVAL;
2596             }
2597             offset = radeon_get_ib_value(p, idx+3);
2598             offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2599             if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2600                 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2601                       offset + 4, radeon_bo_size(reloc->robj));
2602                 return -EINVAL;
2603             }
2604             offset += reloc->gpu_offset;
2605             ib[idx+3] = offset;
2606             ib[idx+4] = upper_32_bits(offset) & 0xff;
2607         } else {
2608             /* DST is a reg. */
2609             reg = radeon_get_ib_value(p, idx+3) << 2;
2610             if (!evergreen_is_safe_reg(p, reg)) {
2611                 dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2612                      reg, idx + 3);
2613                 return -EINVAL;
2614             }
2615         }
2616         break;
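    /*
     * COPY_DW control dword: bit 0 selects a memory source (reloc
     * required) versus a register source, and bit 1 does the same for
     * the destination; register operands are still vetted against the
     * safe-register bitmap.
     */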
2617     case PACKET3_SET_APPEND_CNT:
2618     {
2619         uint32_t areg;
2620         uint32_t allowed_reg_base;
2621         uint32_t source_sel;
2622         if (pkt->count != 2) {
2623             DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
2624             return -EINVAL;
2625         }
2626 
2627         allowed_reg_base = GDS_APPEND_COUNT_0;
2628         allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
2629         allowed_reg_base >>= 2;
2630 
2631         areg = idx_value >> 16;
2632         if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
2633             dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
2634                  areg, idx);
2635             return -EINVAL;
2636         }
2637 
2638         source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
2639         if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
2640             uint64_t offset;
2641             uint32_t swap;
2642             r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2643             if (r) {
2644                 DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
2645                 return -EINVAL;
2646             }
2647             offset = radeon_get_ib_value(p, idx + 1);
2648             swap = offset & 0x3;
2649             offset &= ~0x3;
2650 
2651             offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
2652 
2653             offset += reloc->gpu_offset;
2654             ib[idx+1] = (offset & 0xfffffffc) | swap;
2655             ib[idx+2] = upper_32_bits(offset) & 0xff;
2656         } else {
2657             DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
2658             return -EINVAL;
2659         }
2660         break;
2661     }
2662     case PACKET3_NOP:
2663         break;
2664     default:
2665         DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2666         return -EINVAL;
2667     }
2668     return 0;
2669 }
2670 
2671 int evergreen_cs_parse(struct radeon_cs_parser *p)
2672 {
2673     struct radeon_cs_packet pkt;
2674     struct evergreen_cs_track *track;
2675     u32 tmp;
2676     int r;
2677 
2678     if (p->track == NULL) {
2679         /* initialize tracker, we are in kms */
2680         track = kzalloc(sizeof(*track), GFP_KERNEL);
2681         if (track == NULL)
2682             return -ENOMEM;
2683         evergreen_cs_track_init(track);
2684         if (p->rdev->family >= CHIP_CAYMAN) {
2685             tmp = p->rdev->config.cayman.tile_config;
2686             track->reg_safe_bm = cayman_reg_safe_bm;
2687         } else {
2688             tmp = p->rdev->config.evergreen.tile_config;
2689             track->reg_safe_bm = evergreen_reg_safe_bm;
2690         }
2691         BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2692         BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
2693         switch (tmp & 0xf) {
2694         case 0:
2695             track->npipes = 1;
2696             break;
2697         case 1:
2698         default:
2699             track->npipes = 2;
2700             break;
2701         case 2:
2702             track->npipes = 4;
2703             break;
2704         case 3:
2705             track->npipes = 8;
2706             break;
2707         }
2708 
2709         switch ((tmp & 0xf0) >> 4) {
2710         case 0:
2711             track->nbanks = 4;
2712             break;
2713         case 1:
2714         default:
2715             track->nbanks = 8;
2716             break;
2717         case 2:
2718             track->nbanks = 16;
2719             break;
2720         }
2721 
2722         switch ((tmp & 0xf00) >> 8) {
2723         case 0:
2724             track->group_size = 256;
2725             break;
2726         case 1:
2727         default:
2728             track->group_size = 512;
2729             break;
2730         }
2731 
2732         switch ((tmp & 0xf000) >> 12) {
2733         case 0:
2734             track->row_size = 1;
2735             break;
2736         case 1:
2737         default:
2738             track->row_size = 2;
2739             break;
2740         case 2:
2741             track->row_size = 4;
2742             break;
2743         }
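        /*
         * tile_config decode, as implemented above:
         *   bits [3:0]   number of pipes     (1/2/4/8)
         *   bits [7:4]   number of banks     (4/8/16)
         *   bits [11:8]  group size in bytes (256/512)
         *   bits [15:12] row size            (1/2/4)
         */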
2744 
2745         p->track = track;
2746     }
2747     do {
2748         r = radeon_cs_packet_parse(p, &pkt, p->idx);
2749         if (r) {
2750             kfree(p->track);
2751             p->track = NULL;
2752             return r;
2753         }
2754         p->idx += pkt.count + 2;
2755         switch (pkt.type) {
2756         case RADEON_PACKET_TYPE0:
2757             r = evergreen_cs_parse_packet0(p, &pkt);
2758             break;
2759         case RADEON_PACKET_TYPE2:
2760             break;
2761         case RADEON_PACKET_TYPE3:
2762             r = evergreen_packet3_check(p, &pkt);
2763             break;
2764         default:
2765             DRM_ERROR("Unknown packet type %d!\n", pkt.type);
2766             kfree(p->track);
2767             p->track = NULL;
2768             return -EINVAL;
2769         }
2770         if (r) {
2771             kfree(p->track);
2772             p->track = NULL;
2773             return r;
2774         }
2775     } while (p->idx < p->chunk_ib->length_dw);
2776 #if 0
2777     for (r = 0; r < p->ib.length_dw; r++) {
2778         pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
2779         mdelay(1);
2780     }
2781 #endif
2782     kfree(p->track);
2783     p->track = NULL;
2784     return 0;
2785 }
2786 
2787 /**
2788  * evergreen_dma_cs_parse() - parse the DMA IB
2789  * @p:      parser structure holding parsing context.
2790  *
2791  * Parses the DMA IB from the CS ioctl, updates the GPU addresses
2792  * based on the reloc information and checks for errors
2793  * (Evergreen-Cayman).
2794  * Returns 0 for success or a negative error code on failure.
2795  */
2796 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2797 {
2798     struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2799     struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2800     u32 header, cmd, count, sub_cmd;
2801     uint32_t *ib = p->ib.ptr;
2802     u32 idx;
2803     u64 src_offset, dst_offset, dst2_offset;
2804     int r;
2805 
2806     do {
2807         if (p->idx >= ib_chunk->length_dw) {
2808             DRM_ERROR("Cannot parse packet at %d after CS end %d!\n",
2809                   p->idx, ib_chunk->length_dw);
2810             return -EINVAL;
2811         }
2812         idx = p->idx;
2813         header = radeon_get_ib_value(p, idx);
2814         cmd = GET_DMA_CMD(header);
2815         count = GET_DMA_COUNT(header);
2816         sub_cmd = GET_DMA_SUB_CMD(header);
2817 
2818         switch (cmd) {
2819         case DMA_PACKET_WRITE:
2820             r = r600_dma_cs_next_reloc(p, &dst_reloc);
2821             if (r) {
2822                 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2823                 return -EINVAL;
2824             }
2825             switch (sub_cmd) {
2826             /* tiled */
2827             case 8:
2828                 dst_offset = radeon_get_ib_value(p, idx+1);
2829                 dst_offset <<= 8;
2830 
2831                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2832                 p->idx += count + 7;
2833                 break;
2834             /* linear */
2835             case 0:
2836                 dst_offset = radeon_get_ib_value(p, idx+1);
2837                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2838 
2839                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2840                 ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2841                 p->idx += count + 3;
2842                 break;
2843             default:
2844                 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2845                 return -EINVAL;
2846             }
2847             if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2848                 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2849                      dst_offset, radeon_bo_size(dst_reloc->robj));
2850                 return -EINVAL;
2851             }
2852             break;
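            /*
             * Sub command 8 (tiled) carries a 256-byte-aligned base, so
             * the offset is shifted left by 8 before the bounds check
             * and the reloc is applied as ">> 8"; the linear form (sub
             * command 0) instead uses a full 40-bit address split
             * across two dwords.
             */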
2853         case DMA_PACKET_COPY:
2854             r = r600_dma_cs_next_reloc(p, &src_reloc);
2855             if (r) {
2856                 DRM_ERROR("bad DMA_PACKET_COPY\n");
2857                 return -EINVAL;
2858             }
2859             r = r600_dma_cs_next_reloc(p, &dst_reloc);
2860             if (r) {
2861                 DRM_ERROR("bad DMA_PACKET_COPY\n");
2862                 return -EINVAL;
2863             }
2864             switch (sub_cmd) {
2865             /* Copy L2L, DW aligned */
2866             case 0x00:
2867                 /* L2L, dw */
2868                 src_offset = radeon_get_ib_value(p, idx+2);
2869                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2870                 dst_offset = radeon_get_ib_value(p, idx+1);
2871                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2872                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2873                     dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2874                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2875                     return -EINVAL;
2876                 }
2877                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2878                     dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2879                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2880                     return -EINVAL;
2881                 }
2882                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2883                 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2884                 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2885                 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2886                 p->idx += 5;
2887                 break;
2888             /* Copy L2T/T2L */
2889             case 0x08:
2890                 /* detile bit */
2891                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2892                     /* tiled src, linear dst */
2893                     src_offset = radeon_get_ib_value(p, idx+1);
2894                     src_offset <<= 8;
2895                     ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2896 
2897                     dst_offset = radeon_get_ib_value(p, idx + 7);
2898                     dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2899                     ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2900                     ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2901                 } else {
2902                     /* linear src, tiled dst */
2903                     src_offset = radeon_get_ib_value(p, idx+7);
2904                     src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2905                     ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2906                     ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2907 
2908                     dst_offset = radeon_get_ib_value(p, idx+1);
2909                     dst_offset <<= 8;
2910                     ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2911                 }
2912                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2913                     dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2914                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2915                     return -EINVAL;
2916                 }
2917                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2918                     dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2919                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2920                     return -EINVAL;
2921                 }
2922                 p->idx += 9;
2923                 break;
2924             /* Copy L2L, byte aligned */
2925             case 0x40:
2926                 /* L2L, byte */
2927                 src_offset = radeon_get_ib_value(p, idx+2);
2928                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2929                 dst_offset = radeon_get_ib_value(p, idx+1);
2930                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2931                 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2932                     dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2933                             src_offset + count, radeon_bo_size(src_reloc->robj));
2934                     return -EINVAL;
2935                 }
2936                 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2937                     dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2938                             dst_offset + count, radeon_bo_size(dst_reloc->robj));
2939                     return -EINVAL;
2940                 }
2941                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2942                 ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2943                 ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2944                 ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2945                 p->idx += 5;
2946                 break;
2947             /* Copy L2L, partial */
2948             case 0x41:
2949                 /* L2L, partial */
2950                 if (p->family < CHIP_CAYMAN) {
2951                     DRM_ERROR("L2L Partial is cayman only !\n");
2952                     return -EINVAL;
2953                 }
2954                 ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2955                 ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2956                 ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2957                 ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2958 
2959                 p->idx += 9;
2960                 break;
2961             /* Copy L2L, DW aligned, broadcast */
2962             case 0x44:
2963                 /* L2L, dw, broadcast */
2964                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2965                 if (r) {
2966                     DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2967                     return -EINVAL;
2968                 }
2969                 dst_offset = radeon_get_ib_value(p, idx+1);
2970                 dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2971                 dst2_offset = radeon_get_ib_value(p, idx+2);
2972                 dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2973                 src_offset = radeon_get_ib_value(p, idx+3);
2974                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2975                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2976                     dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2977                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2978                     return -EINVAL;
2979                 }
2980                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2981                     dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2982                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2983                     return -EINVAL;
2984                 }
2985                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2986                     dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2987                             dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2988                     return -EINVAL;
2989                 }
2990                 ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2991                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2992                 ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2993                 ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2994                 ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2995                 ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2996                 p->idx += 7;
2997                 break;
2998             /* Copy L2T Frame to Field */
2999             case 0x48:
3000                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3001                     DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3002                     return -EINVAL;
3003                 }
3004                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3005                 if (r) {
3006                     DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3007                     return -EINVAL;
3008                 }
3009                 dst_offset = radeon_get_ib_value(p, idx+1);
3010                 dst_offset <<= 8;
3011                 dst2_offset = radeon_get_ib_value(p, idx+2);
3012                 dst2_offset <<= 8;
3013                 src_offset = radeon_get_ib_value(p, idx+8);
3014                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3015                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3016                     dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
3017                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3018                     return -EINVAL;
3019                 }
3020                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3021                     dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
3022                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3023                     return -EINVAL;
3024                 }
3025                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3026                     dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
3027                             dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3028                     return -EINVAL;
3029                 }
3030                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3031                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3032                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3033                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3034                 p->idx += 10;
3035                 break;
3036             /* Copy L2T/T2L, partial */
3037             case 0x49:
3038                 /* L2T, T2L partial */
3039                 if (p->family < CHIP_CAYMAN) {
3040                     DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3041                     return -EINVAL;
3042                 }
3043                 /* detile bit */
3044                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3045                     /* tiled src, linear dst */
3046                     ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3047 
3048                     ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3049                     ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3050                 } else {
3051                     /* linear src, tiled dst */
3052                     ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3053                     ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3054 
3055                     ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3056                 }
3057                 p->idx += 12;
3058                 break;
3059             /* Copy L2T broadcast */
3060             case 0x4b:
3061                 /* L2T, broadcast */
3062                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3063                     DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3064                     return -EINVAL;
3065                 }
3066                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3067                 if (r) {
3068                     DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3069                     return -EINVAL;
3070                 }
3071                 dst_offset = radeon_get_ib_value(p, idx+1);
3072                 dst_offset <<= 8;
3073                 dst2_offset = radeon_get_ib_value(p, idx+2);
3074                 dst2_offset <<= 8;
3075                 src_offset = radeon_get_ib_value(p, idx+8);
3076                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3077                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3078                     dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3079                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3080                     return -EINVAL;
3081                 }
3082                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3083                     dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3084                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3085                     return -EINVAL;
3086                 }
3087                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3088                     dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3089                             dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3090                     return -EINVAL;
3091                 }
3092                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3093                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3094                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3095                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3096                 p->idx += 10;
3097                 break;
3098             /* Copy L2T/T2L (tile units) */
3099             case 0x4c:
3100                 /* L2T, T2L */
3101                 /* detile bit */
3102                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3103                     /* tiled src, linear dst */
3104                     src_offset = radeon_get_ib_value(p, idx+1);
3105                     src_offset <<= 8;
3106                     ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3107 
3108                     dst_offset = radeon_get_ib_value(p, idx+7);
3109                     dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3110                     ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3111                     ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3112                 } else {
3113                     /* linear src, tiled dst */
3114                     src_offset = radeon_get_ib_value(p, idx+7);
3115                     src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3116                     ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3117                     ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3118 
3119                     dst_offset = radeon_get_ib_value(p, idx+1);
3120                     dst_offset <<= 8;
3121                     ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3122                 }
3123                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3124                     dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3125                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3126                     return -EINVAL;
3127                 }
3128                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3129                     dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3130                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3131                     return -EINVAL;
3132                 }
3133                 p->idx += 9;
3134                 break;
3135             /* Copy T2T, partial (tile units) */
3136             case 0x4d:
3137                 /* T2T partial */
3138                 if (p->family < CHIP_CAYMAN) {
3139                     DRM_ERROR("T2T Partial is cayman only !\n");
3140                     return -EINVAL;
3141                 }
3142                 ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3143                 ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3144                 p->idx += 13;
3145                 break;
3146             /* Copy L2T broadcast (tile units) */
3147             case 0x4f:
3148                 /* L2T, broadcast */
3149                 if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3150                     DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3151                     return -EINVAL;
3152                 }
3153                 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3154                 if (r) {
3155                     DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3156                     return -EINVAL;
3157                 }
3158                 dst_offset = radeon_get_ib_value(p, idx+1);
3159                 dst_offset <<= 8;
3160                 dst2_offset = radeon_get_ib_value(p, idx+2);
3161                 dst2_offset <<= 8;
3162                 src_offset = radeon_get_ib_value(p, idx+8);
3163                 src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3164                 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3165                     dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3166                             src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3167                     return -EINVAL;
3168                 }
3169                 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3170                     dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3171                             dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3172                     return -EINVAL;
3173                 }
3174                 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3175                     dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3176                             dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3177                     return -EINVAL;
3178                 }
3179                 ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3180                 ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3181                 ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3182                 ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3183                 p->idx += 10;
3184                 break;
3185             default:
3186                 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3187                 return -EINVAL;
3188             }
3189             break;
3190         case DMA_PACKET_CONSTANT_FILL:
3191             r = r600_dma_cs_next_reloc(p, &dst_reloc);
3192             if (r) {
3193                 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3194                 return -EINVAL;
3195             }
3196             dst_offset = radeon_get_ib_value(p, idx+1);
3197             dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3198             if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3199                 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3200                  dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3201                 return -EINVAL;
3202             }
3203             ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3204             ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3205             p->idx += 4;
3206             break;
3207         case DMA_PACKET_NOP:
3208             p->idx += 1;
3209             break;
3210         default:
3211             DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3212             return -EINVAL;
3213         }
3214     } while (p->idx < p->chunk_ib->length_dw);
3215 #if 0
3216     for (r = 0; r < p->ib.length_dw; r++) {
3217         pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
3218         mdelay(1);
3219     }
3220 #endif
3221     return 0;
3222 }
3223 
3224 /* vm parser */
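     /**
      * evergreen_vm_reg_valid - check if a register may be written from a VM IB
      * @reg: register offset
      *
      * Context registers (0x28000 and above) are always allowed; config
      * registers must be on the explicit allow list below.
      */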
3225 static bool evergreen_vm_reg_valid(u32 reg)
3226 {
3227     /* context regs are fine */
3228     if (reg >= 0x28000)
3229         return true;
3230 
3231     /* check config regs */
3232     switch (reg) {
3233     case WAIT_UNTIL:
3234     case GRBM_GFX_INDEX:
3235     case CP_STRMOUT_CNTL:
3236     case CP_COHER_CNTL:
3237     case CP_COHER_SIZE:
3238     case VGT_VTX_VECT_EJECT_REG:
3239     case VGT_CACHE_INVALIDATION:
3240     case VGT_GS_VERTEX_REUSE:
3241     case VGT_PRIMITIVE_TYPE:
3242     case VGT_INDEX_TYPE:
3243     case VGT_NUM_INDICES:
3244     case VGT_NUM_INSTANCES:
3245     case VGT_COMPUTE_DIM_X:
3246     case VGT_COMPUTE_DIM_Y:
3247     case VGT_COMPUTE_DIM_Z:
3248     case VGT_COMPUTE_START_X:
3249     case VGT_COMPUTE_START_Y:
3250     case VGT_COMPUTE_START_Z:
3251     case VGT_COMPUTE_INDEX:
3252     case VGT_COMPUTE_THREAD_GROUP_SIZE:
3253     case VGT_HS_OFFCHIP_PARAM:
3254     case PA_CL_ENHANCE:
3255     case PA_SU_LINE_STIPPLE_VALUE:
3256     case PA_SC_LINE_STIPPLE_STATE:
3257     case PA_SC_ENHANCE:
3258     case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3259     case SQ_DYN_GPR_SIMD_LOCK_EN:
3260     case SQ_CONFIG:
3261     case SQ_GPR_RESOURCE_MGMT_1:
3262     case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3263     case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3264     case SQ_CONST_MEM_BASE:
3265     case SQ_STATIC_THREAD_MGMT_1:
3266     case SQ_STATIC_THREAD_MGMT_2:
3267     case SQ_STATIC_THREAD_MGMT_3:
3268     case SPI_CONFIG_CNTL:
3269     case SPI_CONFIG_CNTL_1:
3270     case TA_CNTL_AUX:
3271     case DB_DEBUG:
3272     case DB_DEBUG2:
3273     case DB_DEBUG3:
3274     case DB_DEBUG4:
3275     case DB_WATERMARKS:
3276     case TD_PS_BORDER_COLOR_INDEX:
3277     case TD_PS_BORDER_COLOR_RED:
3278     case TD_PS_BORDER_COLOR_GREEN:
3279     case TD_PS_BORDER_COLOR_BLUE:
3280     case TD_PS_BORDER_COLOR_ALPHA:
3281     case TD_VS_BORDER_COLOR_INDEX:
3282     case TD_VS_BORDER_COLOR_RED:
3283     case TD_VS_BORDER_COLOR_GREEN:
3284     case TD_VS_BORDER_COLOR_BLUE:
3285     case TD_VS_BORDER_COLOR_ALPHA:
3286     case TD_GS_BORDER_COLOR_INDEX:
3287     case TD_GS_BORDER_COLOR_RED:
3288     case TD_GS_BORDER_COLOR_GREEN:
3289     case TD_GS_BORDER_COLOR_BLUE:
3290     case TD_GS_BORDER_COLOR_ALPHA:
3291     case TD_HS_BORDER_COLOR_INDEX:
3292     case TD_HS_BORDER_COLOR_RED:
3293     case TD_HS_BORDER_COLOR_GREEN:
3294     case TD_HS_BORDER_COLOR_BLUE:
3295     case TD_HS_BORDER_COLOR_ALPHA:
3296     case TD_LS_BORDER_COLOR_INDEX:
3297     case TD_LS_BORDER_COLOR_RED:
3298     case TD_LS_BORDER_COLOR_GREEN:
3299     case TD_LS_BORDER_COLOR_BLUE:
3300     case TD_LS_BORDER_COLOR_ALPHA:
3301     case TD_CS_BORDER_COLOR_INDEX:
3302     case TD_CS_BORDER_COLOR_RED:
3303     case TD_CS_BORDER_COLOR_GREEN:
3304     case TD_CS_BORDER_COLOR_BLUE:
3305     case TD_CS_BORDER_COLOR_ALPHA:
3306     case SQ_ESGS_RING_SIZE:
3307     case SQ_GSVS_RING_SIZE:
3308     case SQ_ESTMP_RING_SIZE:
3309     case SQ_GSTMP_RING_SIZE:
3310     case SQ_HSTMP_RING_SIZE:
3311     case SQ_LSTMP_RING_SIZE:
3312     case SQ_PSTMP_RING_SIZE:
3313     case SQ_VSTMP_RING_SIZE:
3314     case SQ_ESGS_RING_ITEMSIZE:
3315     case SQ_ESTMP_RING_ITEMSIZE:
3316     case SQ_GSTMP_RING_ITEMSIZE:
3317     case SQ_GSVS_RING_ITEMSIZE:
3318     case SQ_GS_VERT_ITEMSIZE:
3319     case SQ_GS_VERT_ITEMSIZE_1:
3320     case SQ_GS_VERT_ITEMSIZE_2:
3321     case SQ_GS_VERT_ITEMSIZE_3:
3322     case SQ_GSVS_RING_OFFSET_1:
3323     case SQ_GSVS_RING_OFFSET_2:
3324     case SQ_GSVS_RING_OFFSET_3:
3325     case SQ_HSTMP_RING_ITEMSIZE:
3326     case SQ_LSTMP_RING_ITEMSIZE:
3327     case SQ_PSTMP_RING_ITEMSIZE:
3328     case SQ_VSTMP_RING_ITEMSIZE:
3329     case VGT_TF_RING_SIZE:
3330     case SQ_ESGS_RING_BASE:
3331     case SQ_GSVS_RING_BASE:
3332     case SQ_ESTMP_RING_BASE:
3333     case SQ_GSTMP_RING_BASE:
3334     case SQ_HSTMP_RING_BASE:
3335     case SQ_LSTMP_RING_BASE:
3336     case SQ_PSTMP_RING_BASE:
3337     case SQ_VSTMP_RING_BASE:
3338     case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3339     case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3340         return true;
3341     default:
3342         DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3343         return false;
3344     }
3345 }
3346 
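     /**
      * evergreen_vm_packet3_check - validate a type-3 packet from a VM IB
      * @rdev: radeon_device pointer
      * @ib: IB dword array
      * @pkt: decoded packet header
      *
      * Most packets are passed through as-is; packets that can target
      * registers (COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA and
      * SET_APPEND_CNT) have their register operands checked with
      * evergreen_vm_reg_valid().
      */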
3347 static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3348                       u32 *ib, struct radeon_cs_packet *pkt)
3349 {
3350     u32 idx = pkt->idx + 1;
3351     u32 idx_value = ib[idx];
3352     u32 start_reg, end_reg, reg, i;
3353     u32 command, info;
3354 
3355     switch (pkt->opcode) {
3356     case PACKET3_NOP:
3357         break;
3358     case PACKET3_SET_BASE:
3359         if (idx_value != 1) {
3360             DRM_ERROR("bad SET_BASE\n");
3361             return -EINVAL;
3362         }
3363         break;
3364     case PACKET3_CLEAR_STATE:
3365     case PACKET3_INDEX_BUFFER_SIZE:
3366     case PACKET3_DISPATCH_DIRECT:
3367     case PACKET3_DISPATCH_INDIRECT:
3368     case PACKET3_MODE_CONTROL:
3369     case PACKET3_SET_PREDICATION:
3370     case PACKET3_COND_EXEC:
3371     case PACKET3_PRED_EXEC:
3372     case PACKET3_DRAW_INDIRECT:
3373     case PACKET3_DRAW_INDEX_INDIRECT:
3374     case PACKET3_INDEX_BASE:
3375     case PACKET3_DRAW_INDEX_2:
3376     case PACKET3_CONTEXT_CONTROL:
3377     case PACKET3_DRAW_INDEX_OFFSET:
3378     case PACKET3_INDEX_TYPE:
3379     case PACKET3_DRAW_INDEX:
3380     case PACKET3_DRAW_INDEX_AUTO:
3381     case PACKET3_DRAW_INDEX_IMMD:
3382     case PACKET3_NUM_INSTANCES:
3383     case PACKET3_DRAW_INDEX_MULTI_AUTO:
3384     case PACKET3_STRMOUT_BUFFER_UPDATE:
3385     case PACKET3_DRAW_INDEX_OFFSET_2:
3386     case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3387     case PACKET3_MPEG_INDEX:
3388     case PACKET3_WAIT_REG_MEM:
3389     case PACKET3_MEM_WRITE:
3390     case PACKET3_PFP_SYNC_ME:
3391     case PACKET3_SURFACE_SYNC:
3392     case PACKET3_EVENT_WRITE:
3393     case PACKET3_EVENT_WRITE_EOP:
3394     case PACKET3_EVENT_WRITE_EOS:
3395     case PACKET3_SET_CONTEXT_REG:
3396     case PACKET3_SET_BOOL_CONST:
3397     case PACKET3_SET_LOOP_CONST:
3398     case PACKET3_SET_RESOURCE:
3399     case PACKET3_SET_SAMPLER:
3400     case PACKET3_SET_CTL_CONST:
3401     case PACKET3_SET_RESOURCE_OFFSET:
3402     case PACKET3_SET_CONTEXT_REG_INDIRECT:
3403     case PACKET3_SET_RESOURCE_INDIRECT:
3404     case CAYMAN_PACKET3_DEALLOC_STATE:
3405         break;
3406     case PACKET3_COND_WRITE:
3407         if (idx_value & 0x100) {
3408             reg = ib[idx + 5] * 4;
3409             if (!evergreen_vm_reg_valid(reg))
3410                 return -EINVAL;
3411         }
3412         break;
3413     case PACKET3_COPY_DW:
3414         if (idx_value & 0x2) {
3415             reg = ib[idx + 3] * 4;
3416             if (!evergreen_vm_reg_valid(reg))
3417                 return -EINVAL;
3418         }
3419         break;
3420     case PACKET3_SET_CONFIG_REG:
3421         start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3422         end_reg = 4 * pkt->count + start_reg - 4;
3423         if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3424             (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3425             (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3426             DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3427             return -EINVAL;
3428         }
3429         for (i = 0; i < pkt->count; i++) {
3430             reg = start_reg + (4 * i);
3431             if (!evergreen_vm_reg_valid(reg))
3432                 return -EINVAL;
3433         }
3434         break;
3435     case PACKET3_CP_DMA:
3436         command = ib[idx + 4];
3437         info = ib[idx + 1];
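             /*
              * The checks below decode the source address space from
              * info bits 30:29 and the destination address space from
              * bits 21:20 (0 = memory); the low 21 bits of the command
              * dword hold the byte count.
              */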
3438         if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3439             (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3440             ((((info & 0x00300000) >> 20) == 0) &&
3441              (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3442             ((((info & 0x60000000) >> 29) == 0) &&
3443              (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3444             /* non mem-to-mem copies require a dword-aligned count */
3445             if ((command & 0x1fffff) % 4) {
3446                 DRM_ERROR("CP DMA command requires dw count alignment\n");
3447                 return -EINVAL;
3448             }
3449         }
3450         if (command & PACKET3_CP_DMA_CMD_SAS) {
3451             /* src address space is register */
3452             if (((info & 0x60000000) >> 29) == 0) {
3453                 start_reg = idx_value << 2;
3454                 if (command & PACKET3_CP_DMA_CMD_SAIC) {
3455                     reg = start_reg;
3456                     if (!evergreen_vm_reg_valid(reg)) {
3457                         DRM_ERROR("CP DMA Bad SRC register\n");
3458                         return -EINVAL;
3459                     }
3460                 } else {
3461                     for (i = 0; i < (command & 0x1fffff); i++) {
3462                         reg = start_reg + (4 * i);
3463                         if (!evergreen_vm_reg_valid(reg)) {
3464                             DRM_ERROR("CP DMA Bad SRC register\n");
3465                             return -EINVAL;
3466                         }
3467                     }
3468                 }
3469             }
3470         }
3471         if (command & PACKET3_CP_DMA_CMD_DAS) {
3472             /* dst address space is register */
3473             if (((info & 0x00300000) >> 20) == 0) {
3474                 start_reg = ib[idx + 2];
3475                 if (command & PACKET3_CP_DMA_CMD_DAIC) {
3476                     reg = start_reg;
3477                     if (!evergreen_vm_reg_valid(reg)) {
3478                         DRM_ERROR("CP DMA Bad DST register\n");
3479                         return -EINVAL;
3480                     }
3481                 } else {
3482                     for (i = 0; i < (command & 0x1fffff); i++) {
3483                         reg = start_reg + (4 * i);
3484                         if (!evergreen_vm_reg_valid(reg)) {
3485                             DRM_ERROR("CP DMA Bad DST register\n");
3486                             return -EINVAL;
3487                         }
3488                     }
3489                 }
3490             }
3491         }
3492         break;
3493     case PACKET3_SET_APPEND_CNT: {
3494         uint32_t areg;
3495         uint32_t allowed_reg_base;
3496 
3497         if (pkt->count != 2) {
3498             DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
3499             return -EINVAL;
3500         }
3501 
3502         allowed_reg_base = GDS_APPEND_COUNT_0;
3503         allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
3504         allowed_reg_base >>= 2;
3505 
3506         areg = idx_value >> 16;
3507         if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
3508             DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n",
3509                   areg, idx);
3510             return -EINVAL;
3511         }
3512         break;
3513     }
3514     default:
3515         return -EINVAL;
3516     }
3517     return 0;
3518 }
3519 
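     /**
      * evergreen_ib_parse - validate a GFX IB submitted through a VM
      * @rdev: radeon_device pointer
      * @ib: radeon_ib pointer
      *
      * Type-0 packets are rejected, type-2 packets are skipped and
      * type-3 packets are handed to evergreen_vm_packet3_check().
      * Returns 0 for success and an error on failure.
      */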
3520 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3521 {
3522     int ret = 0;
3523     u32 idx = 0;
3524     struct radeon_cs_packet pkt;
3525 
3526     do {
3527         pkt.idx = idx;
3528         pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3529         pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3530         pkt.one_reg_wr = 0;
3531         switch (pkt.type) {
3532         case RADEON_PACKET_TYPE0:
3533             dev_err(rdev->dev, "Packet0 not allowed!\n");
3534             ret = -EINVAL;
3535             break;
3536         case RADEON_PACKET_TYPE2:
3537             idx += 1;
3538             break;
3539         case RADEON_PACKET_TYPE3:
3540             pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3541             ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3542             idx += pkt.count + 2;
3543             break;
3544         default:
3545             dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3546             ret = -EINVAL;
3547             break;
3548         }
3549         if (ret)
3550             break;
3551     } while (idx < ib->length_dw);
3552 
3553     return ret;
3554 }
3555 
3556 /**
3557  * evergreen_dma_ib_parse() - parse the DMA IB for VM
3558  * @rdev: radeon_device pointer
3559  * @ib: radeon_ib pointer
3560  *
3561  * Parses the DMA IB from the VM CS ioctl and
3562  * checks it for errors (Cayman-SI).
3563  * Returns 0 for success and an error on failure.
3564  **/
3565 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3566 {
3567     u32 idx = 0;
3568     u32 header, cmd, count, sub_cmd;
3569 
3570     do {
3571         header = ib->ptr[idx];
3572         cmd = GET_DMA_CMD(header);
3573         count = GET_DMA_COUNT(header);
3574         sub_cmd = GET_DMA_SUB_CMD(header);
3575 
3576         switch (cmd) {
3577         case DMA_PACKET_WRITE:
3578             switch (sub_cmd) {
3579             /* tiled */
3580             case 8:
3581                 idx += count + 7;
3582                 break;
3583             /* linear */
3584             case 0:
3585                 idx += count + 3;
3586                 break;
3587             default:
3588                 DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3589                 return -EINVAL;
3590             }
3591             break;
3592         case DMA_PACKET_COPY:
3593             switch (sub_cmd) {
3594             /* Copy L2L, DW aligned */
3595             case 0x00:
3596                 idx += 5;
3597                 break;
3598             /* Copy L2T/T2L */
3599             case 0x08:
3600                 idx += 9;
3601                 break;
3602             /* Copy L2L, byte aligned */
3603             case 0x40:
3604                 idx += 5;
3605                 break;
3606             /* Copy L2L, partial */
3607             case 0x41:
3608                 idx += 9;
3609                 break;
3610             /* Copy L2L, DW aligned, broadcast */
3611             case 0x44:
3612                 idx += 7;
3613                 break;
3614             /* Copy L2T Frame to Field */
3615             case 0x48:
3616                 idx += 10;
3617                 break;
3618             /* Copy L2T/T2L, partial */
3619             case 0x49:
3620                 idx += 12;
3621                 break;
3622             /* Copy L2T broadcast */
3623             case 0x4b:
3624                 idx += 10;
3625                 break;
3626             /* Copy L2T/T2L (tile units) */
3627             case 0x4c:
3628                 idx += 9;
3629                 break;
3630             /* Copy T2T, partial (tile units) */
3631             case 0x4d:
3632                 idx += 13;
3633                 break;
3634             /* Copy L2T broadcast (tile units) */
3635             case 0x4f:
3636                 idx += 10;
3637                 break;
3638             default:
3639                 DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3640                 return -EINVAL;
3641             }
3642             break;
3643         case DMA_PACKET_CONSTANT_FILL:
3644             idx += 4;
3645             break;
3646         case DMA_PACKET_NOP:
3647             idx += 1;
3648             break;
3649         default:
3650             DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3651             return -EINVAL;
3652         }
3653     } while (idx < ib->length_dw);
3654 
3655     return 0;
3656 }