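/*
 * Shader validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute shaders could escalate privilege by
 * overwriting system memory (using the VPM write address register in
 * the general-purpose DMA mode) or reading system memory it shouldn't
 * (reading/writing the instruction stream, reading out texture data,
 * etc., using the TMU or VPM).
 *
 * The validator walks over a shader's BO once at creation time,
 * ensuring that its accesses are appropriately bounded and recording
 * where texture accesses are made so that relocations can be applied
 * to them in the uniform stream.
 */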
#include "vc4_drv.h"
#include "vc4_qpu_defines.h"
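
/* One tracked "live" slot per writable location the validator models:
 * 32 registers in each of the A and B register files, plus the four
 * accumulators r0-r3.
 */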
#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
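	/* Current IP being validated. */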
	uint32_t ip;
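
	/* IP at the end of the BO, do not read shader[max_ip]. */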
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];
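
	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct-addressed memory reads.
	 */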
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];
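
	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and for clearing the texturing/clamping state.
	 */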
	unsigned long *branch_targets;
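
	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found.  This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */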
	bool needs_uniform_address_update;
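
	/* Set when we find a backwards branch.  A backwards branch is
	 * probably a loop, which will read uniforms based on the branch, so
	 * the uniform stream must have the uniforms address available at the
	 * start of the shader so it can be reset.
	 */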
	bool needs_uniform_address_for_loop;
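
	/* Set when we find an instruction using the top half of the
	 * register files.  If we allowed accessing those registers in a
	 * threaded shader, the other shader running on our QPU would be
	 * corrupted.
	 */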
	bool all_registers_used;
};
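
/* Maps a write address to its index in the live_* tracking arrays, or
 * ~0 if the location isn't tracked: 0-31 for register file A, 32-63
 * for register file B, and 64-67 for the accumulators r0-r3.
 */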
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}
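
/* Maps the A argument of the ADD operation to a live_* array index,
 * looking through raddr_a/raddr_b for register-file reads, or returns
 * ~0 if the argument isn't a tracked location (e.g. a small immediate).
 */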
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_DEBUG("direct TMU read used small immediate\n");
			return false;
		}
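
		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */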
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_DEBUG("direct TMU load wasn't an add\n");
			return false;
		}
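
		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */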
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}
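
		/* Store the clamp value's uniform offset in p1 (see
		 * reloc_tex() in vc4_validate.c).
		 */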
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
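	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads().
	 */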
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

static bool require_uniform_address_uniform(struct vc4_validated_shader_info *
					    validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
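	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */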
	u32 expected_offset = validated_shader->uniforms_size + 4;
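
	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side.  However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */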
	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
	case QPU_SIG_NONE:
	case QPU_SIG_SCOREBOARD_UNLOCK:
	case QPU_SIG_COLOR_LOAD:
	case QPU_SIG_LOAD_TMU0:
	case QPU_SIG_LOAD_TMU1:
		break;
	default:
		DRM_DEBUG("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_DEBUG("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_DEBUG("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_DEBUG("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_DEBUG("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_DEBUG("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_DEBUG("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
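		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */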
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
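		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */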
		DRM_DEBUG("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_DEBUG("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
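		/* We allow VPM setup in general, even including VPM DMA
		 * setup, because the (unsafe) DMA can only be triggered by
		 * QPU_W_VPM_ADDR writes.
		 */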
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;
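
	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */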
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);
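
	/* Clear live state for registers written by our instruction. */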
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
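		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */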
		return;
	}
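
	/* Now, handle remaining live clamp tracking for the ADD operation. */
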
	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
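		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */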
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
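		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */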
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_DEBUG("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if (branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;
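
	/* We don't want to have to worry about validating branches that also
	 * write a register, and there's no need for it.
	 */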
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_DEBUG("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
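		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be out of instructions in the shader BO first.
		 */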
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}
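
/* Walk the shader's branch instructions, checking that they are all
 * relative, that their targets land inside the shader and outside of
 * delay slots, and noting the targets in a bitfield for later passes.
 */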
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
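			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished.  Don't look for branches or branch
			 * targets past that point, since the following
			 * instructions may be garbage.
			 */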
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_DEBUG("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_DEBUG("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_DEBUG("relative branching required\n");
			return false;
		}
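
		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction.  Make sure we're not branching beyond the
		 * end of the shader object.
		 */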
		if (branch_imm % sizeof(inst) != 0) {
			DRM_DEBUG("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);
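
		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */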
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_DEBUG("Branch landed after QPU_SIG_PROG_END\n");
		return false;
	}

	return true;
}
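
/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */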
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_DEBUG("Branch target landed during TMU setup\n");
		return false;
	}
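
	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */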
	reset_validation_state(validation_state);
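
	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read.  We require that after any branch point, the next
	 * uniform to be loaded is a uniform address offset.  That uniform's
	 * offset will be marked by the uniform address register write
	 * validation, or a one-off at the end of the shader.
	 */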
	validation_state->needs_uniform_address_update = true;

	return true;
}

struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
	struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev);
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	if (WARN_ON_ONCE(vc4->is_vc5))
		return NULL;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
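			/* Reset r0-r3 live clamp data */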
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_DEBUG("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_DEBUG("Branch in thread switch at ip %d\n",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_DEBUG("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}
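
		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */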
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_DEBUG("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}
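
	/* A threaded shader shares the upper half of the register files with
	 * the other thread on the QPU, so it must not touch those registers.
	 */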
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_DEBUG("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}
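
	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */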
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}
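
	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */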
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->uniform_addr_offsets);
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}