Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 // Copyright (C) 2021 ARM Limited.
0003 // Original author: Mark Brown <broonie@kernel.org>
0004 //
0005 // Scalable Matrix Extension ZA context switch test
0006 // Repeatedly writes unique test patterns into each ZA tile
0007 // and reads them back to verify integrity.
0008 //
0009 // for x in `seq 1 NR_CPUS`; do za-test & pids=$pids\ $! ; done
0010 // (leave it running for as long as you want...)
0011 // kill $pids
0012 
0013 #include <asm/unistd.h>
0014 #include "assembler.h"
0015 #include "asm-offsets.h"
0016 #include "sme-inst.h"
0017 
0018 .arch_extension sve
0019 
0020 #define MAXVL     2048
0021 #define MAXVL_B   (MAXVL / 8)
0022 
0023 // Declare some storage space to shadow ZA register contents (zaref) and a
0024 // scratch buffer for a vector (scratch).  Both are sized for the maximum VL.
0025 .pushsection .text      // saves the current section; .popsection below restores it
0026 .data                   // the buffers themselves are emitted into .data
0027 .align 4                // 2^4 = 16-byte alignment (GAS on AArch64 takes a power of two)
0028 zaref:
0029     .space  MAXVL_B * MAXVL_B   // MAXVL_B rows of MAXVL_B bytes each
0030 scratch:
0031     .space  MAXVL_B             // one row at the maximum vector length
0032 .popsection
0033 
0034 // memcpy: copy x2 bytes from the buffer at x1 to the buffer at x0, one byte
0035 // at a time.  A length of zero is allowed.  Clobbers x0-x3.
0036 function memcpy
0037 0:  cmp x2, #0              // any bytes left to copy?
0038     b.eq    1f
0039     ldrb    w3, [x1], #1        // fetch a byte and advance the source
0040     strb    w3, [x0], #1        // store it and advance the destination
0041     sub x2, x2, #1
0042     b   0b
0043 1:  ret
0044 endfunction
0045 
0046 // Fill the scratch buffer with the test pattern for one ZA row.
0047 // x0: pid
0048 // x1: row in ZA
0049 // x2: generation
0050 
0051 // The buffer receives MAXVL_B / 4 consecutive 32-bit words.  Each word is
0052 // constructed from the inputs as follows:
0053 // bits 31:28   generation number (increments once per test_loop)
0054 // bits 27:16   pid
0055 // bits 15: 8   row number
0056 // bits  7: 0   32-bit lane index (0 for the first word, +1 per word)
0057 // Clobbers x0, x1 and w3.
0058 function pattern
0059     mov w3, #0              // lane index starts at zero
0060     bfi w3, w2, #28, #4     // bits 31:28 <- generation
0061     bfi w3, w0, #16, #12    // bits 27:16 <- pid
0062     bfi w3, w1, #8, #8      // bits 15:8  <- row
0063 
0064     mov w1, #MAXVL_B / 4    // words still to be written
0065     ldr x0, =scratch        // output cursor
0066 
0067 1:  str w3, [x0], #4
0068     add w3, w3, #1          // next lane
0069     subs    w1, w1, #1
0070     b.ne    1b
0071 
0072     ret
0073 endfunction
0074 
0075 // Get the address of shadow data for ZA horizontal vector xn
     //   xd    - register that receives zaref + (row size in bytes) * xn
     //   xn    - register holding the row number
     //   nrtmp - bare register NUMBER; that x register is overwritten and is
     //           left holding the row size in bytes (callers rely on this)
0076 .macro _adrza xd, xn, nrtmp
0077     ldr \xd, =zaref
0078     rdsvl   \nrtmp, 1               // row size in bytes into the temporary
0079     madd    \xd, x\nrtmp, \xn, \xd  // base + row size * row number
0080 .endm
0081 
0082 // Set up test pattern in a ZA horizontal vector and in its shadow copy
0083 // x0: pid
0084 // x1: row number
0085 // x2: generation
     // Clobbers x0-x5, x12 and x30.
0086 function setup_za
0087     mov x4, x30         // keep the return address; the bl calls overwrite x30
0088     mov x12, x1         // Use x12 for vector select
0089 
0090     bl  pattern         // Get pattern in scratch buffer
0091     _adrza  x0, x12, 2      // Shadow buffer pointer to x0 and x5
0092     mov x5, x0          // memcpy advances x0, so keep a copy of the pointer
0093     ldr x1, =scratch
0094     bl  memcpy          // length set up in x2 by _adrza
0095 
0096     _ldr_za 12, 5           // load vector w12 from pointer x5
0097 
0098     ret x4              // return through the saved link register
0099 endfunction
0100 
0101 // Trivial memory compare: compare x2 bytes starting at address x0 with
0102 // bytes starting at address x1.
0103 // Returns only if all bytes match; otherwise, the program is aborted.
0104 // Clobbers x0-x5.
     // On a mismatch control passes to barf with x0, x1 and x2 holding the
     // values they had on entry.
0105 function memcmp
0106     cbz x2, 2f          // zero length always matches
0107 
0108     stp x0, x1, [sp, #-0x20]!   // stash the arguments so they can be restored below
0109     str x2, [sp, #0x10]
0110 
0111     mov x5, #0          // x5 = byte offset
0112 0:  ldrb    w3, [x0, x5]
0113     ldrb    w4, [x1, x5]
0114     add x5, x5, #1
0115     cmp w3, w4
0116     b.ne    1f          // mismatch: leave with the flags saying "ne"
0117     subs    x2, x2, #1
0118     b.ne    0b          // falls through with "eq" once every byte has matched
0119 
0120 1:  ldr x2, [sp, #0x10]     // the loads do not alter the flags set above
0121     ldp x0, x1, [sp], #0x20
0122     b.ne    barf
0123 
0124 2:  ret
0125 endfunction
0126 
0127 // Verify that a ZA vector matches its shadow in memory, else abort
0128 // x0: row number
0129 // Clobbers x0-x7 and x12.
     // NOTE(review): relies on memfill_ae (defined outside this file) leaving
     // x3 and x5-x7 intact - confirm against its definition.
0130 function check_za
0131     mov x3, x30         // keep the return address across the bl below
0132 
0133     mov x12, x0         // x12 selects the ZA vector
0134     _adrza  x5, x0, 6       // pointer to expected value in x5
0135     mov x4, x0          // copy of the row number (not read again in this function)
0136     ldr x7, =scratch        // x7 is scratch
0137 
0138     mov x0, x7          // Poison scratch
0139     mov x1, x6          // x6 = row size in bytes, left there by _adrza
0140     bl  memfill_ae
0141 
0142     _str_za 12, 7           // save vector w12 to pointer x7
0143 
0144     mov x0, x5          // expected data
0145     mov x1, x7          // data read back from ZA
0146     mov x2, x6          // number of bytes to compare
0147     mov x30, x3         // restore the return address ...
0148     b   memcmp          // ... so that memcmp returns straight to our caller
0149 endfunction
0150 
0151 // Any SME register modified here can cause corruption in the main
0152 // thread -- but *only* the locations modified here.
     // Installed for SIGUSR1 with SA_SIGINFO, so x2 is the handler's third
     // argument; it is used as the base for the ucontext_regs offsets.
0153 function irritator_handler
0154     // Increment the irritation signal count (x23):
0155     ldr x0, [x2, #ucontext_regs + 8 * 23]   // slot 23 of the saved registers
0156     add x0, x0, #1
0157     str x0, [x2, #ucontext_regs + 8 * 23]   // written back into the saved context
0158 
0159     // Corrupt some random ZA data
     // (currently disabled: the block below is compiled out, and its
     // instructions write v0/v9/v31 rather than ZA)
0160 #if 0
0161     adr x0, .text + (irritator_handler - .text) / 16 * 16
0162     movi    v0.8b, #1
0163     movi    v9.16b, #2
0164     movi    v31.8b, #3
0165 #endif
0166 
0167     ret
0168 endfunction
0169 
0170 function terminate_handler
     // Handler installed for SIGINT and SIGTERM: prints the signal number and
     // the iteration and signal counts, then exits with status 0.
     // w0: signal number
     // x2: third handler argument, used as the base for the ucontext_regs offsets
     // Does not return.
0171     mov w21, w0         // copies kept in w21/x20 for use after the print calls
0172     mov x20, x2
0173 
0174     puts    "Terminated by signal "
0175     mov w0, w21
0176     bl  putdec
0177     puts    ", no error, iterations="
0178     ldr x0, [x20, #ucontext_regs + 8 * 22]  // saved x22: generation counter
0179     bl  putdec
0180     puts    ", signals="
0181     ldr x0, [x20, #ucontext_regs + 8 * 23]  // saved x23: irritation signal count
0182     bl  putdecn
0183 
0184     mov x0, #0          // exit status 0
0185     mov x8, #__NR_exit
0186     svc #0
0187 endfunction
0188 
0189 // w0: signal number
0190 // x1: sa_action
0191 // w2: sa_flags
0192 // Clobbers x0-x6,x8
     // Installs the handler with rt_sigaction.  On failure a message is
     // printed and control goes to .Labort instead of returning.
0193 function setsignal
0194     str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!   // frame: 16 bytes holding x30, then sa_sz rounded up to 16
0195 
0196     mov w4, w0          // park the arguments while the struct is cleared
0197     mov x5, x1
0198     mov w6, w2
0199 
0200     add x0, sp, #16     // the sigaction struct lives just above the saved x30
0201     mov x1, #sa_sz
0202     bl  memclr
0203 
0204     mov w0, w4          // arg 0: signal number
0205     add x1, sp, #16     // arg 1: new action
0206     str w6, [x1, #sa_flags]
0207     str x5, [x1, #sa_handler]
0208     mov x2, #0          // arg 2: old action is not wanted
0209     mov x3, #sa_mask_sz // arg 3: size of the signal mask
0210     mov x8, #__NR_rt_sigaction
0211     svc #0
0212 
0213     cbz w0, 1f          // zero means success
0214 
0215     puts    "sigaction failure\n"
0216     b   .Labort
0217 
0218 1:  ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)     // pop the frame and recover the return address
0219     ret
0220 endfunction
0221 
0222 // Main program entry point
     // Register roles for the whole run:
     //   x19  vector length in bits, sampled once at startup
     //   x20  our PID (part of every test pattern)
     //   x21  countdown over ZA rows in the setup and verify loops
     //   x22  generation number; reported as the iteration count on exit
     //   x23  irritation signal count, bumped by irritator_handler through
     //        the saved signal context
     //   x24  number of ZA rows, used while verifying
0223 .globl _start
0224 function _start
0225 _start:
0226     puts    "Streaming mode "
0227     smstart_za
0228 
0229     // Sanity-check and report the vector length
0230 
0231     rdsvl   19, 8           // x19 = vector length in bits
0232     cmp x19, #128
0233     b.lo    1f          // below 128 bits: bad
0234     cmp x19, #2048
0235     b.hi    1f          // above 2048 bits: bad
0236     tst x19, #(8 - 1)
0237     b.eq    2f          // a multiple of 8: accepted
0238 
0239 1:  puts    "bad vector length: "
0240     mov x0, x19
0241     bl  putdecn
0242     b   .Labort
0243 
0244 2:  puts    "vector length:\t"
0245     mov x0, x19
0246     bl  putdec
0247     puts    " bits\n"
0248 
0249     // Obtain our PID, to ensure test pattern uniqueness between processes
0250     mov x8, #__NR_getpid
0251     svc #0
0252     mov x20, x0
0253 
0254     puts    "PID:\t"
0255     mov x0, x20
0256     bl  putdecn
0257 
0258     mov x23, #0     // Irritation signal count
0259 
0260     mov w0, #SIGINT
0261     adr x1, terminate_handler
0262     mov w2, #SA_SIGINFO
0263     bl  setsignal
0264 
0265     mov w0, #SIGTERM
0266     adr x1, terminate_handler
0267     mov w2, #SA_SIGINFO
0268     bl  setsignal
0269 
0270     mov w0, #SIGUSR1
0271     adr x1, irritator_handler
0272     mov w2, #SA_SIGINFO
0273     orr w2, w2, #SA_NODEFER
0274     bl  setsignal
0275 
0276     mov x22, #0     // generation number, increments per iteration
0277 .Ltest_loop:
0278     rdsvl   0, 8            // the vector length must not change under us
0279     cmp x0, x19
0280     b.ne    vl_barf
0281 
0282     rdsvl   21, 1       // Set up ZA & shadow with test pattern
0283 0:  mov x0, x20
0284     sub x1, x21, #1     // rows are written from the last one down to row 0
0285     mov x2, x22
0286     bl  setup_za
0287     subs    x21, x21, #1
0288     b.ne    0b
0289 
0290     and x8, x22, #127       // Every 128 iterations...
0291     cbz x8, 0f
0292     mov x8, #__NR_getpid    // (otherwise minimal syscall)
0293     b   1f
0294 0:
0295     mov x8, #__NR_sched_yield   // ...encourage preemption
0296 1:
0297     svc #0
0298 
0299     mrs x0, S3_3_C4_C2_2    // SVCR should have ZA=1,SM=0
0300     and x1, x0, #3
0301     cmp x1, #2
0302     b.ne    svcr_barf
0303 
0304     rdsvl   21, 1           // Verify that the data made it through
0305     rdsvl   24, 1           // x24 = row count, to turn the countdown into a row index
0306 0:  sub x0, x24, x21        // rows are checked from row 0 upwards
0307     bl  check_za
0308     subs    x21, x21, #1
0309     bne 0b
0310 
0311     add x22, x22, #1    // Everything still working
0312     b   .Ltest_loop
0313 
     // NOTE(review): with pid 0, kill() signals every process in our process
     // group, whereas barf signals only its own pid - confirm this is intended.
0314 .Labort:
0315     mov x0, #0
0316     mov x1, #SIGABRT
0317     mov x8, #__NR_kill
0318     svc #0
0319 endfunction
0320 
0321 function barf
     // Report a ZA data mismatch and abort the process.  Reached from memcmp.
     // x0:  expected data
     // x1:  actual data
     // x2:  data size in bytes
     // x20: pid, x22: generation, x21: rows still to check, x24: total rows
     //      (all as maintained by the verify loop in _start)
     // Does not return: sends SIGABRT to our own pid.
0322 // fpsimd.c activity log dump hack
0323 //  ldr w0, =0xdeadc0de
0324 //  mov w8, #__NR_exit
0325 //  svc #0
0326 // end hack
0327     smstop
0328     mov x10, x0 // expected data
0329     mov x11, x1 // actual data
0330     mov x12, x2 // data size
0331 
0332     puts    "Mismatch: PID="
0333     mov x0, x20
0334     bl  putdec
0335     puts    ", iteration="
0336     mov x0, x22
0337     bl  putdec
0338     puts    ", row="
0339     sub x0, x24, x21    // row under test; x21 alone is only the countdown
0340     bl  putdecn
0341     puts    "\tExpected ["
0342     mov x0, x10
0343     mov x1, x12
0344     bl  dumphex
0345     puts    "]\n\tGot      ["
0346     mov x0, x11
0347     mov x1, x12
0348     bl  dumphex
0349     puts    "]\n"
0350 
0351     mov x8, #__NR_getpid    // x0 = our pid, the target for kill below
0352     svc #0
0353 // fpsimd.c activity log dump hack
0354 //  ldr w0, =0xdeadc0de
0355 //  mov w8, #__NR_exit
0356 //  svc #0
0357 // ^ end of hack
0358     mov x1, #SIGABRT
0359     mov x8, #__NR_kill
0360     svc #0
0361 //  mov x8, #__NR_exit
0362 //  mov x1, #1
0363 //  svc #0
0364 endfunction
0365 
0366 function vl_barf
     // Report an unexpected change of the active vector length and exit.
     // x0: the vector length that was read (in bits)
     // Does not return: exits with status 1.
0367     mov x10, x0         // keep the value for printing after the message
0368 
0369     puts    "Bad active VL: "
0370     mov x0, x10
0371     bl  putdecn
0372 
0373     mov x8, #__NR_exit
0374     mov x0, #1          // exit status is the first syscall argument (x0)
0375     svc #0
0376 endfunction
0377 
0378 function svcr_barf
     // Report an unexpected SVCR value and exit.
     // x0: the SVCR value that was read
     // Does not return: exits with status 1.
0379     mov x10, x0         // keep the value for printing after the message
0380 
0381     puts    "Bad SVCR: "
0382     mov x0, x10
0383     bl  putdecn
0384 
0385     mov x8, #__NR_exit
0386     mov x0, #1          // exit status is the first syscall argument (x0)
0387     svc #0
0388 endfunction