Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * FP/SIMD state saving and restoring macros
0004  *
0005  * Copyright (C) 2012 ARM Ltd.
0006  * Author: Catalin Marinas <catalin.marinas@arm.com>
0007  */
0008 
0009 #include <asm/assembler.h>
0010 
0011 .macro fpsimd_save state, tmpnr
	/*
	 * Save the complete FP/SIMD register state (q0-q31 plus fpsr and
	 * fpcr) to the buffer addressed by \state.  x\tmpnr is clobbered
	 * as a scratch register for the status/control words.
	 *
	 * The final stp uses '!' writeback, so \state is advanced by
	 * 16 * 30 bytes; the #16 * 2 and #16 * 2 + 4 stores below thus
	 * land immediately after q31, i.e. at offsets 16 * 32 and
	 * 16 * 32 + 4 of the original buffer.
	 */
0012     stp q0, q1, [\state, #16 * 0]
0013     stp q2, q3, [\state, #16 * 2]
0014     stp q4, q5, [\state, #16 * 4]
0015     stp q6, q7, [\state, #16 * 6]
0016     stp q8, q9, [\state, #16 * 8]
0017     stp q10, q11, [\state, #16 * 10]
0018     stp q12, q13, [\state, #16 * 12]
0019     stp q14, q15, [\state, #16 * 14]
0020     stp q16, q17, [\state, #16 * 16]
0021     stp q18, q19, [\state, #16 * 18]
0022     stp q20, q21, [\state, #16 * 20]
0023     stp q22, q23, [\state, #16 * 22]
0024     stp q24, q25, [\state, #16 * 24]
0025     stp q26, q27, [\state, #16 * 26]
0026     stp q28, q29, [\state, #16 * 28]
0027     stp q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
0028     mrs x\tmpnr, fpsr
0029     str w\tmpnr, [\state, #16 * 2]	// fpsr word, just past q31
0030     mrs x\tmpnr, fpcr
0031     str w\tmpnr, [\state, #16 * 2 + 4]	// fpcr word
0032 .endm
0033 
0034 .macro fpsimd_restore_fpcr state, tmp
0035     /*
0036      * Writes to fpcr may be self-synchronising, so avoid restoring
0037      * the register if it hasn't changed.
0038      */
	// \state: GP register holding the fpcr value to restore.
	// \tmp:   GP scratch register (clobbered; receives current fpcr).
0039     mrs \tmp, fpcr
0040     cmp \tmp, \state
0041     b.eq    9999f			// already up to date: skip the msr
0042     msr fpcr, \state
0043 9999:
0044 .endm
0045 
0046 /* Clobbers \state */
0047 .macro fpsimd_restore state, tmpnr
	/*
	 * Restore the state written by fpsimd_save from the buffer
	 * addressed by \state.  Mirrors the save layout: the last ldp
	 * writes back \state (+= 16 * 30), after which fpsr/fpcr are
	 * reloaded from the two words following q31.  x\tmpnr is
	 * clobbered; \state is reused as a scratch at the end.
	 */
0048     ldp q0, q1, [\state, #16 * 0]
0049     ldp q2, q3, [\state, #16 * 2]
0050     ldp q4, q5, [\state, #16 * 4]
0051     ldp q6, q7, [\state, #16 * 6]
0052     ldp q8, q9, [\state, #16 * 8]
0053     ldp q10, q11, [\state, #16 * 10]
0054     ldp q12, q13, [\state, #16 * 12]
0055     ldp q14, q15, [\state, #16 * 14]
0056     ldp q16, q17, [\state, #16 * 16]
0057     ldp q18, q19, [\state, #16 * 18]
0058     ldp q20, q21, [\state, #16 * 20]
0059     ldp q22, q23, [\state, #16 * 22]
0060     ldp q24, q25, [\state, #16 * 24]
0061     ldp q26, q27, [\state, #16 * 26]
0062     ldp q28, q29, [\state, #16 * 28]
0063     ldp q30, q31, [\state, #16 * 30]!	// writeback: \state += 16 * 30
0064     ldr w\tmpnr, [\state, #16 * 2]
0065     msr fpsr, x\tmpnr
0066     ldr w\tmpnr, [\state, #16 * 2 + 4]
0067     fpsimd_restore_fpcr x\tmpnr, \state	// x\tmpnr = new fpcr; \state = tmp
0068 .endm
0069 
0070 /* Sanity-check macros to help avoid encoding garbage instructions */
0071 
0072 .macro _check_general_reg nr
	// Valid general-purpose register numbers are 0-30 (31 is sp/xzr,
	// not encodable as a plain GP operand here).
0073     .if (\nr) < 0 || (\nr) > 30
0074         .error "Bad register number \nr."
0075     .endif
0076 .endm
0077 
0078 .macro _sve_check_zreg znr
	// SVE Z (vector) registers: z0-z31.
0079     .if (\znr) < 0 || (\znr) > 31
0080         .error "Bad Scalable Vector Extension vector register number \znr."
0081     .endif
0082 .endm
0083 
0084 .macro _sve_check_preg pnr
	// SVE P (predicate) registers: p0-p15.
0085     .if (\pnr) < 0 || (\pnr) > 15
0086         .error "Bad Scalable Vector Extension predicate register number \pnr."
0087     .endif
0088 .endm
0089 
0090 .macro _check_num n, min, max
	// Assembly-time range check: require \min <= \n <= \max.
0091     .if (\n) < (\min) || (\n) > (\max)
0092         .error "Number \n out of range [\min,\max]"
0093     .endif
0094 .endm
0095 
0096 .macro _sme_check_wv v
	// SME vector-select W registers are restricted to w12-w15.
0097     .if (\v) < 12 || (\v) > 15
0098         .error "Bad vector select register \v."
0099     .endif
0100 .endm
0101 
0102 /* SVE instruction encodings for non-SVE-capable assemblers */
0103 /* (pre binutils 2.28, all kernel capable clang versions support SVE) */
0104 
0105 /* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
0106 .macro _sve_str_v nz, nxbase, offset=0
0107     _sve_check_zreg \nz
0108     _check_general_reg \nxbase
0109     _check_num (\offset), -0x100, 0xff
	// Hand-build the opcode: Zt in bits 0-4, Rn in bits 5-9, and the
	// signed 9-bit VL-scaled immediate split across bits 10-12
	// (offset & 7) and bits 16-21 ((offset & 0x1f8) << 13).
0110     .inst   0xe5804000          \
0111         | (\nz)             \
0112         | ((\nxbase) << 5)      \
0113         | (((\offset) & 7) << 10)   \
0114         | (((\offset) & 0x1f8) << 13)
0115 .endm
0116 
0117 /* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
0118 .macro _sve_ldr_v nz, nxbase, offset=0
0119     _sve_check_zreg \nz
0120     _check_general_reg \nxbase
0121     _check_num (\offset), -0x100, 0xff
	// Same field layout as _sve_str_v, with the LDR base opcode:
	// Zt bits 0-4, Rn bits 5-9, signed 9-bit immediate in bits
	// 10-12 and 16-21.
0122     .inst   0x85804000          \
0123         | (\nz)             \
0124         | ((\nxbase) << 5)      \
0125         | (((\offset) & 7) << 10)   \
0126         | (((\offset) & 0x1f8) << 13)
0127 .endm
0128 
0129 /* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
0130 .macro _sve_str_p np, nxbase, offset=0
0131     _sve_check_preg \np
0132     _check_general_reg \nxbase
0133     _check_num (\offset), -0x100, 0xff
	// Pt in bits 0-3 (predicates only need 4 bits), Rn in bits 5-9,
	// signed 9-bit immediate split across bits 10-12 and 16-21.
0134     .inst   0xe5800000          \
0135         | (\np)             \
0136         | ((\nxbase) << 5)      \
0137         | (((\offset) & 7) << 10)   \
0138         | (((\offset) & 0x1f8) << 13)
0139 .endm
0140 
0141 /* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
0142 .macro _sve_ldr_p np, nxbase, offset=0
0143     _sve_check_preg \np
0144     _check_general_reg \nxbase
0145     _check_num (\offset), -0x100, 0xff
	// Same field layout as _sve_str_p, with the LDR base opcode:
	// Pt bits 0-3, Rn bits 5-9, signed 9-bit immediate in bits
	// 10-12 and 16-21.
0146     .inst   0x85800000          \
0147         | (\np)             \
0148         | ((\nxbase) << 5)      \
0149         | (((\offset) & 7) << 10)   \
0150         | (((\offset) & 0x1f8) << 13)
0151 .endm
0152 
0153 /* RDVL X\nx, #\imm */
0154 .macro _sve_rdvl nx, imm
0155     _check_general_reg \nx
0156     _check_num (\imm), -0x20, 0x1f
	// Rd in bits 0-4; signed 6-bit immediate multiplier in bits 5-10.
0157     .inst   0x04bf5000          \
0158         | (\nx)             \
0159         | (((\imm) & 0x3f) << 5)
0160 .endm
0161 
0162 /* RDFFR (unpredicated): RDFFR P\np.B */
0163 .macro _sve_rdffr np
0164     _sve_check_preg \np
	// Pd (destination predicate) in bits 0-3.
0165     .inst   0x2519f000          \
0166         | (\np)
0167 .endm
0168 
0169 /* WRFFR P\np.B */
0170 .macro _sve_wrffr np
0171     _sve_check_preg \np
0172     .inst   0x25289000          \
0173         | ((\np) << 5)
0174 .endm
0175 
0176 /* PFALSE P\np.B */
0177 .macro _sve_pfalse np
0178     _sve_check_preg \np
0179     .inst   0x2518e400          \
0180         | (\np)
0181 .endm
0182 
0183 /* SME instruction encodings for non-SME-capable assemblers */
0184 /* (pre binutils 2.38/LLVM 13) */
0185 
0186 /* RDSVL X\nx, #\imm */
0187 .macro _sme_rdsvl nx, imm
0188     _check_general_reg \nx
0189     _check_num (\imm), -0x20, 0x1f
	// Same field layout as _sve_rdvl (Rd bits 0-4, signed 6-bit
	// immediate in bits 5-10) but with the streaming-VL base opcode.
0190     .inst   0x04bf5800          \
0191         | (\nx)             \
0192         | (((\imm) & 0x3f) << 5)
0193 .endm
0194 
0195 /*
0196  * STR (vector from ZA array):
0197  *  STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
0198  */
0199 .macro _sme_str_zav nw, nxbase, offset=0
0200     _sme_check_wv \nw
0201     _check_general_reg \nxbase
0202     _check_num (\offset), -0x100, 0xff
	// \nw is a W register number in 12-15 (see _sme_check_wv); only
	// its low 2 bits are encoded, in bits 13-14.  Rn goes in bits
	// 5-9 and the low 3 bits of \offset in bits 0-2.  As the header
	// shows, the single immediate serves as both the vector-select
	// offset and the memory offset.
0203     .inst   0xe1200000          \
0204         | (((\nw) & 3) << 13)       \
0205         | ((\nxbase) << 5)      \
0206         | ((\offset) & 7)
0207 .endm
0208 
0209 /*
0210  * LDR (vector to ZA array):
0211  *  LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
0212  */
0213 .macro _sme_ldr_zav nw, nxbase, offset=0
0214     _sme_check_wv \nw
0215     _check_general_reg \nxbase
0216     _check_num (\offset), -0x100, 0xff
	// Same field layout as _sme_str_zav, with the LDR base opcode:
	// low 2 bits of \nw in bits 13-14, Rn in bits 5-9, low 3 bits
	// of \offset in bits 0-2.
0217     .inst   0xe1000000          \
0218         | (((\nw) & 3) << 13)       \
0219         | ((\nxbase) << 5)      \
0220         | ((\offset) & 7)
0221 .endm
0222 
0223 /*
0224  * Zero the entire ZA array
0225  *  ZERO ZA
0226  */
0227 .macro zero_za
	// Hand-encoded ZERO { ZA }: the 0xff mask bits select all tiles.
0228     .inst 0xc00800ff
0229 .endm
0230 
0231 .macro __for from:req, to:req
	/*
	 * Recursive helper for _for: bisect the inclusive range
	 * [\from, \to] and invoke _for__body once per value.  The '%'
	 * prefix (altmacro mode) forces the arithmetic expression to be
	 * evaluated, so _for__body receives a literal number rather than
	 * the expression text.
	 */
0232     .if (\from) == (\to)
0233         _for__body %\from
0234     .else
0235         __for %\from, %((\from) + ((\to) - (\from)) / 2)
0236         __for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
0237     .endif
0238 .endm
0239 
0240 .macro _for var:req, from:req, to:req, insn:vararg
	/*
	 * Expand \insn once for each value of \var in [\from, \to]
	 * (e.g. "_for n, 0, 31, _sve_str_v \n, ...").  A temporary
	 * _for__body macro binds \var for a single iteration and is
	 * purged afterwards.  Altmacro mode is required for __for's
	 * '%' expression evaluation, but is switched off around \insn
	 * itself so the expanded code assembles with normal syntax.
	 */
0241     .macro _for__body \var:req
0242         .noaltmacro
0243         \insn
0244         .altmacro
0245     .endm
0246 
0247     .altmacro
0248     __for \from, \to
0249     .noaltmacro
0250 
0251     .purgem _for__body
0252 .endm
0253 
0254 /* Update ZCR_EL1.LEN with the new VQ */
0255 .macro sve_load_vq xvqminus1, xtmp, xtmp2
	// Read-modify-write ZCR_EL1: replace the LEN field with
	// \xvqminus1 (VQ - 1).  Skip the msr when the value is already
	// correct, since the write may be self-synchronising (costly).
	// \xtmp and \xtmp2 are clobbered.
0256         mrs_s       \xtmp, SYS_ZCR_EL1
0257         bic     \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
0258         orr     \xtmp2, \xtmp2, \xvqminus1
0259         cmp     \xtmp2, \xtmp
0260         b.eq        921f
0261         msr_s       SYS_ZCR_EL1, \xtmp2 //self-synchronising
0262 921:
0263 .endm
0264 
0265 /* Update SMCR_EL1.LEN with the new VQ */
0266 .macro sme_load_vq xvqminus1, xtmp, xtmp2
	// SME counterpart of sve_load_vq: read-modify-write SMCR_EL1's
	// LEN field with \xvqminus1, skipping the msr when unchanged.
	// \xtmp and \xtmp2 are clobbered.
0267         mrs_s       \xtmp, SYS_SMCR_EL1
0268         bic     \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
0269         orr     \xtmp2, \xtmp2, \xvqminus1
0270         cmp     \xtmp2, \xtmp
0271         b.eq        921f
0272         msr_s       SYS_SMCR_EL1, \xtmp2    //self-synchronising
0273 921:
0274 .endm
0275 
0276 /* Preserve the first 128-bits of Znz and zero the rest. */
0277 .macro _sve_flush_z nz
0278     _sve_check_zreg \nz
	// A NEON (128-bit) write to v\nz zeroes the bits of z\nz above
	// 128, so a self-move keeps the low quadword and clears the rest.
0279     mov v\nz\().16b, v\nz\().16b
0280 .endm
0281 
0282 .macro sve_flush_z
	// Zero the upper (above 128-bit) part of every Z register.
0283  _for n, 0, 31, _sve_flush_z    \n
0284 .endm
0285 .macro sve_flush_p
	// Set every predicate register p0-p15 to all-false.
0286  _for n, 0, 15, _sve_pfalse \n
0287 .endm
0288 .macro sve_flush_ffr
	// Write p0 into the FFR.  NOTE(review): this clears the FFR only
	// if p0 is already all-false (e.g. sve_flush_p ran first) —
	// confirm at call sites.
0289         _sve_wrffr  0
0290 .endm
0291 
0292 .macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
	/*
	 * Save the SVE state relative to x\nxbase:
	 *   z0-z31 at VL-scaled offsets -34..-3 and p0-p15 at
	 *   predicate-sized offsets -16..-1 below the base.  If
	 *   \save_ffr is non-zero, read the FFR through p0, store it at
	 *   [x\nxbase, #0] and reload p0 from its own save slot (-16);
	 *   otherwise just zero the FFR slot.  Finally store fpsr/fpcr
	 *   as two 32-bit words at [\xpfpsr] and [\xpfpsr, #4].
	 * Clobbers x\nxtmp (and p0 transiently on the FFR path).
	 */
0293  _for n, 0, 31, _sve_str_v  \n, \nxbase, \n - 34
0294  _for n, 0, 15, _sve_str_p  \n, \nxbase, \n - 16
0295         cbz     \save_ffr, 921f
0296         _sve_rdffr  0
0297         _sve_str_p  0, \nxbase
0298         _sve_ldr_p  0, \nxbase, -16
0299         b       922f
0300 921:
0301         str     xzr, [x\nxbase]     // Zero out FFR
0302 922:
0303         mrs     x\nxtmp, fpsr
0304         str     w\nxtmp, [\xpfpsr]
0305         mrs     x\nxtmp, fpcr
0306         str     w\nxtmp, [\xpfpsr, #4]
0307 .endm
0308 
0309 .macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	/*
	 * Inverse of sve_save: reload z0-z31 from VL-scaled offsets
	 * -34..-3 below x\nxbase.  If \restore_ffr is non-zero, load the
	 * saved FFR image through p0 (from [x\nxbase, #0]) and write it
	 * to the FFR; p0's real value is then restored by the predicate
	 * loads from -16..-1 that follow.  Finally reload fpsr/fpcr from
	 * [\xpfpsr] and [\xpfpsr, #4].  Clobbers x\nxtmp.
	 */
0310  _for n, 0, 31, _sve_ldr_v  \n, \nxbase, \n - 34
0311         cbz     \restore_ffr, 921f
0312         _sve_ldr_p  0, \nxbase
0313         _sve_wrffr  0
0314 921:
0315  _for n, 0, 15, _sve_ldr_p  \n, \nxbase, \n - 16
0316 
0317         ldr     w\nxtmp, [\xpfpsr]
0318         msr     fpsr, x\nxtmp
0319         ldr     w\nxtmp, [\xpfpsr, #4]
0320         msr     fpcr, x\nxtmp
0321 .endm
0322 
0323 .macro sme_save_za nxbase, xvl, nw
	/*
	 * Store the ZA array to [x\nxbase]: one ZA vector per loop
	 * iteration, advancing the base by \xvl bytes each time and
	 * counting w\nw up from 0 until it reaches \xvl (\xvl
	 * iterations total).  \nw must name a register in w12-w15
	 * (enforced by _sme_check_wv inside _sme_str_zav); its value
	 * selects the ZA vector.
	 * NOTE(review): \xvl is presumably the streaming VL in bytes,
	 * which also equals the number of ZA vectors — confirm at
	 * call sites.  Clobbers x\nxbase and w\nw.
	 */
0324     mov w\nw, #0
0325 
0326 423:
0327     _sme_str_zav \nw, \nxbase
0328     add x\nxbase, x\nxbase, \xvl
0329     add x\nw, x\nw, #1
0330     cmp \xvl, x\nw
0331     bne 423b
0332 .endm
0333 
0334 .macro sme_load_za nxbase, xvl, nw
	/*
	 * Inverse of sme_save_za: load the ZA array from [x\nxbase],
	 * one ZA vector per iteration, advancing the base by \xvl bytes
	 * each time, for \xvl iterations (w\nw counts 0..\xvl-1 and
	 * selects the destination vector).  \nw must name a register in
	 * w12-w15.  Clobbers x\nxbase and w\nw.
	 */
0335     mov w\nw, #0
0336 
0337 423:
0338     _sme_ldr_zav \nw, \nxbase
0339     add x\nxbase, x\nxbase, \xvl
0340     add x\nw, x\nw, #1
0341     cmp \xvl, x\nw
0342     bne 423b
0343 .endm