Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
0004  *
0005  * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
0006  *
0007  * This program is free software; you can redistribute it and/or modify
0008  * it under the terms of the GNU General Public License version 2 as
0009  * published by the Free Software Foundation.
0010  */
0011 
0012 #include <linux/linkage.h>
0013 #include <asm/assembler.h>
0014 
0015     .irp    regno,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0016     .set    .Lv\regno\().16b, \regno    // symbol .LvN.16b evaluates to register number N
0017     .set    .Lv\regno\().2d, \regno     // symbol .LvN.2d evaluates to register number N
0018     .endr                               // the macros below prepend .L to an operand to look its number up
0019 
0020     /*
0021      * ARMv8.2 Crypto Extensions instructions
0022      */
0023     .macro  eor3, rd, rn, rm, ra        // three-way XOR: rd = rn ^ rm ^ ra
0024     .inst   0xce000000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd   // fields listed from high to low bit
0025     .endm
0026 
0027     .macro  rax1, rd, rn, rm            // rotate and XOR: rd = rn ^ rol64(rm, 1) in each 64-bit lane
0028     .inst   0xce608c00 | (.L\rm << 16) | (.L\rn << 5) | .L\rd   // fields listed from high to low bit
0029     .endm
0030 
0031     .macro  bcax, rd, rn, rm, ra        // bit clear and XOR: rd = rn ^ (rm & ~ra)
0032     .inst   0xce200000 | (.L\rm << 16) | (.L\ra << 10) | (.L\rn << 5) | .L\rd   // fields listed from high to low bit
0033     .endm
0034 
0035     .macro  xar, rd, rn, rm, imm6       // XOR and rotate: rd = ror64(rn ^ rm, imm6) in each 64-bit lane
0036     .inst   0xce800000 | (.L\rm << 16) | ((\imm6) << 10) | (.L\rn << 5) | .L\rd   // imm6 sits where eor3 and bcax place ra
0037     .endm
0038 
0039     /* Absorb whole input blocks into the 25-lane SHA-3 state and run 24 permutation rounds after each one.
0040      * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
0041      * x0 = st, x1 = data, w2 = blocks, x3 = dg_size; w0 returns the number of blocks left unprocessed. */
0042     .text
0043 SYM_FUNC_START(sha3_ce_transform)
0044     /* load state: lane st[i] goes into the low 64 bits of v<i>, for i = 0..24 */
0045     add x8, x0, #32                 // x8 walks st[4..24] so that x0 still points at st[0] for the store at 4:
0046     ld1 { v0.1d- v3.1d}, [x0]
0047     ld1 { v4.1d- v7.1d}, [x8], #32
0048     ld1 { v8.1d-v11.1d}, [x8], #32  // NOTE(review): v8-v15 are overwritten and never restored here; presumably the caller saves FP/SIMD state - confirm
0049     ld1 {v12.1d-v15.1d}, [x8], #32
0050     ld1 {v16.1d-v19.1d}, [x8], #32
0051     ld1 {v20.1d-v23.1d}, [x8], #32
0052     ld1 {v24.1d}, [x8]
0053     // per-block loop. NOTE(review): w2 is decremented before it is ever tested, so the caller presumably guarantees blocks >= 1
0054 0:  sub w2, w2, #1                  // one more block consumed
0055     mov w8, #24                     // w8 = rounds still to run for this block
0056     adr_l   x9, .Lsha3_rcon         // x9 = cursor into the round constant table, rewound for every block
0057     // input is XORed in with 8-byte (.8b) operations, one 64-bit lane per register
0058     /* load input: the first 7 lanes (56 bytes) are absorbed for every variant */
0059     ld1 {v25.8b-v28.8b}, [x1], #32
0060     ld1 {v29.8b-v31.8b}, [x1], #24
0061     eor v0.8b, v0.8b, v25.8b
0062     eor v1.8b, v1.8b, v26.8b
0063     eor v2.8b, v2.8b, v27.8b
0064     eor v3.8b, v3.8b, v28.8b
0065     eor v4.8b, v4.8b, v29.8b
0066     eor v5.8b, v5.8b, v30.8b
0067     eor v6.8b, v6.8b, v31.8b
0068 
0069     tbnz    x3, #6, 2f      // SHA3-512 (dg_size bit 6 set): only two more lanes follow, at 2:
0070     // all remaining variants absorb lanes 7..12 next (13 lanes so far)
0071     ld1 {v25.8b-v28.8b}, [x1], #32
0072     ld1 {v29.8b-v30.8b}, [x1], #16
0073     eor  v7.8b,  v7.8b, v25.8b
0074     eor  v8.8b,  v8.8b, v26.8b
0075     eor  v9.8b,  v9.8b, v27.8b
0076     eor v10.8b, v10.8b, v28.8b
0077     eor v11.8b, v11.8b, v29.8b
0078     eor v12.8b, v12.8b, v30.8b
0079 
0080     tbnz    x3, #4, 1f      // SHA3-384 or SHA3-224 (dg_size bit 4 set)
0081 
0082     // SHA3-256: lanes 13..16, for 17 lanes (136 bytes) per block
0083     ld1 {v25.8b-v28.8b}, [x1], #32
0084     eor v13.8b, v13.8b, v25.8b
0085     eor v14.8b, v14.8b, v26.8b
0086     eor v15.8b, v15.8b, v27.8b
0087     eor v16.8b, v16.8b, v28.8b
0088     b   3f
0089 
0090 1:  tbz x3, #2, 3f      // bit 2 cleared? SHA3-384: its 13 lanes (104 bytes) are already absorbed
0091 
0092     // SHA3-224: lanes 13..17, for 18 lanes (144 bytes) per block
0093     ld1 {v25.8b-v28.8b}, [x1], #32
0094     ld1 {v29.8b}, [x1], #8
0095     eor v13.8b, v13.8b, v25.8b
0096     eor v14.8b, v14.8b, v26.8b
0097     eor v15.8b, v15.8b, v27.8b
0098     eor v16.8b, v16.8b, v28.8b
0099     eor v17.8b, v17.8b, v29.8b
0100     b   3f
0101 
0102     // SHA3-512: lanes 7..8, for 9 lanes (72 bytes) per block
0103 2:  ld1 {v25.8b-v26.8b}, [x1], #16
0104     eor  v7.8b,  v7.8b, v25.8b
0105     eor  v8.8b,  v8.8b, v26.8b
0106 
0107 3:  sub w8, w8, #1                  // top of one round; w8 counts down from 24
0108     // theta, part 1: column parities C[x] = st[x] ^ st[x+5] ^ st[x+10] ^ st[x+15] ^ st[x+20], with v25..v29 = C[0..4]
0109     eor3    v29.16b,  v4.16b,  v9.16b, v14.16b
0110     eor3    v26.16b,  v1.16b,  v6.16b, v11.16b
0111     eor3    v28.16b,  v3.16b,  v8.16b, v13.16b
0112     eor3    v25.16b,  v0.16b,  v5.16b, v10.16b
0113     eor3    v27.16b,  v2.16b,  v7.16b, v12.16b
0114     eor3    v29.16b, v29.16b, v19.16b, v24.16b
0115     eor3    v26.16b, v26.16b, v16.16b, v21.16b
0116     eor3    v28.16b, v28.16b, v18.16b, v23.16b
0117     eor3    v25.16b, v25.16b, v15.16b, v20.16b
0118     eor3    v27.16b, v27.16b, v17.16b, v22.16b
0119     // theta, part 2: bc[x] = C[x-1] ^ rol64(C[x+1], 1); ordered so each C[] value is read before its register is reused
0120     rax1    v30.2d, v29.2d, v26.2d  // bc[0] = C[4] ^ rol64(C[1], 1)
0121     rax1    v26.2d, v26.2d, v28.2d  // bc[2] = C[1] ^ rol64(C[3], 1)
0122     rax1    v28.2d, v28.2d, v25.2d  // bc[4] = C[3] ^ rol64(C[0], 1)
0123     rax1    v25.2d, v25.2d, v27.2d  // bc[1] = C[0] ^ rol64(C[2], 1)
0124     rax1    v27.2d, v27.2d, v29.2d  // bc[3] = C[2] ^ rol64(C[4], 1)
0125     // theta XOR fused with rho and pi: xar d, n, m, (64 - r) yields d = rol64(n ^ m, r), where m = bc[source lane % 5]
0126     eor  v0.16b,  v0.16b, v30.16b   // lane 0 is only XORed with bc[0]; it is neither rotated nor moved
0127     xar  v29.2d,   v1.2d,  v25.2d, (64 - 1)     // new lane 10, parked in v29
0128     xar   v1.2d,   v6.2d,  v25.2d, (64 - 44)
0129     xar   v6.2d,   v9.2d,  v28.2d, (64 - 20)
0130     xar   v9.2d,  v22.2d,  v26.2d, (64 - 61)
0131     xar  v22.2d,  v14.2d,  v28.2d, (64 - 39)
0132     xar  v14.2d,  v20.2d,  v30.2d, (64 - 18)
0133     xar  v31.2d,   v2.2d,  v26.2d, (64 - 62)    // new lane 20, parked in v31
0134     xar   v2.2d,  v12.2d,  v26.2d, (64 - 43)
0135     xar  v12.2d,  v13.2d,  v27.2d, (64 - 25)
0136     xar  v13.2d,  v19.2d,  v28.2d, (64 - 8)
0137     xar  v19.2d,  v23.2d,  v27.2d, (64 - 56)
0138     xar  v23.2d,  v15.2d,  v30.2d, (64 - 41)
0139     xar  v15.2d,   v4.2d,  v28.2d, (64 - 27)
0140     xar  v28.2d,  v24.2d,  v28.2d, (64 - 14)    // new lane 4, parked in v28 (last use of bc[4])
0141     xar  v24.2d,  v21.2d,  v25.2d, (64 - 2)
0142     xar   v8.2d,   v8.2d,  v27.2d, (64 - 55)    // new lane 21, parked in v8
0143     xar   v4.2d,  v16.2d,  v25.2d, (64 - 45)    // new lane 8, parked in v4
0144     xar  v16.2d,   v5.2d,  v30.2d, (64 - 36)
0145     xar   v5.2d,   v3.2d,  v27.2d, (64 - 28)
0146     xar  v27.2d,  v18.2d,  v27.2d, (64 - 21)    // new lane 3, parked in v27 (last use of bc[3])
0147     xar   v3.2d,  v17.2d,  v26.2d, (64 - 15)    // new lane 18, parked in v3
0148     xar  v25.2d,  v11.2d,  v25.2d, (64 - 10)    // new lane 17, parked in v25 (last use of bc[1])
0149     xar  v26.2d,   v7.2d,  v26.2d, (64 - 6)     // new lane 11, parked in v26 (last use of bc[2])
0150     xar  v30.2d,  v10.2d,  v30.2d, (64 - 3)     // new lane 7, parked in v30 (last use of bc[0])
0151     // chi: bcax d, n, m, a yields d = n ^ (m & ~a); rows go 20..24 first and each row moves parked lanes back home
0152     bcax    v20.16b, v31.16b, v22.16b,  v8.16b
0153     bcax    v21.16b,  v8.16b, v23.16b, v22.16b
0154     bcax    v22.16b, v22.16b, v24.16b, v23.16b
0155     bcax    v23.16b, v23.16b, v31.16b, v24.16b
0156     bcax    v24.16b, v24.16b,  v8.16b, v31.16b
0157     // v31 no longer holds lane 20: reuse it for the round constant, replicated to both halves, and advance x9
0158     ld1r    {v31.2d}, [x9], #8
0159     // chi, row 15..19 (lanes 17 and 18 come from v25 and v3)
0160     bcax    v17.16b, v25.16b, v19.16b,  v3.16b
0161     bcax    v18.16b,  v3.16b, v15.16b, v19.16b
0162     bcax    v19.16b, v19.16b, v16.16b, v15.16b
0163     bcax    v15.16b, v15.16b, v25.16b, v16.16b
0164     bcax    v16.16b, v16.16b,  v3.16b, v25.16b
0165     // chi, row 10..14 (lanes 10 and 11 come from v29 and v26)
0166     bcax    v10.16b, v29.16b, v12.16b, v26.16b
0167     bcax    v11.16b, v26.16b, v13.16b, v12.16b
0168     bcax    v12.16b, v12.16b, v14.16b, v13.16b
0169     bcax    v13.16b, v13.16b, v29.16b, v14.16b
0170     bcax    v14.16b, v14.16b, v26.16b, v29.16b
0171     // chi, row 5..9 (lanes 7 and 8 come from v30 and v4)
0172     bcax     v7.16b, v30.16b,  v9.16b,  v4.16b
0173     bcax     v8.16b,  v4.16b,  v5.16b,  v9.16b
0174     bcax     v9.16b,  v9.16b,  v6.16b,  v5.16b
0175     bcax     v5.16b,  v5.16b, v30.16b,  v6.16b
0176     bcax     v6.16b,  v6.16b,  v4.16b, v30.16b
0177     // chi, row 0..4 (lanes 3 and 4 come from v27 and v28)
0178     bcax     v3.16b, v27.16b,  v0.16b, v28.16b
0179     bcax     v4.16b, v28.16b,  v1.16b,  v0.16b
0180     bcax     v0.16b,  v0.16b,  v2.16b,  v1.16b
0181     bcax     v1.16b,  v1.16b, v27.16b,  v2.16b
0182     bcax     v2.16b,  v2.16b, v28.16b, v27.16b
0183     // iota: fold the round constant into lane 0
0184     eor  v0.16b,  v0.16b, v31.16b
0185 
0186     cbnz    w8, 3b                  // rounds remain for this block
0187     cond_yield 4f, x8, x9           // macro is not defined in this file; presumably exits to 4: when a reschedule is due - confirm. x8/x9 are dead here (both reloaded at 0:)
0188     cbnz    w2, 0b                  // blocks remain: absorb the next one
0189 
0190     /* save state: x0 is advanced by the post-increments and then replaced by the return value */
0191 4:  st1 { v0.1d- v3.1d}, [x0], #32
0192     st1 { v4.1d- v7.1d}, [x0], #32
0193     st1 { v8.1d-v11.1d}, [x0], #32
0194     st1 {v12.1d-v15.1d}, [x0], #32
0195     st1 {v16.1d-v19.1d}, [x0], #32
0196     st1 {v20.1d-v23.1d}, [x0], #32
0197     st1 {v24.1d}, [x0]
0198     mov w0, w2                      // blocks not yet processed: 0 after a full run, possibly non-zero when leaving via cond_yield
0199     ret
0200 SYM_FUNC_END(sha3_ce_transform)
0201 
0202     .section    ".rodata", "a"
0203     .p2align    8                   // power-of-two form: table starts on a 2^8 = 256 byte boundary
0204 .Lsha3_rcon:                        // 24 round constants, 8 bytes each, consumed in order by sha3_ce_transform
0205     .quad   0x0000000000000001, 0x0000000000008082, 0x800000000000808a  // rounds  0..2
0206     .quad   0x8000000080008000, 0x000000000000808b, 0x0000000080000001  // rounds  3..5
0207     .quad   0x8000000080008081, 0x8000000000008009, 0x000000000000008a  // rounds  6..8
0208     .quad   0x0000000000000088, 0x0000000080008009, 0x000000008000000a  // rounds  9..11
0209     .quad   0x000000008000808b, 0x800000000000008b, 0x8000000000008089  // rounds 12..14
0210     .quad   0x8000000000008003, 0x8000000000008002, 0x8000000000000080  // rounds 15..17
0211     .quad   0x000000000000800a, 0x800000008000000a, 0x8000000080008081  // rounds 18..20
0212     .quad   0x8000000000008080, 0x0000000080000001, 0x8000000080008008  // rounds 21..23