/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

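/*
 * <linux/linkage.h> supplies SYM_FUNC_START/SYM_FUNC_END, and
 * <asm/assembler.h> supplies the adr_l, CPU_LE() and cond_yield helpers
 * used further down.
 */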
    .irp        b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
    .set        .Lq\b, \b
    .set        .Lv\b\().2d, \b
    .endr

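/*
 * The .irp block above defines .Lq<n> and .Lv<n>.2d symbols for n = 0..19,
 * mapping the q<n>/v<n>.2d operand names onto the register numbers needed
 * by the .inst encodings below.
 */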
    .macro      sha512h, rd, rn, rm
    .inst       0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
    .endm

    .macro      sha512h2, rd, rn, rm
    .inst       0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
    .endm

    .macro      sha512su0, rd, rn
    .inst       0xcec08000 | .L\rd | (.L\rn << 5)
    .endm

    .macro      sha512su1, rd, rn, rm
    .inst       0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
    .endm

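/*
 * The macros above emit the SHA-512 Crypto Extensions instructions as raw
 * opcodes, with Rd in bits [4:0], Rn in bits [9:5] and Rm in bits [20:16],
 * presumably so that the file still assembles with toolchains that do not
 * recognise the ARMv8.2 SHA-512 instructions by name.
 */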
    /*
     * The SHA-512 round constants
     */
    .section    ".rodata", "a"
    .align      4
.Lsha512_rcon:
    .quad       0x428a2f98d728ae22, 0x7137449123ef65cd
    .quad       0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
    .quad       0x3956c25bf348b538, 0x59f111f1b605d019
    .quad       0x923f82a4af194f9b, 0xab1c5ed5da6d8118
    .quad       0xd807aa98a3030242, 0x12835b0145706fbe
    .quad       0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
    .quad       0x72be5d74f27b896f, 0x80deb1fe3b1696b1
    .quad       0x9bdc06a725c71235, 0xc19bf174cf692694
    .quad       0xe49b69c19ef14ad2, 0xefbe4786384f25e3
    .quad       0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
    .quad       0x2de92c6f592b0275, 0x4a7484aa6ea6e483
    .quad       0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
    .quad       0x983e5152ee66dfab, 0xa831c66d2db43210
    .quad       0xb00327c898fb213f, 0xbf597fc7beef0ee4
    .quad       0xc6e00bf33da88fc2, 0xd5a79147930aa725
    .quad       0x06ca6351e003826f, 0x142929670a0e6e70
    .quad       0x27b70a8546d22ffc, 0x2e1b21385c26c926
    .quad       0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
    .quad       0x650a73548baf63de, 0x766a0abb3c77b2a8
    .quad       0x81c2c92e47edaee6, 0x92722c851482353b
    .quad       0xa2bfe8a14cf10364, 0xa81a664bbc423001
    .quad       0xc24b8b70d0f89791, 0xc76c51a30654be30
    .quad       0xd192e819d6ef5218, 0xd69906245565a910
    .quad       0xf40e35855771202a, 0x106aa07032bbd1b8
    .quad       0x19a4c116b8d2d0c8, 0x1e376c085141ab53
    .quad       0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
    .quad       0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
    .quad       0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
    .quad       0x748f82ee5defb2fc, 0x78a5636f43172f60
    .quad       0x84c87814a1f0ab72, 0x8cc702081a6439ec
    .quad       0x90befffa23631e28, 0xa4506cebde82bde9
    .quad       0xbef9a3f7b2c67915, 0xc67178f2e372532b
    .quad       0xca273eceea26619c, 0xd186b8c721c0c207
    .quad       0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
    .quad       0x06f067aa72176fba, 0x0a637dc5a2c898a6
    .quad       0x113f9804bef90dae, 0x1b710b35131c471b
    .quad       0x28db77f523047d84, 0x32caab7b40c72493
    .quad       0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
    .quad       0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
    .quad       0x5fcb6fab3ad6faec, 0x6c44198c4a475817

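    /*
     * These are the 80 64-bit round constants K[0..79] from FIPS 180-4,
     * laid out as 40 pairs so that the round macro below can fetch the
     * next two constants with a single 16-byte load through x4.
     */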
    .macro      dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
    .ifnb       \rc1
    ld1     {v\rc1\().2d}, [x4], #16
    .endif
    add     v5.2d, v\rc0\().2d, v\in0\().2d
    ext     v6.16b, v\i2\().16b, v\i3\().16b, #8
    ext     v5.16b, v5.16b, v5.16b, #8
    ext     v7.16b, v\i1\().16b, v\i2\().16b, #8
    add     v\i3\().2d, v\i3\().2d, v5.2d
    .ifnb       \in1
    ext     v5.16b, v\in3\().16b, v\in4\().16b, #8
    sha512su0   v\in0\().2d, v\in1\().2d
    .endif
    sha512h     q\i3, q6, v7.2d
    .ifnb       \in1
    sha512su1   v\in0\().2d, v\in2\().2d, v5.2d
    .endif
    add     v\i4\().2d, v\i1\().2d, v\i3\().2d
    sha512h2    q\i3, q\i1, v\i0\().2d
    .endm

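    /*
     * Each invocation of the dround macro performs two SHA-512 rounds on
     * the state held in v\i0-v\i4: if \rc1 is non-blank, the next pair of
     * round constants is loaded into v\rc1, and if \in1 is non-blank, the
     * message schedule words in v\in0 are extended via sha512su0/sha512su1
     * for use in later rounds.
     */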
    /*
     * int sha512_ce_transform(struct sha512_state *sst, u8 const *src,
     *           int blocks)
     */
    .text
SYM_FUNC_START(sha512_ce_transform)
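    /*
     * Arguments per AAPCS64: x0 = sst (hash state), x1 = src, w2 = number
     * of 128-byte input blocks; the count of unprocessed blocks is
     * returned in w0.
     */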
    /* load state */
    ld1     {v8.2d-v11.2d}, [x0]

    /* load first 4 round constants */
    adr_l       x3, .Lsha512_rcon
    ld1     {v20.2d-v23.2d}, [x3], #64

    /* load input */
0:  ld1     {v12.2d-v15.2d}, [x1], #64
    ld1     {v16.2d-v19.2d}, [x1], #64
    sub     w2, w2, #1

CPU_LE( rev64       v12.16b, v12.16b    )
CPU_LE( rev64       v13.16b, v13.16b    )
CPU_LE( rev64       v14.16b, v14.16b    )
CPU_LE( rev64       v15.16b, v15.16b    )
CPU_LE( rev64       v16.16b, v16.16b    )
CPU_LE( rev64       v17.16b, v17.16b    )
CPU_LE( rev64       v18.16b, v18.16b    )
CPU_LE( rev64       v19.16b, v19.16b    )

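    /*
     * Each 128-byte block is loaded as sixteen 64-bit words into v12-v19;
     * on little-endian kernels the CPU_LE() rev64 sequence byte-swaps them
     * into the big-endian word order that SHA-512 is defined over.
     */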
    mov     x4, x3              // rc pointer

    mov     v0.16b, v8.16b
    mov     v1.16b, v9.16b
    mov     v2.16b, v10.16b
    mov     v3.16b, v11.16b

    // v0  ab  cd  --  ef  gh  ab
    // v1  cd  --  ef  gh  ab  cd
    // v2  ef  gh  ab  cd  --  ef
    // v3  gh  ab  cd  --  ef  gh
    // v4  --  ef  gh  ab  cd  --

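    /*
     * v0-v3 hold a working copy of the state so that v8-v11 can be added
     * back in after the 80 rounds; the table above shows how the ab/cd/
     * ef/gh state halves rotate through v0-v4 as consecutive dround
     * invocations permute their operands.
     */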
    dround      0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
    dround      3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
    dround      2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
    dround      4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
    dround      1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13

    dround      0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
    dround      3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
    dround      2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
    dround      4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
    dround      1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18

    dround      0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
    dround      3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
    dround      2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
    dround      4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
    dround      1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15

    dround      0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
    dround      3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
    dround      2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
    dround      4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
    dround      1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12

    dround      0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
    dround      3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
    dround      2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
    dround      4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
    dround      1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17

    dround      0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
    dround      3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
    dround      2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
    dround      4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
    dround      1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14

    dround      0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
    dround      3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
    dround      2, 3, 1, 4, 0, 28, 24, 12
    dround      4, 2, 0, 1, 3, 29, 25, 13
    dround      1, 4, 3, 0, 2, 30, 26, 14

    dround      0, 1, 2, 3, 4, 31, 27, 15
    dround      3, 0, 4, 2, 1, 24,   , 16
    dround      2, 3, 1, 4, 0, 25,   , 17
    dround      4, 2, 0, 1, 3, 26,   , 18
    dround      1, 4, 3, 0, 2, 27,   , 19

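    /*
     * 40 dround invocations x 2 rounds each = 80 rounds in total. The
     * round constants cycle through v24-v31 after the initial v20-v23;
     * the final eight invocations skip the message schedule update and
     * the last four no longer load constants, as neither is needed for
     * the remaining rounds.
     */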
    /* update state */
    add     v8.2d, v8.2d, v0.2d
    add     v9.2d, v9.2d, v1.2d
    add     v10.2d, v10.2d, v2.2d
    add     v11.2d, v11.2d, v3.2d

    cond_yield  3f, x4, x5
    /* handled all input blocks? */
    cbnz        w2, 0b

    /* store new state */
3:  st1     {v8.2d-v11.2d}, [x0]
    mov     w0, w2
    ret
SYM_FUNC_END(sha512_ce_transform)
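/*
 * w0 holds the number of unprocessed blocks on return: zero when all input
 * was consumed, or a non-zero count if cond_yield branched to label 3 early
 * because a reschedule was pending. The C glue code is then expected to call
 * the transform again for the remaining blocks.
 */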