Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
0004  *
0005  * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
0006  */
0007 
0008 #include <linux/linkage.h>
0009 #include <asm/assembler.h>
0010 
0011     .text
0012     .arch       armv8-a+crypto  // allow the sha256* Crypto Extensions mnemonics used below
0013 
0014     dga     .req    q20  // q-register name for v20; not referenced in the code shown here
0015     dgav        .req    v20  // hash state, first 16 bytes at [x0]
0016     dgb     .req    q21  // q-register name for v21; not referenced in the code shown here
0017     dgbv        .req    v21  // hash state, second 16 bytes at [x0]
0018 
0019     t0      .req    v22  // schedule vector + round constant sum, consumed by ev = 0 steps
0020     t1      .req    v23  // schedule vector + round constant sum, consumed by ev = 1 steps
0021 
0022     dg0q        .req    q24  // working copy of dgav while a block is being hashed
0023     dg0v        .req    v24  // vector view of the same register
0024     dg1q        .req    q25  // working copy of dgbv while a block is being hashed
0025     dg1v        .req    v25  // vector view of the same register
0026     dg2q        .req    q26  // copy of dg0 taken at the start of every add_only step
0027     dg2v        .req    v26  // vector view of the same register
0028 
0029     .macro      add_only, ev, rc, s0  // one hashing step; ev picks which of t0/t1 is consumed, rc and s0 feed the next step
0030     mov     dg2v.16b, dg0v.16b  // keep the old dg0: sha256h overwrites it and sha256h2 takes it as input
0031     .ifeq       \ev  // ev == 0: consume t0 and prepare t1
0032     add     t1.4s, v\s0\().4s, \rc\().4s  // t1 = schedule vector s0 + constant vector rc, for the following step
0033     sha256h     dg0q, dg1q, t0.4s  // hash update, first half: rewrites dg0
0034     sha256h2    dg1q, dg2q, t0.4s  // hash update, second half: rewrites dg1 using the saved dg0
0035     .else  // ev != 0: consume t1 and prepare t0
0036     .ifnb       \s0  // the last step passes no s0, so there is no further sum to prepare
0037     add     t0.4s, v\s0\().4s, \rc\().4s  // t0 = schedule vector s0 + constant vector rc, for the following step
0038     .endif
0039     sha256h     dg0q, dg1q, t1.4s  // hash update, first half: rewrites dg0
0040     sha256h2    dg1q, dg2q, t1.4s  // hash update, second half: rewrites dg1 using the saved dg0
0041     .endif
0042     .endm
0043 
0044     .macro      add_update, ev, rc, s0, s1, s2, s3  // add_only step interleaved with a message schedule update of vector s0
0045     sha256su0   v\s0\().4s, v\s1\().4s  // schedule update, first half: s0 rewritten from s0 and s1
0046     add_only    \ev, \rc, \s1  // hashing step; the sum it prepares is built from s1, not s0
0047     sha256su1   v\s0\().4s, v\s2\().4s, v\s3\().4s  // schedule update, second half: folds s2 and s3 into s0
0048     .endm
0049 
0050     /*
0051      * The SHA-256 round constants: 64 32-bit words, four per row. sha2_ce_transform loads row n into vector register vn (v0-v15).
0052      */
0053     .section    ".rodata", "a"  // read-only data, allocatable
0054     .align      4  // 2^4 = 16-byte alignment (power-of-two form of .align on this target)
0055 .Lsha2_rcon:
0056     .word       0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
0057     .word       0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
0058     .word       0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
0059     .word       0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
0060     .word       0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
0061     .word       0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
0062     .word       0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
0063     .word       0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
0064     .word       0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
0065     .word       0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
0066     .word       0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
0067     .word       0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
0068     .word       0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
0069     .word       0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
0070     .word       0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
0071     .word       0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
0072 
0073     /*
0074      * int sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
0075      *            int blocks)
0076      */
0077     .text
0078 SYM_FUNC_START(sha2_ce_transform)  // in: x0 = sst, x1 = src, w2 = blocks; out: w0 = blocks not yet consumed
0079     /* load round constants */
0080     adr_l       x8, .Lsha2_rcon  // x8 = address of the round constant table
0081     ld1     { v0.4s- v3.4s}, [x8], #64  // constants 0-15, then x8 += 64
0082     ld1     { v4.4s- v7.4s}, [x8], #64  // constants 16-31, then x8 += 64
0083     ld1     { v8.4s-v11.4s}, [x8], #64  // constants 32-47; NOTE(review): v8-v15 are overwritten and never saved here, presumably the caller preserves SIMD state -- confirm
0084     ld1     {v12.4s-v15.4s}, [x8]  // constants 48-63
0085 
0086     /* load state */
0087     ld1     {dgav.4s, dgbv.4s}, [x0]  // 32 bytes of hash state from the start of sst
0088 
0089     /* load sha256_ce_state::finalize */
0090     ldr_l       w4, sha256_ce_offsetof_finalize, x4  // ldr_l is defined elsewhere; presumably w4 = value of that variable (a field offset), x4 as scratch
0091     ldr     w4, [x0, x4]  // w4 = 32-bit field at sst + offset; the upper half of x4 becomes zero
0092 
0093     /* load input */
0094 0:  ld1     {v16.4s-v19.4s}, [x1], #64  // next 64-byte block into v16-v19, then x1 += 64
0095     sub     w2, w2, #1  // one fewer block outstanding
0096 
0097 CPU_LE( rev32       v16.16b, v16.16b    )  // byte-swap every 32-bit word; CPU_LE is defined elsewhere, presumably emitted on little-endian builds only
0098 CPU_LE( rev32       v17.16b, v17.16b    )
0099 CPU_LE( rev32       v18.16b, v18.16b    )
0100 CPU_LE( rev32       v19.16b, v19.16b    )
0101 
0102 1:  add     t0.4s, v16.4s, v0.4s  // t0 = first schedule vector + first constant vector; the padding block built at 2: also enters here
0103     mov     dg0v.16b, dgav.16b  // start the block from the current state ...
0104     mov     dg1v.16b, dgbv.16b  // ... kept in working registers so dgav/dgbv stay intact
0105 
0106     add_update  0,  v1, 16, 17, 18, 19  // 12 steps that hash and extend the schedule; each rc argument is the constant vector for the step after it
0107     add_update  1,  v2, 17, 18, 19, 16
0108     add_update  0,  v3, 18, 19, 16, 17
0109     add_update  1,  v4, 19, 16, 17, 18
0110 
0111     add_update  0,  v5, 16, 17, 18, 19
0112     add_update  1,  v6, 17, 18, 19, 16
0113     add_update  0,  v7, 18, 19, 16, 17
0114     add_update  1,  v8, 19, 16, 17, 18
0115 
0116     add_update  0,  v9, 16, 17, 18, 19
0117     add_update  1, v10, 17, 18, 19, 16
0118     add_update  0, v11, 18, 19, 16, 17
0119     add_update  1, v12, 19, 16, 17, 18
0120 
0121     add_only    0, v13, 17  // last four steps: no more schedule updates, only hashing
0122     add_only    1, v14, 18
0123     add_only    0, v15, 19
0124     add_only    1  // final step: consumes t1, no further sum to prepare
0125 
0126     /* update state */
0127     add     dgav.4s, dgav.4s, dg0v.4s  // state += working registers, lane by lane
0128     add     dgbv.4s, dgbv.4s, dg1v.4s
0129 
0130     /* handled all input blocks? */
0131     cbz     w2, 2f  // nothing left: go check whether padding must be hashed
0132     cond_yield  3f, x5, x6  // defined elsewhere; presumably branches to 3: when the CPU should be yielded, with x5/x6 as scratch -- confirm
0133     b       0b  // otherwise hash the next input block
0134 
0135     /*
0136      * Final block: add padding and total bit count.
0137      * Skip if the input size was not a round multiple of the block size,
0138      * the padding is handled by the C code in that case.
0139      */
0140 2:  cbz     x4, 3f  // flag is zero, or was cleared below after the padding block was queued: finish
0141     ldr_l       w4, sha256_ce_offsetof_count, x4  // same pattern as for finalize: presumably w4 = offset of the count field
0142     ldr     x4, [x0, x4]  // x4 = 64-bit count field of sst
0143     movi        v17.2d, #0  // words 4-7 of the padding block are zero
0144     mov     x8, #0x80000000  // first padding word: the 0x80 marker in its top byte
0145     movi        v18.2d, #0  // words 8-11 are zero
0146     ror     x7, x4, #29     // ror(lsl(x4, 3), 32)
0147     fmov        d16, x8  // v16 = { 0x80000000, 0, 0, 0 }; the scalar write zeroes the upper lanes
0148     mov     x4, #0  // clear the flag so the next pass through 2: exits
0149     mov     v19.d[0], xzr  // words 12-13 are zero
0150     mov     v19.d[1], x7  // words 14-15 = count * 8, high 32 bits first (x7 has its halves swapped)
0151     b       1b  // hash the padding block; entering at 1: skips the input load and the rev32
0152 
0153     /* store new state */
0154 3:  st1     {dgav.4s, dgbv.4s}, [x0]  // write the 32 bytes of state back to sst
0155     mov     w0, w2  // return value: blocks still unconsumed (nonzero only when cond_yield branched here)
0156     ret
0157 SYM_FUNC_END(sha2_ce_transform)