Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env perl
0002 # SPDX-License-Identifier: GPL-2.0
0003 
0004 # This code is taken from the OpenSSL project but the author (Andy Polyakov)
0005 # has relicensed it under the GPLv2. Therefore this program is free software;
0006 # you can redistribute it and/or modify it under the terms of the GNU General
0007 # Public License version 2 as published by the Free Software Foundation.
0008 #
0009 # The original headers, including the original license headers, are
0010 # included below for completeness.
0011 
0012 # ====================================================================
0013 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
0014 # project. The module is, however, dual licensed under OpenSSL and
0015 # CRYPTOGAMS licenses depending on where you obtain it. For further
0016 # details see https://www.openssl.org/~appro/cryptogams/.
0017 # ====================================================================
0018 #
0019 # GHASH for PowerISA v2.07.
0020 #
0021 # July 2014
0022 #
0023 # Accurate performance measurements are problematic, because it's
0024 # always a virtualized setup with a possibly throttled processor.
0025 # Relative comparison is therefore more informative. This initial
0026 # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
0027 # faster than "4-bit" integer-only compiler-generated 64-bit code.
0028 # "Initial version" means that there is room for further improvement.
0029 
0030 $flavour=shift;    # 1st command-line argument: target flavour string
0031 $output =shift;    # 2nd command-line argument: passed through on the xlate command line
0032 
0033 if ($flavour =~ /64/) {    # flavour contains "64": 8-byte SIZE_T, doubleword mnemonics
0034     $SIZE_T=8;    # NOTE: none of these five variables is referenced again in the visible source
0035     $LRSAVE=2*$SIZE_T;    # 16
0036     $STU="stdu";
0037     $POP="ld";
0038     $PUSH="std";
0039 } elsif ($flavour =~ /32/) {    # flavour contains "32": 4-byte SIZE_T, word mnemonics
0040     $SIZE_T=4;
0041     $LRSAVE=$SIZE_T;    # 4
0042     $STU="stwu";
0043     $POP="lwz";
0044     $PUSH="stw";
0045 } else { die "nonsense $flavour"; }    # neither "64" nor "32" appears in the flavour: abort
0046 
0047 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;    # $dir = path of this script up to and including its last / or \ separator
0048 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or    # 1st candidate: ppc-xlate.pl in the script's own directory
0049 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or    # 2nd candidate: ../../perlasm/ relative to that directory
0050 die "can't locate ppc-xlate.pl";    # neither candidate exists as a plain file
0051 
0052 open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";    # pipe all later prints through the xlate script; 'or' (not '||') so the die applies to open() rather than to the always-true command string
0053 
0054 my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));    # argument block: r3,r4,r5,r6
0055 
0056 my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));    # v0..v3
0057 my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));    # v4..v12
0058 my $vrsave="r12";    # GPR that holds the saved SPR 256 value in each routine (mfspr on entry, mtspr before blr)
0059 
0060 $code=<<___;    # assembly template for .gcm_init_p8/.gcm_gmult_p8/.gcm_ghash_p8; $names interpolate the register aliases declared above, le?/be? tags are resolved by the loop after the heredoc
0061 .machine    "any"
0062 
0063 .text
0064 
0065 .globl  .gcm_init_p8
0066     lis     r0,0xfff0
0067     li      r8,0x10
0068     mfspr       $vrsave,256
0069     li      r9,0x20
0070     mtspr       256,r0
0071     li      r10,0x30
0072     lvx_u       $H,0,r4         # load H
0073     le?xor      r7,r7,r7
0074     le?addi     r7,r7,0x8       # need a vperm start with 08
0075     le?lvsr     5,0,r7
0076     le?vspltisb 6,0x0f
0077     le?vxor     5,5,6           # set a b-endian mask
0078     le?vperm    $H,$H,$H,5
0079 
0080     vspltisb    $xC2,-16        # 0xf0
0081     vspltisb    $t0,1           # one
0082     vaddubm     $xC2,$xC2,$xC2      # 0xe0
0083     vxor        $zero,$zero,$zero
0084     vor     $xC2,$xC2,$t0       # 0xe1
0085     vsldoi      $xC2,$xC2,$zero,15  # 0xe1...
0086     vsldoi      $t1,$zero,$t0,1     # ...1
0087     vaddubm     $xC2,$xC2,$xC2      # 0xc2...
0088     vspltisb    $t2,7
0089     vor     $xC2,$xC2,$t1       # 0xc2....01
0090     vspltb      $t1,$H,0        # most significant byte
0091     vsl     $H,$H,$t0       # H<<=1
0092     vsrab       $t1,$t1,$t2     # broadcast carry bit
0093     vand        $t1,$t1,$xC2
0094     vxor        $H,$H,$t1       # twisted H
0095 
0096     vsldoi      $H,$H,$H,8      # twist even more ...
0097     vsldoi      $xC2,$zero,$xC2,8   # 0xc2.0
0098     vsldoi      $Hl,$zero,$H,8      # ... and split
0099     vsldoi      $Hh,$H,$zero,8
0100 
0101     stvx_u      $xC2,0,r3       # save pre-computed table
0102     stvx_u      $Hl,r8,r3
0103     stvx_u      $H, r9,r3
0104     stvx_u      $Hh,r10,r3
0105 
0106     mtspr       256,$vrsave
0107     blr
0108     .long       0
0109     .byte       0,12,0x14,0,0,0,2,0
0110     .long       0
0111 .size   .gcm_init_p8,.-.gcm_init_p8
0112 
0113 .globl  .gcm_gmult_p8
0114     lis     r0,0xfff8
0115     li      r8,0x10
0116     mfspr       $vrsave,256
0117     li      r9,0x20
0118     mtspr       256,r0
0119     li      r10,0x30
0120     lvx_u       $IN,0,$Xip      # load Xi
0121 
0122     lvx_u       $Hl,r8,$Htbl        # load pre-computed table
0123      le?lvsl    $lemask,r0,r0
0124     lvx_u       $H, r9,$Htbl
0125      le?vspltisb    $t0,0x07
0126     lvx_u       $Hh,r10,$Htbl
0127      le?vxor    $lemask,$lemask,$t0
0128     lvx_u       $xC2,0,$Htbl
0129      le?vperm   $IN,$IN,$IN,$lemask
0130     vxor        $zero,$zero,$zero
0131 
0132     vpmsumd     $Xl,$IN,$Hl     # H.lo·Xi.lo
0133     vpmsumd     $Xm,$IN,$H      # H.hi·Xi.lo+H.lo·Xi.hi
0134     vpmsumd     $Xh,$IN,$Hh     # H.hi·Xi.hi
0135 
0136     vpmsumd     $t2,$Xl,$xC2        # 1st phase
0137 
0138     vsldoi      $t0,$Xm,$zero,8
0139     vsldoi      $t1,$zero,$Xm,8
0140     vxor        $Xl,$Xl,$t0
0141     vxor        $Xh,$Xh,$t1
0142 
0143     vsldoi      $Xl,$Xl,$Xl,8
0144     vxor        $Xl,$Xl,$t2
0145 
0146     vsldoi      $t1,$Xl,$Xl,8       # 2nd phase
0147     vpmsumd     $Xl,$Xl,$xC2
0148     vxor        $t1,$t1,$Xh
0149     vxor        $Xl,$Xl,$t1
0150 
0151     le?vperm    $Xl,$Xl,$Xl,$lemask
0152     stvx_u      $Xl,0,$Xip      # write out Xi
0153 
0154     mtspr       256,$vrsave
0155     blr
0156     .long       0
0157     .byte       0,12,0x14,0,0,0,2,0
0158     .long       0
0159 .size   .gcm_gmult_p8,.-.gcm_gmult_p8
0160 
0161 .globl  .gcm_ghash_p8
0162     lis     r0,0xfff8
0163     li      r8,0x10
0164     mfspr       $vrsave,256
0165     li      r9,0x20
0166     mtspr       256,r0
0167     li      r10,0x30
0168     lvx_u       $Xl,0,$Xip      # load Xi
0169 
0170     lvx_u       $Hl,r8,$Htbl        # load pre-computed table
0171      le?lvsl    $lemask,r0,r0
0172     lvx_u       $H, r9,$Htbl
0173      le?vspltisb    $t0,0x07
0174     lvx_u       $Hh,r10,$Htbl
0175      le?vxor    $lemask,$lemask,$t0
0176     lvx_u       $xC2,0,$Htbl
0177      le?vperm   $Xl,$Xl,$Xl,$lemask
0178     vxor        $zero,$zero,$zero
0179 
0180     lvx_u       $IN,0,$inp
0181     addi        $inp,$inp,16
0182     subi        $len,$len,16
0183      le?vperm   $IN,$IN,$IN,$lemask
0184     vxor        $IN,$IN,$Xl
0185     b       Loop
0186 
0187 .align  5
0188 Loop:
0189      subic      $len,$len,16
0190     vpmsumd     $Xl,$IN,$Hl     # H.lo·Xi.lo
0191      subfe.     r0,r0,r0        # borrow?-1:0
0192     vpmsumd     $Xm,$IN,$H      # H.hi·Xi.lo+H.lo·Xi.hi
0193      and        r0,r0,$len
0194     vpmsumd     $Xh,$IN,$Hh     # H.hi·Xi.hi
0195      add        $inp,$inp,r0
0196 
0197     vpmsumd     $t2,$Xl,$xC2        # 1st phase
0198 
0199     vsldoi      $t0,$Xm,$zero,8
0200     vsldoi      $t1,$zero,$Xm,8
0201     vxor        $Xl,$Xl,$t0
0202     vxor        $Xh,$Xh,$t1
0203 
0204     vsldoi      $Xl,$Xl,$Xl,8
0205     vxor        $Xl,$Xl,$t2
0206      lvx_u      $IN,0,$inp
0207      addi       $inp,$inp,16
0208 
0209     vsldoi      $t1,$Xl,$Xl,8       # 2nd phase
0210     vpmsumd     $Xl,$Xl,$xC2
0211      le?vperm   $IN,$IN,$IN,$lemask
0212     vxor        $t1,$t1,$Xh
0213     vxor        $IN,$IN,$t1
0214     vxor        $IN,$IN,$Xl
0215     beq     Loop            # did $len-=16 borrow?
0216 
0217     vxor        $Xl,$Xl,$t1
0218     le?vperm    $Xl,$Xl,$Xl,$lemask
0219     stvx_u      $Xl,0,$Xip      # write out Xi
0220 
0221     mtspr       256,$vrsave
0222     blr
0223     .long       0
0224     .byte       0,12,0x14,0,0,0,4,0
0225     .long       0
0226 .size   .gcm_ghash_p8,.-.gcm_ghash_p8
0227 
0228 .asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
0229 .align  2
0230 ___
0231 
0232 foreach (split("\n",$code)) {    # emit the template one line at a time, resolving the le?/be? tags
0233     if ($flavour =~ /le$/o) {   # little-endian: flavour string ends in "le"
0234         s/le\?//o       or    # le?-tagged line: strip the tag so the instruction is kept
0235         s/be\?/#be#/o;    # otherwise replace a be? tag with "#be#" (presumably read as a comment downstream)
0236     } else {    # every other flavour
0237         s/le\?/#le#/o   or    # le?-tagged line: replace the tag with "#le#"
0238         s/be\?//o;    # otherwise strip a be? tag so the instruction is kept
0239     }
0240     print $_,"\n";    # goes to STDOUT, which was reopened above as a pipe to the xlate script
0241 }
0242 
0243 close STDOUT or die "error closing STDOUT: $!"; # enforce flush; closing the pipe also waits for the xlate child, so a failed flush or a failing child now aborts instead of passing silently