#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag
# being set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are on average lower than
# the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#               CBC en-/decrypt     CTR     XTS
# POWER8[le]    3.96/0.72           0.74    1.1
# POWER8[be]    3.75/0.65           0.66    1.0
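#
# For reference, the glue code is expected to declare the generated
# entry points with C prototypes along these lines (a sketch assuming
# the default aes_p8 prefix and a key structure that stores the
# expanded round keys with the round count at byte offset 240):
#
#   int  aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
#                               struct aes_key *key);
#   int  aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
#                               struct aes_key *key);
#   void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
#   void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
#   void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
#                           const struct aes_key *key, u8 *iv, const int enc);
#   void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
#                                    const struct aes_key *key, const u8 *iv);
#
# The set_*_key routines return 0 on success, -1 on a NULL argument and
# -2 on an unsupported key length, as implemented below.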

$flavour = shift;

if ($flavour =~ /64/) {
    $SIZE_T =8;
    $LRSAVE =2*$SIZE_T;
    $STU    ="stdu";
    $POP    ="ld";
    $PUSH   ="std";
    $UCMP   ="cmpld";
    $SHL    ="sldi";
} elsif ($flavour =~ /32/) {
    $SIZE_T =4;
    $LRSAVE =$SIZE_T;
    $STU    ="stwu";
    $POP    ="lwz";
    $PUSH   ="stw";
    $UCMP   ="cmplw";
    $SHL    ="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
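#
# The script takes a target "flavour" and an output file on its command
# line and pipes everything it emits through ppc-xlate.pl; an
# illustrative invocation (flavour name and paths are examples):
#
#   perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S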

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures                      #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine    "any"

.text

.align  7
rcon:
.long   0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
.long   0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
.long   0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
.long   0,0,0,0                     ?asis
Lconsts:
    mflr    r0
    bcl 20,31,\$+4
    mflr    $ptr        # distance between . and rcon
    addi    $ptr,$ptr,-0x48
    mtlr    r0
    blr
    .long   0
    .byte   0,12,0x14,0,0,0,0,0
.asciz  "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl  .${prefix}_set_encrypt_key
Lset_encrypt_key:
    mflr        r11
    $PUSH       r11,$LRSAVE($sp)

    li      $ptr,-1
    ${UCMP}i    $inp,0
    beq-        Lenc_key_abort      # if ($inp==0) return -1;
    ${UCMP}i    $out,0
    beq-        Lenc_key_abort      # if ($out==0) return -1;
    li      $ptr,-2
    cmpwi       $bits,128
    blt-        Lenc_key_abort
    cmpwi       $bits,256
    bgt-        Lenc_key_abort
    andi.       r0,$bits,0x3f
    bne-        Lenc_key_abort

    lis     r0,0xfff0
    mfspr       $vrsave,256
    mtspr       256,r0

    bl      Lconsts
    mtlr        r11

    neg     r9,$inp
    lvx     $in0,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    lvsr        $key,0,r9       # borrow $key
    li      r8,0x20
    cmpwi       $bits,192
    lvx     $in1,0,$inp
    le?vspltisb $mask,0x0f      # borrow $mask
    lvx     $rcon,0,$ptr
    le?vxor     $key,$key,$mask     # adjust for byte swap
    lvx     $mask,r8,$ptr
    addi        $ptr,$ptr,0x10
    vperm       $in0,$in0,$in1,$key # align [and byte swap in LE]
    li      $cnt,8
    vxor        $zero,$zero,$zero
    mtctr       $cnt

    ?lvsr       $outperm,0,$out
    vspltisb    $outmask,-1
    lvx     $outhead,0,$out
    ?vperm      $outmask,$zero,$outmask,$outperm

    blt     Loop128
    addi        $inp,$inp,8
    beq     L192
    addi        $inp,$inp,8
    b       L256

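# The scalar equivalent of one iteration of the schedule loop below,
# per FIPS-197 (a sketch for the 128-bit case; rcon doubles each pass):
#
#   w[4*i+4] = w[4*i]   ^ SubWord(RotWord(w[4*i+3])) ^ rcon
#   w[4*i+5] = w[4*i+1] ^ w[4*i+4]
#   w[4*i+6] = w[4*i+2] ^ w[4*i+5]
#   w[4*i+7] = w[4*i+3] ^ w[4*i+6]
#
# The "rotate-n-splat" vperm plus vcipherlast evaluates the first line
# in all four lanes at once, and the vsldoi/vxor chain accumulates the
# three remaining xors within the register.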
.align  4
Loop128:
    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
    bdnz        Loop128

    lvx     $rcon,0,$ptr        # last two round keys

    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key

    vperm       $key,$in0,$in0,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vxor        $in0,$in0,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out

    addi        $inp,$out,15        # 15 is not typo
    addi        $out,$out,0x50

    li      $rounds,10
    b       Ldone

.align  4
L192:
    lvx     $tmp,0,$inp
    li      $cnt,4
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $out,$out,16
    vperm       $in1,$in1,$tmp,$key # align [and byte swap in LE]
    vspltisb    $key,8          # borrow $key
    mtctr       $cnt
    vsububm     $mask,$mask,$key    # adjust the mask

Loop192:
    vperm       $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
    vcipherlast $key,$key,$rcon

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp

     vsldoi     $stage,$zero,$in1,8
    vspltw      $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi      $in1,$zero,$in1,12  # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vsldoi     $stage,$stage,$in0,8

    vperm       $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$stage,$stage,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

     vsldoi     $stage,$in0,$in1,8
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
     vperm      $outtail,$stage,$stage,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     stvx       $stage,0,$out
     addi       $out,$out,16

    vspltw      $tmp,$in0,3
    vxor        $tmp,$tmp,$in1
    vsldoi      $in1,$zero,$in1,12  # >>32
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in1,$in1,$tmp
    vxor        $in0,$in0,$key
    vxor        $in1,$in1,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $inp,$out,15        # 15 is not typo
     addi       $out,$out,16
    bdnz        Loop192

    li      $rounds,12
    addi        $out,$out,0x20
    b       Ldone

.align  4
L256:
    lvx     $tmp,0,$inp
    li      $cnt,7
    li      $rounds,14
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $out,$out,16
    vperm       $in1,$in1,$tmp,$key # align [and byte swap in LE]
    mtctr       $cnt

Loop256:
    vperm       $key,$in1,$in1,$mask    # rotate-n-splat
    vsldoi      $tmp,$zero,$in0,12  # >>32
     vperm      $outtail,$in1,$in1,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
    vcipherlast $key,$key,$rcon
     stvx       $stage,0,$out
     addi       $out,$out,16

    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in0,$in0,$tmp
     vadduwm    $rcon,$rcon,$rcon
    vxor        $in0,$in0,$key
     vperm      $outtail,$in0,$in0,$outperm # rotate
     vsel       $stage,$outhead,$outtail,$outmask
     vmr        $outhead,$outtail
     stvx       $stage,0,$out
     addi       $inp,$out,15        # 15 is not typo
     addi       $out,$out,16
    bdz     Ldone

    vspltw      $key,$in0,3     # just splat
    vsldoi      $tmp,$zero,$in1,12  # >>32
    vsbox       $key,$key

    vxor        $in1,$in1,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in1,$in1,$tmp
    vsldoi      $tmp,$zero,$tmp,12  # >>32
    vxor        $in1,$in1,$tmp

    vxor        $in1,$in1,$key
    b       Loop256

.align  4
Ldone:
    lvx     $in1,0,$inp     # redundant in aligned case
    vsel        $in1,$outhead,$in1,$outmask
    stvx        $in1,0,$inp
    li      $ptr,0
    mtspr       256,$vrsave
    stw     $rounds,0($out)

Lenc_key_abort:
    mr      r3,$ptr
    blr
    .long       0
    .byte       0,12,0x14,1,0,0,3,0
    .long       0
.size   .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl  .${prefix}_set_decrypt_key
    $STU        $sp,-$FRAME($sp)
    mflr        r10
    $PUSH       r10,$FRAME+$LRSAVE($sp)
    bl      Lset_encrypt_key
    mtlr        r10

    cmpwi       r3,0
    bne-        Ldec_key_abort

    slwi        $cnt,$rounds,4
    subi        $inp,$out,240       # first round key
    srwi        $rounds,$rounds,1
    add     $out,$inp,$cnt      # last round key
    mtctr       $rounds

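# The POWER8 vncipher/vncipherlast instructions are defined so that
# decryption can reuse the encryption key schedule, just applied in
# reverse order; the loop below therefore swaps round key i with round
# key rounds-i in place, one 16-byte pair per iteration from both ends.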
Ldeckey:
    lwz     r0, 0($inp)
    lwz     r6, 4($inp)
    lwz     r7, 8($inp)
    lwz     r8, 12($inp)
    addi        $inp,$inp,16
    lwz     r9, 0($out)
    lwz     r10,4($out)
    lwz     r11,8($out)
    lwz     r12,12($out)
    stw     r0, 0($out)
    stw     r6, 4($out)
    stw     r7, 8($out)
    stw     r8, 12($out)
    subi        $out,$out,16
    stw     r9, -16($inp)
    stw     r10,-12($inp)
    stw     r11,-8($inp)
    stw     r12,-4($inp)
    bdnz        Ldeckey

    xor     r3,r3,r3        # return value
Ldec_key_abort:
    addi        $sp,$sp,$FRAME
    blr
    .long       0
    .byte       0,12,4,1,0x80,0,3,0
    .long       0
.size   .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures           #
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl  .${prefix}_${dir}crypt
    lwz     $rounds,240($key)
    lis     r0,0xfc00
    mfspr       $vrsave,256
    li      $idx,15         # 15 is not typo
    mtspr       256,r0

    lvx     v0,0,$inp
    neg     r11,$out
    lvx     v1,$idx,$inp
    lvsl        v2,0,$inp       # inpperm
    le?vspltisb v4,0x0f
    ?lvsl       v3,0,r11        # outperm
    le?vxor     v2,v2,v4
    li      $idx,16
    vperm       v0,v0,v1,v2     # align [and byte swap in LE]
    lvx     v1,0,$key
    ?lvsl       v5,0,$key       # keyperm
    srwi        $rounds,$rounds,1
    lvx     v2,$idx,$key
    addi        $idx,$idx,16
    subi        $rounds,$rounds,1
    ?vperm      v1,v1,v2,v5     # align round key

    vxor        v0,v0,v1
    lvx     v1,$idx,$key
    addi        $idx,$idx,16
    mtctr       $rounds

Loop_${dir}c:
    ?vperm      v2,v2,v1,v5
    v${n}cipher v0,v0,v2
    lvx     v2,$idx,$key
    addi        $idx,$idx,16
    ?vperm      v1,v1,v2,v5
    v${n}cipher v0,v0,v1
    lvx     v1,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_${dir}c

    ?vperm      v2,v2,v1,v5
    v${n}cipher v0,v0,v2
    lvx     v2,$idx,$key
    ?vperm      v1,v1,v2,v5
    v${n}cipherlast v0,v0,v1

    vspltisb    v2,-1
    vxor        v1,v1,v1
    li      $idx,15         # 15 is not typo
    ?vperm      v2,v1,v2,v3     # outmask
    le?vxor     v3,v3,v4
    lvx     v1,0,$out       # outhead
    vperm       v0,v0,v0,v3     # rotate [and byte swap in LE]
    vsel        v1,v1,v0,v2
    lvx     v4,$idx,$out
    stvx        v1,0,$out
    vsel        v0,v0,v4,v2
    stvx        v0,$idx,$out

    mtspr       256,$vrsave
    blr
    .long       0
    .byte       0,12,0x14,0,0,0,3,0
    .long       0
.size   .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
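# &gen_block instantiates the template above twice; with the default
# prefix it emits aes_p8_encrypt (vcipher/vcipherlast) and
# aes_p8_decrypt (vncipher/vncipherlast), differing only in the opcode
# pair selected by $n.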
}}}
#########################################################################
{{{ # CBC en- and decrypt procedures                #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=     map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
                        map("v$_",(4..10));
$code.=<<___;
.globl  .${prefix}_cbc_encrypt
    ${UCMP}i    $len,16
    bltlr-

    cmpwi       $enc,0          # test direction
    lis     r0,0xffe0
    mfspr       $vrsave,256
    mtspr       256,r0

    li      $idx,15
    vxor        $rndkey0,$rndkey0,$rndkey0
    le?vspltisb $tmp,0x0f

    lvx     $ivec,0,$ivp        # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx     $inptail,$idx,$ivp
    le?vxor     $inpperm,$inpperm,$tmp
    vperm       $ivec,$ivec,$inptail,$inpperm

    neg     r11,$inp
    ?lvsl       $keyperm,0,$key     # prepare for unaligned key
    lwz     $rounds,240($key)

    lvsr        $inpperm,0,r11      # prepare for unaligned load
    lvx     $inptail,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    le?vxor     $inpperm,$inpperm,$tmp

    ?lvsr       $outperm,0,$out     # prepare for unaligned store
    vspltisb    $outmask,-1
    lvx     $outhead,0,$out
    ?vperm      $outmask,$rndkey0,$outmask,$outperm
    le?vxor     $outperm,$outperm,$tmp

    srwi        $rounds,$rounds,1
    li      $idx,16
    subi        $rounds,$rounds,1
    beq     Lcbc_dec

Lcbc_enc:
    vmr     $inout,$inptail
    lvx     $inptail,0,$inp
    addi        $inp,$inp,16
    mtctr       $rounds
    subi        $len,$len,16        # len-=16

    lvx     $rndkey0,0,$key
     vperm      $inout,$inout,$inptail,$inpperm
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$inout,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16
    vxor        $inout,$inout,$ivec

Loop_cbc_enc:
    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher     $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher     $inout,$inout,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_cbc_enc

    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher     $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
    li      $idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipherlast $ivec,$inout,$rndkey0
    ${UCMP}i    $len,16

    vperm       $tmp,$ivec,$ivec,$outperm
    vsel        $inout,$outhead,$tmp,$outmask
    vmr     $outhead,$tmp
    stvx        $inout,0,$out
    addi        $out,$out,16
    bge     Lcbc_enc

    b       Lcbc_done

.align  4
Lcbc_dec:
    ${UCMP}i    $len,128
    bge     _aesp8_cbc_decrypt8x
    vmr     $tmp,$inptail
    lvx     $inptail,0,$inp
    addi        $inp,$inp,16
    mtctr       $rounds
    subi        $len,$len,16        # len-=16

    lvx     $rndkey0,0,$key
     vperm      $tmp,$tmp,$inptail,$inpperm
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$tmp,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16

Loop_cbc_dec:
    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipher    $inout,$inout,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_cbc_dec

    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vncipher    $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
    li      $idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vncipherlast    $inout,$inout,$rndkey0
    ${UCMP}i    $len,16

    vxor        $inout,$inout,$ivec
    vmr     $ivec,$tmp
    vperm       $tmp,$inout,$inout,$outperm
    vsel        $inout,$outhead,$tmp,$outmask
    vmr     $outhead,$tmp
    stvx        $inout,0,$out
    addi        $out,$out,16
    bge     Lcbc_dec

Lcbc_done:
    addi        $out,$out,-1
    lvx     $inout,0,$out       # redundant in aligned case
    vsel        $inout,$outhead,$inout,$outmask
    stvx        $inout,0,$out

    neg     $enc,$ivp       # write [unaligned] iv
    li      $idx,15         # 15 is not typo
    vxor        $rndkey0,$rndkey0,$rndkey0
    vspltisb    $outmask,-1
    le?vspltisb $tmp,0x0f
    ?lvsl       $outperm,0,$enc
    ?vperm      $outmask,$rndkey0,$outmask,$outperm
    le?vxor     $outperm,$outperm,$tmp
    lvx     $outhead,0,$ivp
    vperm       $ivec,$ivec,$ivec,$outperm
    vsel        $inout,$outhead,$ivec,$outmask
    lvx     $inptail,$idx,$ivp
    stvx        $inout,0,$ivp
    vsel        $inout,$ivec,$inptail,$outmask
    stvx        $inout,$idx,$ivp

    mtspr       256,$vrsave
    blr
    .long       0
    .byte       0,12,0x14,0,0,0,6,0
    .long       0
___
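# The 8x path below keeps the last six round keys resident in v26-v31,
# spills the remaining ones to a stack buffer (rotated through v24/v25),
# and runs eight independent vncipher pipelines per round to hide the
# instruction latency.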
#########################################################################
{{  # Optimized CBC decrypt procedure               #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";  # v24-v25 rotating buffer for the early round keys
            # v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment

$code.=<<___;
.align  5
_aesp8_cbc_decrypt8x:
    $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    li      r10,`$FRAME+8*16+15`
    li      r11,`$FRAME+8*16+31`
    stvx        v20,r10,$sp     # ABI says so
    addi        r10,r10,32
    stvx        v21,r11,$sp
    addi        r11,r11,32
    stvx        v22,r10,$sp
    addi        r10,r10,32
    stvx        v23,r11,$sp
    addi        r11,r11,32
    stvx        v24,r10,$sp
    addi        r10,r10,32
    stvx        v25,r11,$sp
    addi        r11,r11,32
    stvx        v26,r10,$sp
    addi        r10,r10,32
    stvx        v27,r11,$sp
    addi        r11,r11,32
    stvx        v28,r10,$sp
    addi        r10,r10,32
    stvx        v29,r11,$sp
    addi        r11,r11,32
    stvx        v30,r10,$sp
    stvx        v31,r11,$sp
    li      r0,-1
    stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
    li      $x10,0x10
    $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    li      $x20,0x20
    $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    li      $x30,0x30
    $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    li      $x40,0x40
    $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    li      $x50,0x50
    $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    li      $x60,0x60
    $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    li      $x70,0x70
    mtspr       256,r0

    subi        $rounds,$rounds,3   # -4 in total
    subi        $len,$len,128       # bias

    lvx     $rndkey0,$x00,$key  # load key schedule
    lvx     v30,$x10,$key
    addi        $key,$key,0x20
    lvx     v31,$x00,$key
    ?vperm      $rndkey0,$rndkey0,v30,$keyperm
    addi        $key_,$sp,$FRAME+15
    mtctr       $rounds

Load_cbc_dec_key:
    ?vperm      v24,v30,v31,$keyperm
    lvx     v30,$x10,$key
    addi        $key,$key,0x20
    stvx        v24,$x00,$key_      # off-load round[1]
    ?vperm      v25,v31,v30,$keyperm
    lvx     v31,$x00,$key
    stvx        v25,$x10,$key_      # off-load round[2]
    addi        $key_,$key_,0x20
    bdnz        Load_cbc_dec_key

    lvx     v26,$x10,$key
    ?vperm      v24,v30,v31,$keyperm
    lvx     v27,$x20,$key
    stvx        v24,$x00,$key_      # off-load round[3]
    ?vperm      v25,v31,v26,$keyperm
    lvx     v28,$x30,$key
    stvx        v25,$x10,$key_      # off-load round[4]
    addi        $key_,$sp,$FRAME+15 # rewind $key_
    ?vperm      v26,v26,v27,$keyperm
    lvx     v29,$x40,$key
    ?vperm      v27,v27,v28,$keyperm
    lvx     v30,$x50,$key
    ?vperm      v28,v28,v29,$keyperm
    lvx     v31,$x60,$key
    ?vperm      v29,v29,v30,$keyperm
    lvx     $out0,$x70,$key     # borrow $out0
    ?vperm      v30,v30,v31,$keyperm
    lvx     v24,$x00,$key_      # pre-load round[1]
    ?vperm      v31,v31,$out0,$keyperm
    lvx     v25,$x10,$key_      # pre-load round[2]

    #lvx        $inptail,0,$inp     # "caller" already did this
    #addi       $inp,$inp,15        # 15 is not typo
    subi        $inp,$inp,15        # undo "caller"

     le?li      $idx,8
    lvx_u       $in0,$x00,$inp      # load first 8 "words"
     le?lvsl    $inpperm,0,$idx
     le?vspltisb    $tmp,0x0f
    lvx_u       $in1,$x10,$inp
     le?vxor    $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
    lvx_u       $in2,$x20,$inp
     le?vperm   $in0,$in0,$in0,$inpperm
    lvx_u       $in3,$x30,$inp
     le?vperm   $in1,$in1,$in1,$inpperm
    lvx_u       $in4,$x40,$inp
     le?vperm   $in2,$in2,$in2,$inpperm
    vxor        $out0,$in0,$rndkey0
    lvx_u       $in5,$x50,$inp
     le?vperm   $in3,$in3,$in3,$inpperm
    vxor        $out1,$in1,$rndkey0
    lvx_u       $in6,$x60,$inp
     le?vperm   $in4,$in4,$in4,$inpperm
    vxor        $out2,$in2,$rndkey0
    lvx_u       $in7,$x70,$inp
    addi        $inp,$inp,0x80
     le?vperm   $in5,$in5,$in5,$inpperm
    vxor        $out3,$in3,$rndkey0
     le?vperm   $in6,$in6,$in6,$inpperm
    vxor        $out4,$in4,$rndkey0
     le?vperm   $in7,$in7,$in7,$inpperm
    vxor        $out5,$in5,$rndkey0
    vxor        $out6,$in6,$rndkey0
    vxor        $out7,$in7,$rndkey0

    mtctr       $rounds
    b       Loop_cbc_dec8x
.align  5
Loop_cbc_dec8x:
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24
    lvx     v24,$x20,$key_      # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25
    lvx     v25,$x10,$key_      # round[4]
    bdnz        Loop_cbc_dec8x

    subic       $len,$len,128       # $len-=128
    vncipher    $out0,$out0,v24
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24

    subfe.      r0,r0,r0        # borrow?-1:0
    vncipher    $out0,$out0,v25
    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25

    and     r0,r0,$len
    vncipher    $out0,$out0,v26
    vncipher    $out1,$out1,v26
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
    vncipher    $out4,$out4,v26
    vncipher    $out5,$out5,v26
    vncipher    $out6,$out6,v26
    vncipher    $out7,$out7,v26

    add     $inp,$inp,r0        # $inp is adjusted in such a
                        # way that at exit from the
                        # loop inX-in7 are loaded
                        # with the last "words"
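# In other words, the subic/subfe/and/add sequence is a branchless tail
# fixup: r0 is zero while a full 128-byte batch remains and the negative
# byte deficit otherwise, so the eight loads below always end exactly at
# the end of the input rather than past it.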
    vncipher    $out0,$out0,v27
    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
    vncipher    $out4,$out4,v27
    vncipher    $out5,$out5,v27
    vncipher    $out6,$out6,v27
    vncipher    $out7,$out7,v27

    addi        $key_,$sp,$FRAME+15 # rewind $key_
    vncipher    $out0,$out0,v28
    vncipher    $out1,$out1,v28
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
    vncipher    $out4,$out4,v28
    vncipher    $out5,$out5,v28
    vncipher    $out6,$out6,v28
    vncipher    $out7,$out7,v28
    lvx     v24,$x00,$key_      # re-pre-load round[1]

    vncipher    $out0,$out0,v29
    vncipher    $out1,$out1,v29
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
    vncipher    $out4,$out4,v29
    vncipher    $out5,$out5,v29
    vncipher    $out6,$out6,v29
    vncipher    $out7,$out7,v29
    lvx     v25,$x10,$key_      # re-pre-load round[2]

    vncipher    $out0,$out0,v30
     vxor       $ivec,$ivec,v31     # xor with last round key
    vncipher    $out1,$out1,v30
     vxor       $in0,$in0,v31
    vncipher    $out2,$out2,v30
     vxor       $in1,$in1,v31
    vncipher    $out3,$out3,v30
     vxor       $in2,$in2,v31
    vncipher    $out4,$out4,v30
     vxor       $in3,$in3,v31
    vncipher    $out5,$out5,v30
     vxor       $in4,$in4,v31
    vncipher    $out6,$out6,v30
     vxor       $in5,$in5,v31
    vncipher    $out7,$out7,v30
     vxor       $in6,$in6,v31

    vncipherlast    $out0,$out0,$ivec
    vncipherlast    $out1,$out1,$in0
     lvx_u      $in0,$x00,$inp      # load next input block
    vncipherlast    $out2,$out2,$in1
     lvx_u      $in1,$x10,$inp
    vncipherlast    $out3,$out3,$in2
     le?vperm   $in0,$in0,$in0,$inpperm
     lvx_u      $in2,$x20,$inp
    vncipherlast    $out4,$out4,$in3
     le?vperm   $in1,$in1,$in1,$inpperm
     lvx_u      $in3,$x30,$inp
    vncipherlast    $out5,$out5,$in4
     le?vperm   $in2,$in2,$in2,$inpperm
     lvx_u      $in4,$x40,$inp
    vncipherlast    $out6,$out6,$in5
     le?vperm   $in3,$in3,$in3,$inpperm
     lvx_u      $in5,$x50,$inp
    vncipherlast    $out7,$out7,$in6
     le?vperm   $in4,$in4,$in4,$inpperm
     lvx_u      $in6,$x60,$inp
    vmr     $ivec,$in7
     le?vperm   $in5,$in5,$in5,$inpperm
     lvx_u      $in7,$x70,$inp
     addi       $inp,$inp,0x80

    le?vperm    $out0,$out0,$out0,$inpperm
    le?vperm    $out1,$out1,$out1,$inpperm
    stvx_u      $out0,$x00,$out
     le?vperm   $in6,$in6,$in6,$inpperm
     vxor       $out0,$in0,$rndkey0
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u      $out1,$x10,$out
     le?vperm   $in7,$in7,$in7,$inpperm
     vxor       $out1,$in1,$rndkey0
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u      $out2,$x20,$out
     vxor       $out2,$in2,$rndkey0
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u      $out3,$x30,$out
     vxor       $out3,$in3,$rndkey0
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u      $out4,$x40,$out
     vxor       $out4,$in4,$rndkey0
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x50,$out
     vxor       $out5,$in5,$rndkey0
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x60,$out
     vxor       $out6,$in6,$rndkey0
    stvx_u      $out7,$x70,$out
    addi        $out,$out,0x80
     vxor       $out7,$in7,$rndkey0

    mtctr       $rounds
    beq     Loop_cbc_dec8x      # did $len-=128 borrow?

    addic.      $len,$len,128
    beq     Lcbc_dec8x_done
    nop
    nop

Loop_cbc_dec8x_tail:                # up to 7 "words" tail...
    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24
    lvx     v24,$x20,$key_      # round[3]
    addi        $key_,$key_,0x20

    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25
    lvx     v25,$x10,$key_      # round[4]
    bdnz        Loop_cbc_dec8x_tail

    vncipher    $out1,$out1,v24
    vncipher    $out2,$out2,v24
    vncipher    $out3,$out3,v24
    vncipher    $out4,$out4,v24
    vncipher    $out5,$out5,v24
    vncipher    $out6,$out6,v24
    vncipher    $out7,$out7,v24

    vncipher    $out1,$out1,v25
    vncipher    $out2,$out2,v25
    vncipher    $out3,$out3,v25
    vncipher    $out4,$out4,v25
    vncipher    $out5,$out5,v25
    vncipher    $out6,$out6,v25
    vncipher    $out7,$out7,v25

    vncipher    $out1,$out1,v26
    vncipher    $out2,$out2,v26
    vncipher    $out3,$out3,v26
    vncipher    $out4,$out4,v26
    vncipher    $out5,$out5,v26
    vncipher    $out6,$out6,v26
    vncipher    $out7,$out7,v26

    vncipher    $out1,$out1,v27
    vncipher    $out2,$out2,v27
    vncipher    $out3,$out3,v27
    vncipher    $out4,$out4,v27
    vncipher    $out5,$out5,v27
    vncipher    $out6,$out6,v27
    vncipher    $out7,$out7,v27

    vncipher    $out1,$out1,v28
    vncipher    $out2,$out2,v28
    vncipher    $out3,$out3,v28
    vncipher    $out4,$out4,v28
    vncipher    $out5,$out5,v28
    vncipher    $out6,$out6,v28
    vncipher    $out7,$out7,v28

    vncipher    $out1,$out1,v29
    vncipher    $out2,$out2,v29
    vncipher    $out3,$out3,v29
    vncipher    $out4,$out4,v29
    vncipher    $out5,$out5,v29
    vncipher    $out6,$out6,v29
    vncipher    $out7,$out7,v29

    vncipher    $out1,$out1,v30
     vxor       $ivec,$ivec,v31     # last round key
    vncipher    $out2,$out2,v30
     vxor       $in1,$in1,v31
    vncipher    $out3,$out3,v30
     vxor       $in2,$in2,v31
    vncipher    $out4,$out4,v30
     vxor       $in3,$in3,v31
    vncipher    $out5,$out5,v30
     vxor       $in4,$in4,v31
    vncipher    $out6,$out6,v30
     vxor       $in5,$in5,v31
    vncipher    $out7,$out7,v30
     vxor       $in6,$in6,v31

    cmplwi      $len,32         # switch($len)
    blt     Lcbc_dec8x_one
    nop
    beq     Lcbc_dec8x_two
    cmplwi      $len,64
    blt     Lcbc_dec8x_three
    nop
    beq     Lcbc_dec8x_four
    cmplwi      $len,96
    blt     Lcbc_dec8x_five
    nop
    beq     Lcbc_dec8x_six

Lcbc_dec8x_seven:
    vncipherlast    $out1,$out1,$ivec
    vncipherlast    $out2,$out2,$in1
    vncipherlast    $out3,$out3,$in2
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out1,$out1,$out1,$inpperm
    le?vperm    $out2,$out2,$out2,$inpperm
    stvx_u      $out1,$x00,$out
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u      $out2,$x10,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u      $out3,$x20,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u      $out4,$x30,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x40,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x50,$out
    stvx_u      $out7,$x60,$out
    addi        $out,$out,0x70
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_six:
    vncipherlast    $out2,$out2,$ivec
    vncipherlast    $out3,$out3,$in2
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out2,$out2,$out2,$inpperm
    le?vperm    $out3,$out3,$out3,$inpperm
    stvx_u      $out2,$x00,$out
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u      $out3,$x10,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u      $out4,$x20,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x30,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x40,$out
    stvx_u      $out7,$x50,$out
    addi        $out,$out,0x60
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_five:
    vncipherlast    $out3,$out3,$ivec
    vncipherlast    $out4,$out4,$in3
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out3,$out3,$out3,$inpperm
    le?vperm    $out4,$out4,$out4,$inpperm
    stvx_u      $out3,$x00,$out
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u      $out4,$x10,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x20,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x30,$out
    stvx_u      $out7,$x40,$out
    addi        $out,$out,0x50
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_four:
    vncipherlast    $out4,$out4,$ivec
    vncipherlast    $out5,$out5,$in4
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out4,$out4,$out4,$inpperm
    le?vperm    $out5,$out5,$out5,$inpperm
    stvx_u      $out4,$x00,$out
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x10,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x20,$out
    stvx_u      $out7,$x30,$out
    addi        $out,$out,0x40
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_three:
    vncipherlast    $out5,$out5,$ivec
    vncipherlast    $out6,$out6,$in5
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out5,$out5,$out5,$inpperm
    le?vperm    $out6,$out6,$out6,$inpperm
    stvx_u      $out5,$x00,$out
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x10,$out
    stvx_u      $out7,$x20,$out
    addi        $out,$out,0x30
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_two:
    vncipherlast    $out6,$out6,$ivec
    vncipherlast    $out7,$out7,$in6
    vmr     $ivec,$in7

    le?vperm    $out6,$out6,$out6,$inpperm
    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out6,$x00,$out
    stvx_u      $out7,$x10,$out
    addi        $out,$out,0x20
    b       Lcbc_dec8x_done

.align  5
Lcbc_dec8x_one:
    vncipherlast    $out7,$out7,$ivec
    vmr     $ivec,$in7

    le?vperm    $out7,$out7,$out7,$inpperm
    stvx_u      $out7,0,$out
    addi        $out,$out,0x10

Lcbc_dec8x_done:
    le?vperm    $ivec,$ivec,$ivec,$inpperm
    stvx_u      $ivec,0,$ivp        # write [unaligned] iv

    li      r10,`$FRAME+15`
    li      r11,`$FRAME+31`
    stvx        $inpperm,r10,$sp    # wipe copies of round keys
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32
    stvx        $inpperm,r10,$sp
    addi        r10,r10,32
    stvx        $inpperm,r11,$sp
    addi        r11,r11,32

    mtspr       256,$vrsave
    lvx     v20,r10,$sp     # ABI says so
    addi        r10,r10,32
    lvx     v21,r11,$sp
    addi        r11,r11,32
    lvx     v22,r10,$sp
    addi        r10,r10,32
    lvx     v23,r11,$sp
    addi        r11,r11,32
    lvx     v24,r10,$sp
    addi        r10,r10,32
    lvx     v25,r11,$sp
    addi        r11,r11,32
    lvx     v26,r10,$sp
    addi        r10,r10,32
    lvx     v27,r11,$sp
    addi        r11,r11,32
    lvx     v28,r10,$sp
    addi        r10,r10,32
    lvx     v29,r11,$sp
    addi        r11,r11,32
    lvx     v30,r10,$sp
    lvx     v31,r11,$sp
    $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
    blr
    .long       0
    .byte       0,12,0x14,0,0x80,6,6,0
    .long       0
.size   .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}  }}}

#########################################################################
{{{ # CTR procedure[s]                      #

####################### WARNING: Here be dragons! #######################
#
# This code is written as 'ctr32', based on a 32-bit counter used
# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
# a 128-bit counter.
#
# This leads to subtle changes from the upstream code: the counter
# is incremented with vadduqm rather than vadduwm. This occurs in
# both the bulk (8 blocks at a time) path, and in the individual block
# path. Be aware of this when doing updates.
#
# See:
# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
# https://github.com/openssl/openssl/pull/8942
#
#########################################################################
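# As a worked example of the difference: if the low 32 bits of the IV
# are already 0xffffffff, a 32-bit vadduwm increment wraps them to 0 and
# leaves the upper 96 bits untouched, while the vadduqm used here
# carries into bit 32, matching the kernel's view of ctr(aes) as one
# big-endian 128-bit counter.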
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=     map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
                        map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl  .${prefix}_ctr32_encrypt_blocks
    ${UCMP}i    $len,1
    bltlr-

    lis     r0,0xfff0
    mfspr       $vrsave,256
    mtspr       256,r0

    li      $idx,15
    vxor        $rndkey0,$rndkey0,$rndkey0
    le?vspltisb $tmp,0x0f

    lvx     $ivec,0,$ivp        # load [unaligned] iv
    lvsl        $inpperm,0,$ivp
    lvx     $inptail,$idx,$ivp
     vspltisb   $one,1
    le?vxor     $inpperm,$inpperm,$tmp
    vperm       $ivec,$ivec,$inptail,$inpperm
     vsldoi     $one,$rndkey0,$one,1

    neg     r11,$inp
    ?lvsl       $keyperm,0,$key     # prepare for unaligned key
    lwz     $rounds,240($key)

    lvsr        $inpperm,0,r11      # prepare for unaligned load
    lvx     $inptail,0,$inp
    addi        $inp,$inp,15        # 15 is not typo
    le?vxor     $inpperm,$inpperm,$tmp

    srwi        $rounds,$rounds,1
    li      $idx,16
    subi        $rounds,$rounds,1

    ${UCMP}i    $len,8
    bge     _aesp8_ctr32_encrypt8x

    ?lvsr       $outperm,0,$out     # prepare for unaligned store
    vspltisb    $outmask,-1
    lvx     $outhead,0,$out
    ?vperm      $outmask,$rndkey0,$outmask,$outperm
    le?vxor     $outperm,$outperm,$tmp

    lvx     $rndkey0,0,$key
    mtctr       $rounds
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vxor        $inout,$ivec,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16
    b       Loop_ctr32_enc

.align  5
Loop_ctr32_enc:
    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher     $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
    addi        $idx,$idx,16
    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
    vcipher     $inout,$inout,$rndkey0
    lvx     $rndkey0,$idx,$key
    addi        $idx,$idx,16
    bdnz        Loop_ctr32_enc

    vadduqm     $ivec,$ivec,$one    # Kernel change for 128-bit
     vmr        $dat,$inptail
     lvx        $inptail,0,$inp
     addi       $inp,$inp,16
     subic.     $len,$len,1     # blocks--

    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
    vcipher     $inout,$inout,$rndkey1
    lvx     $rndkey1,$idx,$key
     vperm      $dat,$dat,$inptail,$inpperm
     li     $idx,16
    ?vperm      $rndkey1,$rndkey0,$rndkey1,$keyperm
     lvx        $rndkey0,0,$key
    vxor        $dat,$dat,$rndkey1  # last round key
    vcipherlast $inout,$inout,$dat

     lvx        $rndkey1,$idx,$key
     addi       $idx,$idx,16
    vperm       $inout,$inout,$inout,$outperm
    vsel        $dat,$outhead,$inout,$outmask
     mtctr      $rounds
     ?vperm     $rndkey0,$rndkey0,$rndkey1,$keyperm
    vmr     $outhead,$inout
     vxor       $inout,$ivec,$rndkey0
     lvx        $rndkey0,$idx,$key
     addi       $idx,$idx,16
    stvx        $dat,0,$out
    addi        $out,$out,16
    bne     Loop_ctr32_enc

    addi        $out,$out,-1
    lvx     $inout,0,$out       # redundant in aligned case
    vsel        $inout,$outhead,$inout,$outmask
    stvx        $inout,0,$out

    mtspr       256,$vrsave
    blr
    .long       0
    .byte       0,12,0x14,0,0,0,6,0
    .long       0
___
#########################################################################
{{  # Optimized CTR procedure                   #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";  # v24-v25 rotating buffer for the early round keys
1426             # v26-v31 last 6 round keys
1427 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1428 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1429 
1430 $code.=<<___;
1431 .align  5
1432 _aesp8_ctr32_encrypt8x:
1433     $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1434     li      r10,`$FRAME+8*16+15`
1435     li      r11,`$FRAME+8*16+31`
1436     stvx        v20,r10,$sp     # ABI says so
1437     addi        r10,r10,32
1438     stvx        v21,r11,$sp
1439     addi        r11,r11,32
1440     stvx        v22,r10,$sp
1441     addi        r10,r10,32
1442     stvx        v23,r11,$sp
1443     addi        r11,r11,32
1444     stvx        v24,r10,$sp
1445     addi        r10,r10,32
1446     stvx        v25,r11,$sp
1447     addi        r11,r11,32
1448     stvx        v26,r10,$sp
1449     addi        r10,r10,32
1450     stvx        v27,r11,$sp
1451     addi        r11,r11,32
1452     stvx        v28,r10,$sp
1453     addi        r10,r10,32
1454     stvx        v29,r11,$sp
1455     addi        r11,r11,32
1456     stvx        v30,r10,$sp
1457     stvx        v31,r11,$sp
1458     li      r0,-1
1459     stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
1460     li      $x10,0x10
1461     $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1462     li      $x20,0x20
1463     $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1464     li      $x30,0x30
1465     $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1466     li      $x40,0x40
1467     $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1468     li      $x50,0x50
1469     $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1470     li      $x60,0x60
1471     $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1472     li      $x70,0x70
1473     mtspr       256,r0
1474 
1475     subi        $rounds,$rounds,3   # -4 in total
1476 
1477     lvx     $rndkey0,$x00,$key  # load key schedule
1478     lvx     v30,$x10,$key
1479     addi        $key,$key,0x20
1480     lvx     v31,$x00,$key
1481     ?vperm      $rndkey0,$rndkey0,v30,$keyperm
1482     addi        $key_,$sp,$FRAME+15
1483     mtctr       $rounds
1484 
1485 Load_ctr32_enc_key:
1486     ?vperm      v24,v30,v31,$keyperm
1487     lvx     v30,$x10,$key
1488     addi        $key,$key,0x20
1489     stvx        v24,$x00,$key_      # off-load round[1]
1490     ?vperm      v25,v31,v30,$keyperm
1491     lvx     v31,$x00,$key
1492     stvx        v25,$x10,$key_      # off-load round[2]
1493     addi        $key_,$key_,0x20
1494     bdnz        Load_ctr32_enc_key
1495 
1496     lvx     v26,$x10,$key
1497     ?vperm      v24,v30,v31,$keyperm
1498     lvx     v27,$x20,$key
1499     stvx        v24,$x00,$key_      # off-load round[3]
1500     ?vperm      v25,v31,v26,$keyperm
1501     lvx     v28,$x30,$key
1502     stvx        v25,$x10,$key_      # off-load round[4]
1503     addi        $key_,$sp,$FRAME+15 # rewind $key_
1504     ?vperm      v26,v26,v27,$keyperm
1505     lvx     v29,$x40,$key
1506     ?vperm      v27,v27,v28,$keyperm
1507     lvx     v30,$x50,$key
1508     ?vperm      v28,v28,v29,$keyperm
1509     lvx     v31,$x60,$key
1510     ?vperm      v29,v29,v30,$keyperm
1511     lvx     $out0,$x70,$key     # borrow $out0
1512     ?vperm      v30,v30,v31,$keyperm
1513     lvx     v24,$x00,$key_      # pre-load round[1]
1514     ?vperm      v31,v31,$out0,$keyperm
1515     lvx     v25,$x10,$key_      # pre-load round[2]
1516 
1517     vadduqm     $two,$one,$one
1518     subi        $inp,$inp,15        # undo "caller"
1519     $SHL        $len,$len,4
1520 
1521     vadduqm     $out1,$ivec,$one    # counter values ...
1522     vadduqm     $out2,$ivec,$two    # (do all ctr adds as 128-bit)
1523     vxor        $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1524      le?li      $idx,8
1525     vadduqm     $out3,$out1,$two
1526     vxor        $out1,$out1,$rndkey0
1527      le?lvsl    $inpperm,0,$idx
1528     vadduqm     $out4,$out2,$two
1529     vxor        $out2,$out2,$rndkey0
1530      le?vspltisb    $tmp,0x0f
1531     vadduqm     $out5,$out3,$two
1532     vxor        $out3,$out3,$rndkey0
1533      le?vxor    $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
1534     vadduqm     $out6,$out4,$two
1535     vxor        $out4,$out4,$rndkey0
1536     vadduqm     $out7,$out5,$two
1537     vxor        $out5,$out5,$rndkey0
1538     vadduqm     $ivec,$out6,$two    # next counter value
1539     vxor        $out6,$out6,$rndkey0
1540     vxor        $out7,$out7,$rndkey0
1541 
1542     mtctr       $rounds
1543     b       Loop_ctr32_enc8x
1544 .align  5
1545 Loop_ctr32_enc8x:
1546     vcipher     $out0,$out0,v24
1547     vcipher     $out1,$out1,v24
1548     vcipher     $out2,$out2,v24
1549     vcipher     $out3,$out3,v24
1550     vcipher     $out4,$out4,v24
1551     vcipher     $out5,$out5,v24
1552     vcipher     $out6,$out6,v24
1553     vcipher     $out7,$out7,v24
1554 Loop_ctr32_enc8x_middle:
1555     lvx     v24,$x20,$key_      # round[3]
1556     addi        $key_,$key_,0x20
1557 
1558     vcipher     $out0,$out0,v25
1559     vcipher     $out1,$out1,v25
1560     vcipher     $out2,$out2,v25
1561     vcipher     $out3,$out3,v25
1562     vcipher     $out4,$out4,v25
1563     vcipher     $out5,$out5,v25
1564     vcipher     $out6,$out6,v25
1565     vcipher     $out7,$out7,v25
1566     lvx     v25,$x10,$key_      # round[4]
1567     bdnz        Loop_ctr32_enc8x
1568 
1569     subic       r11,$len,256        # $len-256, borrow $key_
1570     vcipher     $out0,$out0,v24
1571     vcipher     $out1,$out1,v24
1572     vcipher     $out2,$out2,v24
1573     vcipher     $out3,$out3,v24
1574     vcipher     $out4,$out4,v24
1575     vcipher     $out5,$out5,v24
1576     vcipher     $out6,$out6,v24
1577     vcipher     $out7,$out7,v24
1578 
1579     subfe       r0,r0,r0        # borrow?-1:0
1580     vcipher     $out0,$out0,v25
1581     vcipher     $out1,$out1,v25
1582     vcipher     $out2,$out2,v25
1583     vcipher     $out3,$out3,v25
1584     vcipher     $out4,$out4,v25
1585     vcipher     $out5,$out5,v25
1586     vcipher     $out6,$out6,v25
1587     vcipher     $out7,$out7,v25
1588 
1589     and     r0,r0,r11
1590     addi        $key_,$sp,$FRAME+15 # rewind $key_
1591     vcipher     $out0,$out0,v26
1592     vcipher     $out1,$out1,v26
1593     vcipher     $out2,$out2,v26
1594     vcipher     $out3,$out3,v26
1595     vcipher     $out4,$out4,v26
1596     vcipher     $out5,$out5,v26
1597     vcipher     $out6,$out6,v26
1598     vcipher     $out7,$out7,v26
1599     lvx     v24,$x00,$key_      # re-pre-load round[1]
1600 
1601     subic       $len,$len,129       # $len-=129, borrow iff $len<=128
1602     vcipher     $out0,$out0,v27
1603     addi        $len,$len,1     # $len-=128 really
1604     vcipher     $out1,$out1,v27
1605     vcipher     $out2,$out2,v27
1606     vcipher     $out3,$out3,v27
1607     vcipher     $out4,$out4,v27
1608     vcipher     $out5,$out5,v27
1609     vcipher     $out6,$out6,v27
1610     vcipher     $out7,$out7,v27
1611     lvx     v25,$x10,$key_      # re-pre-load round[2]
1612 
1613     vcipher     $out0,$out0,v28
1614      lvx_u      $in0,$x00,$inp      # load input
1615     vcipher     $out1,$out1,v28
1616      lvx_u      $in1,$x10,$inp
1617     vcipher     $out2,$out2,v28
1618      lvx_u      $in2,$x20,$inp
1619     vcipher     $out3,$out3,v28
1620      lvx_u      $in3,$x30,$inp
1621     vcipher     $out4,$out4,v28
1622      lvx_u      $in4,$x40,$inp
1623     vcipher     $out5,$out5,v28
1624      lvx_u      $in5,$x50,$inp
1625     vcipher     $out6,$out6,v28
1626      lvx_u      $in6,$x60,$inp
1627     vcipher     $out7,$out7,v28
1628      lvx_u      $in7,$x70,$inp
1629      addi       $inp,$inp,0x80
1630 
1631     vcipher     $out0,$out0,v29
1632      le?vperm   $in0,$in0,$in0,$inpperm
1633     vcipher     $out1,$out1,v29
1634      le?vperm   $in1,$in1,$in1,$inpperm
1635     vcipher     $out2,$out2,v29
1636      le?vperm   $in2,$in2,$in2,$inpperm
1637     vcipher     $out3,$out3,v29
1638      le?vperm   $in3,$in3,$in3,$inpperm
1639     vcipher     $out4,$out4,v29
1640      le?vperm   $in4,$in4,$in4,$inpperm
1641     vcipher     $out5,$out5,v29
1642      le?vperm   $in5,$in5,$in5,$inpperm
1643     vcipher     $out6,$out6,v29
1644      le?vperm   $in6,$in6,$in6,$inpperm
1645     vcipher     $out7,$out7,v29
1646      le?vperm   $in7,$in7,$in7,$inpperm
1647 
1648     add     $inp,$inp,r0        # $inp is adjusted in such a
1649                         # way that at exit from the
1650                         # loop inX..in7 are loaded
1651                         # with the last blocks
1652     subfe.      r0,r0,r0        # borrow?-1:0
1653     vcipher     $out0,$out0,v30
1654      vxor       $in0,$in0,v31       # xor with last round key
1655     vcipher     $out1,$out1,v30
1656      vxor       $in1,$in1,v31
1657     vcipher     $out2,$out2,v30
1658      vxor       $in2,$in2,v31
1659     vcipher     $out3,$out3,v30
1660      vxor       $in3,$in3,v31
1661     vcipher     $out4,$out4,v30
1662      vxor       $in4,$in4,v31
1663     vcipher     $out5,$out5,v30
1664      vxor       $in5,$in5,v31
1665     vcipher     $out6,$out6,v30
1666      vxor       $in6,$in6,v31
1667     vcipher     $out7,$out7,v30
1668      vxor       $in7,$in7,v31
1669 
1670     bne     Lctr32_enc8x_break  # did $len-129 borrow?
1671 
1672     vcipherlast $in0,$out0,$in0
1673     vcipherlast $in1,$out1,$in1
1674      vadduqm    $out1,$ivec,$one    # counter values ...
1675     vcipherlast $in2,$out2,$in2
1676      vadduqm    $out2,$ivec,$two
1677      vxor       $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
1678     vcipherlast $in3,$out3,$in3
1679      vadduqm    $out3,$out1,$two
1680      vxor       $out1,$out1,$rndkey0
1681     vcipherlast $in4,$out4,$in4
1682      vadduqm    $out4,$out2,$two
1683      vxor       $out2,$out2,$rndkey0
1684     vcipherlast $in5,$out5,$in5
1685      vadduqm    $out5,$out3,$two
1686      vxor       $out3,$out3,$rndkey0
1687     vcipherlast $in6,$out6,$in6
1688      vadduqm    $out6,$out4,$two
1689      vxor       $out4,$out4,$rndkey0
1690     vcipherlast $in7,$out7,$in7
1691      vadduqm    $out7,$out5,$two
1692      vxor       $out5,$out5,$rndkey0
1693     le?vperm    $in0,$in0,$in0,$inpperm
1694      vadduqm    $ivec,$out6,$two    # next counter value
1695      vxor       $out6,$out6,$rndkey0
1696     le?vperm    $in1,$in1,$in1,$inpperm
1697      vxor       $out7,$out7,$rndkey0
1698     mtctr       $rounds
1699 
1700      vcipher    $out0,$out0,v24
1701     stvx_u      $in0,$x00,$out
1702     le?vperm    $in2,$in2,$in2,$inpperm
1703      vcipher    $out1,$out1,v24
1704     stvx_u      $in1,$x10,$out
1705     le?vperm    $in3,$in3,$in3,$inpperm
1706      vcipher    $out2,$out2,v24
1707     stvx_u      $in2,$x20,$out
1708     le?vperm    $in4,$in4,$in4,$inpperm
1709      vcipher    $out3,$out3,v24
1710     stvx_u      $in3,$x30,$out
1711     le?vperm    $in5,$in5,$in5,$inpperm
1712      vcipher    $out4,$out4,v24
1713     stvx_u      $in4,$x40,$out
1714     le?vperm    $in6,$in6,$in6,$inpperm
1715      vcipher    $out5,$out5,v24
1716     stvx_u      $in5,$x50,$out
1717     le?vperm    $in7,$in7,$in7,$inpperm
1718      vcipher    $out6,$out6,v24
1719     stvx_u      $in6,$x60,$out
1720      vcipher    $out7,$out7,v24
1721     stvx_u      $in7,$x70,$out
1722     addi        $out,$out,0x80
1723 
1724     b       Loop_ctr32_enc8x_middle
1725 
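# at this point $len holds (bytes remaining - 128), i.e. -0x70 for a
# single final block up to 0 for a full batch of eight; the pointer
# adjustment above shifted the final loads so that the valid blocks
# always end at $in7, which is why the shorter cases below consume
# the high-numbered input registers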
1726 .align  5
1727 Lctr32_enc8x_break:
1728     cmpwi       $len,-0x60
1729     blt     Lctr32_enc8x_one
1730     nop
1731     beq     Lctr32_enc8x_two
1732     cmpwi       $len,-0x40
1733     blt     Lctr32_enc8x_three
1734     nop
1735     beq     Lctr32_enc8x_four
1736     cmpwi       $len,-0x20
1737     blt     Lctr32_enc8x_five
1738     nop
1739     beq     Lctr32_enc8x_six
1740     cmpwi       $len,0x00
1741     blt     Lctr32_enc8x_seven
1742 
1743 Lctr32_enc8x_eight:
1744     vcipherlast $out0,$out0,$in0
1745     vcipherlast $out1,$out1,$in1
1746     vcipherlast $out2,$out2,$in2
1747     vcipherlast $out3,$out3,$in3
1748     vcipherlast $out4,$out4,$in4
1749     vcipherlast $out5,$out5,$in5
1750     vcipherlast $out6,$out6,$in6
1751     vcipherlast $out7,$out7,$in7
1752 
1753     le?vperm    $out0,$out0,$out0,$inpperm
1754     le?vperm    $out1,$out1,$out1,$inpperm
1755     stvx_u      $out0,$x00,$out
1756     le?vperm    $out2,$out2,$out2,$inpperm
1757     stvx_u      $out1,$x10,$out
1758     le?vperm    $out3,$out3,$out3,$inpperm
1759     stvx_u      $out2,$x20,$out
1760     le?vperm    $out4,$out4,$out4,$inpperm
1761     stvx_u      $out3,$x30,$out
1762     le?vperm    $out5,$out5,$out5,$inpperm
1763     stvx_u      $out4,$x40,$out
1764     le?vperm    $out6,$out6,$out6,$inpperm
1765     stvx_u      $out5,$x50,$out
1766     le?vperm    $out7,$out7,$out7,$inpperm
1767     stvx_u      $out6,$x60,$out
1768     stvx_u      $out7,$x70,$out
1769     addi        $out,$out,0x80
1770     b       Lctr32_enc8x_done
1771 
1772 .align  5
1773 Lctr32_enc8x_seven:
1774     vcipherlast $out0,$out0,$in1
1775     vcipherlast $out1,$out1,$in2
1776     vcipherlast $out2,$out2,$in3
1777     vcipherlast $out3,$out3,$in4
1778     vcipherlast $out4,$out4,$in5
1779     vcipherlast $out5,$out5,$in6
1780     vcipherlast $out6,$out6,$in7
1781 
1782     le?vperm    $out0,$out0,$out0,$inpperm
1783     le?vperm    $out1,$out1,$out1,$inpperm
1784     stvx_u      $out0,$x00,$out
1785     le?vperm    $out2,$out2,$out2,$inpperm
1786     stvx_u      $out1,$x10,$out
1787     le?vperm    $out3,$out3,$out3,$inpperm
1788     stvx_u      $out2,$x20,$out
1789     le?vperm    $out4,$out4,$out4,$inpperm
1790     stvx_u      $out3,$x30,$out
1791     le?vperm    $out5,$out5,$out5,$inpperm
1792     stvx_u      $out4,$x40,$out
1793     le?vperm    $out6,$out6,$out6,$inpperm
1794     stvx_u      $out5,$x50,$out
1795     stvx_u      $out6,$x60,$out
1796     addi        $out,$out,0x70
1797     b       Lctr32_enc8x_done
1798 
1799 .align  5
1800 Lctr32_enc8x_six:
1801     vcipherlast $out0,$out0,$in2
1802     vcipherlast $out1,$out1,$in3
1803     vcipherlast $out2,$out2,$in4
1804     vcipherlast $out3,$out3,$in5
1805     vcipherlast $out4,$out4,$in6
1806     vcipherlast $out5,$out5,$in7
1807 
1808     le?vperm    $out0,$out0,$out0,$inpperm
1809     le?vperm    $out1,$out1,$out1,$inpperm
1810     stvx_u      $out0,$x00,$out
1811     le?vperm    $out2,$out2,$out2,$inpperm
1812     stvx_u      $out1,$x10,$out
1813     le?vperm    $out3,$out3,$out3,$inpperm
1814     stvx_u      $out2,$x20,$out
1815     le?vperm    $out4,$out4,$out4,$inpperm
1816     stvx_u      $out3,$x30,$out
1817     le?vperm    $out5,$out5,$out5,$inpperm
1818     stvx_u      $out4,$x40,$out
1819     stvx_u      $out5,$x50,$out
1820     addi        $out,$out,0x60
1821     b       Lctr32_enc8x_done
1822 
1823 .align  5
1824 Lctr32_enc8x_five:
1825     vcipherlast $out0,$out0,$in3
1826     vcipherlast $out1,$out1,$in4
1827     vcipherlast $out2,$out2,$in5
1828     vcipherlast $out3,$out3,$in6
1829     vcipherlast $out4,$out4,$in7
1830 
1831     le?vperm    $out0,$out0,$out0,$inpperm
1832     le?vperm    $out1,$out1,$out1,$inpperm
1833     stvx_u      $out0,$x00,$out
1834     le?vperm    $out2,$out2,$out2,$inpperm
1835     stvx_u      $out1,$x10,$out
1836     le?vperm    $out3,$out3,$out3,$inpperm
1837     stvx_u      $out2,$x20,$out
1838     le?vperm    $out4,$out4,$out4,$inpperm
1839     stvx_u      $out3,$x30,$out
1840     stvx_u      $out4,$x40,$out
1841     addi        $out,$out,0x50
1842     b       Lctr32_enc8x_done
1843 
1844 .align  5
1845 Lctr32_enc8x_four:
1846     vcipherlast $out0,$out0,$in4
1847     vcipherlast $out1,$out1,$in5
1848     vcipherlast $out2,$out2,$in6
1849     vcipherlast $out3,$out3,$in7
1850 
1851     le?vperm    $out0,$out0,$out0,$inpperm
1852     le?vperm    $out1,$out1,$out1,$inpperm
1853     stvx_u      $out0,$x00,$out
1854     le?vperm    $out2,$out2,$out2,$inpperm
1855     stvx_u      $out1,$x10,$out
1856     le?vperm    $out3,$out3,$out3,$inpperm
1857     stvx_u      $out2,$x20,$out
1858     stvx_u      $out3,$x30,$out
1859     addi        $out,$out,0x40
1860     b       Lctr32_enc8x_done
1861 
1862 .align  5
1863 Lctr32_enc8x_three:
1864     vcipherlast $out0,$out0,$in5
1865     vcipherlast $out1,$out1,$in6
1866     vcipherlast $out2,$out2,$in7
1867 
1868     le?vperm    $out0,$out0,$out0,$inpperm
1869     le?vperm    $out1,$out1,$out1,$inpperm
1870     stvx_u      $out0,$x00,$out
1871     le?vperm    $out2,$out2,$out2,$inpperm
1872     stvx_u      $out1,$x10,$out
1873     stvx_u      $out2,$x20,$out
1874     addi        $out,$out,0x30
1875     b       Lctr32_enc8x_done
1876 
1877 .align  5
1878 Lctr32_enc8x_two:
1879     vcipherlast $out0,$out0,$in6
1880     vcipherlast $out1,$out1,$in7
1881 
1882     le?vperm    $out0,$out0,$out0,$inpperm
1883     le?vperm    $out1,$out1,$out1,$inpperm
1884     stvx_u      $out0,$x00,$out
1885     stvx_u      $out1,$x10,$out
1886     addi        $out,$out,0x20
1887     b       Lctr32_enc8x_done
1888 
1889 .align  5
1890 Lctr32_enc8x_one:
1891     vcipherlast $out0,$out0,$in7
1892 
1893     le?vperm    $out0,$out0,$out0,$inpperm
1894     stvx_u      $out0,0,$out
1895     addi        $out,$out,0x10
1896 
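# the aligned round-key copies on the stack are overwritten with
# $inpperm (a non-secret permute vector) before the ABI-mandated
# restore of v20-v31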
1897 Lctr32_enc8x_done:
1898     li      r10,`$FRAME+15`
1899     li      r11,`$FRAME+31`
1900     stvx        $inpperm,r10,$sp    # wipe copies of round keys
1901     addi        r10,r10,32
1902     stvx        $inpperm,r11,$sp
1903     addi        r11,r11,32
1904     stvx        $inpperm,r10,$sp
1905     addi        r10,r10,32
1906     stvx        $inpperm,r11,$sp
1907     addi        r11,r11,32
1908     stvx        $inpperm,r10,$sp
1909     addi        r10,r10,32
1910     stvx        $inpperm,r11,$sp
1911     addi        r11,r11,32
1912     stvx        $inpperm,r10,$sp
1913     addi        r10,r10,32
1914     stvx        $inpperm,r11,$sp
1915     addi        r11,r11,32
1916 
1917     mtspr       256,$vrsave
1918     lvx     v20,r10,$sp     # ABI says so
1919     addi        r10,r10,32
1920     lvx     v21,r11,$sp
1921     addi        r11,r11,32
1922     lvx     v22,r10,$sp
1923     addi        r10,r10,32
1924     lvx     v23,r11,$sp
1925     addi        r11,r11,32
1926     lvx     v24,r10,$sp
1927     addi        r10,r10,32
1928     lvx     v25,r11,$sp
1929     addi        r11,r11,32
1930     lvx     v26,r10,$sp
1931     addi        r10,r10,32
1932     lvx     v27,r11,$sp
1933     addi        r11,r11,32
1934     lvx     v28,r10,$sp
1935     addi        r10,r10,32
1936     lvx     v29,r11,$sp
1937     addi        r11,r11,32
1938     lvx     v30,r10,$sp
1939     lvx     v31,r11,$sp
1940     $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1941     $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1942     $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1943     $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1944     $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1945     $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1946     addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1947     blr
1948     .long       0
1949     .byte       0,12,0x14,0,0x80,6,6,0
1950     .long       0
1951 .size   .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1952 ___
1953 }}  }}}
1954 
1955 #########################################################################
1956 {{{ # XTS procedures                                                    #
1957 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,   #
1958 #                             const AES_KEY *key1, const AES_KEY *key2, #
1959 #                             [const] unsigned char iv[16]);            #
1960 # If $key2 is NULL, a "tweak chaining" mode is engaged, in which the    #
1961 # input tweak value is assumed to be already encrypted, and the last    #
1962 # tweak value, suitable for a consecutive call on the same chunk of     #
1963 # data, is written back to the original buffer. In addition, in "tweak  #
1964 # chaining" mode only complete input blocks are processed.              #
1965 
1966 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1967 my ($rndkey0,$rndkey1,$inout) =             map("v$_",(0..2));
1968 my ($output,$inptail,$inpperm,$leperm,$keyperm) =   map("v$_",(3..7));
1969 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =      map("v$_",(8..12));
1970 my $taillen = $key2;
1971 
1972    ($inp,$idx) = ($idx,$inp);               # reassign
1973 
1974 $code.=<<___;
1975 .globl  .${prefix}_xts_encrypt
1976     mr      $inp,r3             # reassign
1977     li      r3,-1
1978     ${UCMP}i    $len,16
1979     bltlr-
1980 
1981     lis     r0,0xfff0
1982     mfspr       r12,256             # save vrsave
1983     li      r11,0
1984     mtspr       256,r0
1985 
1986     vspltisb    $seven,0x07         # 0x070707..07
1987     le?lvsl     $leperm,r11,r11
1988     le?vspltisb $tmp,0x0f
1989     le?vxor     $leperm,$leperm,$seven
1990 
1991     li      $idx,15
1992     lvx     $tweak,0,$ivp           # load [unaligned] iv
1993     lvsl        $inpperm,0,$ivp
1994     lvx     $inptail,$idx,$ivp
1995     le?vxor     $inpperm,$inpperm,$tmp
1996     vperm       $tweak,$tweak,$inptail,$inpperm
1997 
1998     neg     r11,$inp
1999     lvsr        $inpperm,0,r11          # prepare for unaligned load
2000     lvx     $inout,0,$inp
2001     addi        $inp,$inp,15            # 15 is not a typo
2002     le?vxor     $inpperm,$inpperm,$tmp
2003 
2004     ${UCMP}i    $key2,0             # key2==NULL?
2005     beq     Lxts_enc_no_key2
2006 
2007     ?lvsl       $keyperm,0,$key2        # prepare for unaligned key
2008     lwz     $rounds,240($key2)
2009     srwi        $rounds,$rounds,1
2010     subi        $rounds,$rounds,1
2011     li      $idx,16
2012 
2013     lvx     $rndkey0,0,$key2
2014     lvx     $rndkey1,$idx,$key2
2015     addi        $idx,$idx,16
2016     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2017     vxor        $tweak,$tweak,$rndkey0
2018     lvx     $rndkey0,$idx,$key2
2019     addi        $idx,$idx,16
2020     mtctr       $rounds
2021 
2022 Ltweak_xts_enc:
2023     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2024     vcipher     $tweak,$tweak,$rndkey1
2025     lvx     $rndkey1,$idx,$key2
2026     addi        $idx,$idx,16
2027     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2028     vcipher     $tweak,$tweak,$rndkey0
2029     lvx     $rndkey0,$idx,$key2
2030     addi        $idx,$idx,16
2031     bdnz        Ltweak_xts_enc
2032 
2033     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2034     vcipher     $tweak,$tweak,$rndkey1
2035     lvx     $rndkey1,$idx,$key2
2036     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2037     vcipherlast $tweak,$tweak,$rndkey0
2038 
2039     li      $ivp,0              # don't chain the tweak
2040     b       Lxts_enc
2041 
2042 Lxts_enc_no_key2:
2043     li      $idx,-16
2044     and     $len,$len,$idx          # in "tweak chaining"
2045                             # mode only complete
2046                             # blocks are processed
2047 Lxts_enc:
2048     lvx     $inptail,0,$inp
2049     addi        $inp,$inp,16
2050 
2051     ?lvsl       $keyperm,0,$key1        # prepare for unaligned key
2052     lwz     $rounds,240($key1)
2053     srwi        $rounds,$rounds,1
2054     subi        $rounds,$rounds,1
2055     li      $idx,16
2056 
2057     vslb        $eighty7,$seven,$seven      # 0x808080..80
2058     vor     $eighty7,$eighty7,$seven    # 0x878787..87
2059     vspltisb    $tmp,1              # 0x010101..01
2060     vsldoi      $eighty7,$eighty7,$tmp,15   # 0x870101..01
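# the recurring five-instruction tweak update multiplies $tweak by x
# in GF(2^128): vaddubm doubles every byte, vsrab turns each byte's
# old msb into a 0x00/0xff mask, vsldoi moves the masks up one byte,
# and the AND with $eighty7 converts them into inter-byte carries
# (0x01) plus the 0x87 reduction folded into the low byte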
2061 
2062     ${UCMP}i    $len,96
2063     bge     _aesp8_xts_encrypt6x
2064 
2065     andi.       $taillen,$len,15
2066     subic       r0,$len,32
2067     subi        $taillen,$taillen,16
2068     subfe       r0,r0,r0
2069     and     r0,r0,$taillen
2070     add     $inp,$inp,r0
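# when fewer than two full blocks remain, back $inp up so that the
# next 16-byte load ends exactly at the last input byte; the tail is
# thus fetched without touching memory past the buffer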
2071 
2072     lvx     $rndkey0,0,$key1
2073     lvx     $rndkey1,$idx,$key1
2074     addi        $idx,$idx,16
2075     vperm       $inout,$inout,$inptail,$inpperm
2076     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2077     vxor        $inout,$inout,$tweak
2078     vxor        $inout,$inout,$rndkey0
2079     lvx     $rndkey0,$idx,$key1
2080     addi        $idx,$idx,16
2081     mtctr       $rounds
2082     b       Loop_xts_enc
2083 
2084 .align  5
2085 Loop_xts_enc:
2086     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2087     vcipher     $inout,$inout,$rndkey1
2088     lvx     $rndkey1,$idx,$key1
2089     addi        $idx,$idx,16
2090     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2091     vcipher     $inout,$inout,$rndkey0
2092     lvx     $rndkey0,$idx,$key1
2093     addi        $idx,$idx,16
2094     bdnz        Loop_xts_enc
2095 
2096     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2097     vcipher     $inout,$inout,$rndkey1
2098     lvx     $rndkey1,$idx,$key1
2099     li      $idx,16
2100     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2101     vxor        $rndkey0,$rndkey0,$tweak
2102     vcipherlast $output,$inout,$rndkey0
2103 
2104     le?vperm    $tmp,$output,$output,$leperm
2105     be?nop
2106     le?stvx_u   $tmp,0,$out
2107     be?stvx_u   $output,0,$out
2108     addi        $out,$out,16
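# the le?/be? pairs above are resolved at translation time: each
# instruction is emitted only for little-/big-endian targets
# (be?nop keeps both variants the same length); a bare '?' prefix
# likewise marks instructions the translator may rewrite for
# endianness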
2109 
2110     subic.      $len,$len,16
2111     beq     Lxts_enc_done
2112 
2113     vmr     $inout,$inptail
2114     lvx     $inptail,0,$inp
2115     addi        $inp,$inp,16
2116     lvx     $rndkey0,0,$key1
2117     lvx     $rndkey1,$idx,$key1
2118     addi        $idx,$idx,16
2119 
2120     subic       r0,$len,32
2121     subfe       r0,r0,r0
2122     and     r0,r0,$taillen
2123     add     $inp,$inp,r0
2124 
2125     vsrab       $tmp,$tweak,$seven      # next tweak value
2126     vaddubm     $tweak,$tweak,$tweak
2127     vsldoi      $tmp,$tmp,$tmp,15
2128     vand        $tmp,$tmp,$eighty7
2129     vxor        $tweak,$tweak,$tmp
2130 
2131     vperm       $inout,$inout,$inptail,$inpperm
2132     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2133     vxor        $inout,$inout,$tweak
2134     vxor        $output,$output,$rndkey0    # just in case $len<16
2135     vxor        $inout,$inout,$rndkey0
2136     lvx     $rndkey0,$idx,$key1
2137     addi        $idx,$idx,16
2138 
2139     mtctr       $rounds
2140     ${UCMP}i    $len,16
2141     bge     Loop_xts_enc
2142 
2143     vxor        $output,$output,$tweak
2144     lvsr        $inpperm,0,$len         # $inpperm is no longer needed
2145     vxor        $inptail,$inptail,$inptail  # $inptail is no longer needed
2146     vspltisb    $tmp,-1
2147     vperm       $inptail,$inptail,$tmp,$inpperm
2148     vsel        $inout,$inout,$output,$inptail
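# ciphertext stealing: the mask built above keeps the $len tail
# plaintext bytes of $inout and fills the remainder from the
# previous ciphertext block; the byte loop below moves the displaced
# ciphertext forward to become the short final output, and the
# spliced block is encrypted once more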
2149 
2150     subi        r11,$out,17
2151     subi        $out,$out,16
2152     mtctr       $len
2153     li      $len,16
2154 Loop_xts_enc_steal:
2155     lbzu        r0,1(r11)
2156     stb     r0,16(r11)
2157     bdnz        Loop_xts_enc_steal
2158 
2159     mtctr       $rounds
2160     b       Loop_xts_enc            # one more time...
2161 
2162 Lxts_enc_done:
2163     ${UCMP}i    $ivp,0
2164     beq     Lxts_enc_ret
2165 
2166     vsrab       $tmp,$tweak,$seven      # next tweak value
2167     vaddubm     $tweak,$tweak,$tweak
2168     vsldoi      $tmp,$tmp,$tmp,15
2169     vand        $tmp,$tmp,$eighty7
2170     vxor        $tweak,$tweak,$tmp
2171 
2172     le?vperm    $tweak,$tweak,$tweak,$leperm
2173     stvx_u      $tweak,0,$ivp
2174 
2175 Lxts_enc_ret:
2176     mtspr       256,r12             # restore vrsave
2177     li      r3,0
2178     blr
2179     .long       0
2180     .byte       0,12,0x04,0,0x80,6,6,0
2181     .long       0
2182 .size   .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2183 
2184 .globl  .${prefix}_xts_decrypt
2185     mr      $inp,r3             # reassign
2186     li      r3,-1
2187     ${UCMP}i    $len,16
2188     bltlr-
2189 
2190     lis     r0,0xfff8
2191     mfspr       r12,256             # save vrsave
2192     li      r11,0
2193     mtspr       256,r0
2194 
2195     andi.       r0,$len,15
2196     neg     r0,r0
2197     andi.       r0,r0,16
2198     sub     $len,$len,r0
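# when $len is not a multiple of 16, hold back one full block: it is
# processed together with the tail in the stealing path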
2199 
2200     vspltisb    $seven,0x07         # 0x070707..07
2201     le?lvsl     $leperm,r11,r11
2202     le?vspltisb $tmp,0x0f
2203     le?vxor     $leperm,$leperm,$seven
2204 
2205     li      $idx,15
2206     lvx     $tweak,0,$ivp           # load [unaligned] iv
2207     lvsl        $inpperm,0,$ivp
2208     lvx     $inptail,$idx,$ivp
2209     le?vxor     $inpperm,$inpperm,$tmp
2210     vperm       $tweak,$tweak,$inptail,$inpperm
2211 
2212     neg     r11,$inp
2213     lvsr        $inpperm,0,r11          # prepare for unaligned load
2214     lvx     $inout,0,$inp
2215     addi        $inp,$inp,15            # 15 is not a typo
2216     le?vxor     $inpperm,$inpperm,$tmp
2217 
2218     ${UCMP}i    $key2,0             # key2==NULL?
2219     beq     Lxts_dec_no_key2
2220 
2221     ?lvsl       $keyperm,0,$key2        # prepare for unaligned key
2222     lwz     $rounds,240($key2)
2223     srwi        $rounds,$rounds,1
2224     subi        $rounds,$rounds,1
2225     li      $idx,16
2226 
2227     lvx     $rndkey0,0,$key2
2228     lvx     $rndkey1,$idx,$key2
2229     addi        $idx,$idx,16
2230     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2231     vxor        $tweak,$tweak,$rndkey0
2232     lvx     $rndkey0,$idx,$key2
2233     addi        $idx,$idx,16
2234     mtctr       $rounds
2235 
2236 Ltweak_xts_dec:
2237     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2238     vcipher     $tweak,$tweak,$rndkey1
2239     lvx     $rndkey1,$idx,$key2
2240     addi        $idx,$idx,16
2241     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2242     vcipher     $tweak,$tweak,$rndkey0
2243     lvx     $rndkey0,$idx,$key2
2244     addi        $idx,$idx,16
2245     bdnz        Ltweak_xts_dec
2246 
2247     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2248     vcipher     $tweak,$tweak,$rndkey1
2249     lvx     $rndkey1,$idx,$key2
2250     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2251     vcipherlast $tweak,$tweak,$rndkey0
2252 
2253     li      $ivp,0              # don't chain the tweak
2254     b       Lxts_dec
2255 
2256 Lxts_dec_no_key2:
2257     neg     $idx,$len
2258     andi.       $idx,$idx,15
2259     add     $len,$len,$idx          # in "tweak chaining"
2260                             # mode only complete
2261                             # blocks are processed
2262 Lxts_dec:
2263     lvx     $inptail,0,$inp
2264     addi        $inp,$inp,16
2265 
2266     ?lvsl       $keyperm,0,$key1        # prepare for unaligned key
2267     lwz     $rounds,240($key1)
2268     srwi        $rounds,$rounds,1
2269     subi        $rounds,$rounds,1
2270     li      $idx,16
2271 
2272     vslb        $eighty7,$seven,$seven      # 0x808080..80
2273     vor     $eighty7,$eighty7,$seven    # 0x878787..87
2274     vspltisb    $tmp,1              # 0x010101..01
2275     vsldoi      $eighty7,$eighty7,$tmp,15   # 0x870101..01
2276 
2277     ${UCMP}i    $len,96
2278     bge     _aesp8_xts_decrypt6x
2279 
2280     lvx     $rndkey0,0,$key1
2281     lvx     $rndkey1,$idx,$key1
2282     addi        $idx,$idx,16
2283     vperm       $inout,$inout,$inptail,$inpperm
2284     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2285     vxor        $inout,$inout,$tweak
2286     vxor        $inout,$inout,$rndkey0
2287     lvx     $rndkey0,$idx,$key1
2288     addi        $idx,$idx,16
2289     mtctr       $rounds
2290 
2291     ${UCMP}i    $len,16
2292     blt     Ltail_xts_dec
2293     be?b        Loop_xts_dec
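# unlike the encrypt path, a sub-block tail is peeled off before the
# first full block: stealing on the decrypt side processes the
# second-to-last block with the *next* tweak (see $tweak1 in
# Ltail_xts_dec below)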
2294 
2295 .align  5
2296 Loop_xts_dec:
2297     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2298     vncipher    $inout,$inout,$rndkey1
2299     lvx     $rndkey1,$idx,$key1
2300     addi        $idx,$idx,16
2301     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2302     vncipher    $inout,$inout,$rndkey0
2303     lvx     $rndkey0,$idx,$key1
2304     addi        $idx,$idx,16
2305     bdnz        Loop_xts_dec
2306 
2307     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2308     vncipher    $inout,$inout,$rndkey1
2309     lvx     $rndkey1,$idx,$key1
2310     li      $idx,16
2311     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2312     vxor        $rndkey0,$rndkey0,$tweak
2313     vncipherlast    $output,$inout,$rndkey0
2314 
2315     le?vperm    $tmp,$output,$output,$leperm
2316     be?nop
2317     le?stvx_u   $tmp,0,$out
2318     be?stvx_u   $output,0,$out
2319     addi        $out,$out,16
2320 
2321     subic.      $len,$len,16
2322     beq     Lxts_dec_done
2323 
2324     vmr     $inout,$inptail
2325     lvx     $inptail,0,$inp
2326     addi        $inp,$inp,16
2327     lvx     $rndkey0,0,$key1
2328     lvx     $rndkey1,$idx,$key1
2329     addi        $idx,$idx,16
2330 
2331     vsrab       $tmp,$tweak,$seven      # next tweak value
2332     vaddubm     $tweak,$tweak,$tweak
2333     vsldoi      $tmp,$tmp,$tmp,15
2334     vand        $tmp,$tmp,$eighty7
2335     vxor        $tweak,$tweak,$tmp
2336 
2337     vperm       $inout,$inout,$inptail,$inpperm
2338     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2339     vxor        $inout,$inout,$tweak
2340     vxor        $inout,$inout,$rndkey0
2341     lvx     $rndkey0,$idx,$key1
2342     addi        $idx,$idx,16
2343 
2344     mtctr       $rounds
2345     ${UCMP}i    $len,16
2346     bge     Loop_xts_dec
2347 
2348 Ltail_xts_dec:
2349     vsrab       $tmp,$tweak,$seven      # next tweak value
2350     vaddubm     $tweak1,$tweak,$tweak
2351     vsldoi      $tmp,$tmp,$tmp,15
2352     vand        $tmp,$tmp,$eighty7
2353     vxor        $tweak1,$tweak1,$tmp
2354 
2355     subi        $inp,$inp,16
2356     add     $inp,$inp,$len
2357 
2358     vxor        $inout,$inout,$tweak        # :-(
2359     vxor        $inout,$inout,$tweak1       # :-)
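# stealing on the decrypt side uses the last two tweaks in swapped
# order: undo the $tweak xor applied at loop entry and substitute
# $tweak1, the next tweak, for this second-to-last block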
2360 
2361 Loop_xts_dec_short:
2362     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2363     vncipher    $inout,$inout,$rndkey1
2364     lvx     $rndkey1,$idx,$key1
2365     addi        $idx,$idx,16
2366     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2367     vncipher    $inout,$inout,$rndkey0
2368     lvx     $rndkey0,$idx,$key1
2369     addi        $idx,$idx,16
2370     bdnz        Loop_xts_dec_short
2371 
2372     ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
2373     vncipher    $inout,$inout,$rndkey1
2374     lvx     $rndkey1,$idx,$key1
2375     li      $idx,16
2376     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2377     vxor        $rndkey0,$rndkey0,$tweak1
2378     vncipherlast    $output,$inout,$rndkey0
2379 
2380     le?vperm    $tmp,$output,$output,$leperm
2381     be?nop
2382     le?stvx_u   $tmp,0,$out
2383     be?stvx_u   $output,0,$out
2384 
2385     vmr     $inout,$inptail
2386     lvx     $inptail,0,$inp
2387     #addi       $inp,$inp,16
2388     lvx     $rndkey0,0,$key1
2389     lvx     $rndkey1,$idx,$key1
2390     addi        $idx,$idx,16
2391     vperm       $inout,$inout,$inptail,$inpperm
2392     ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
2393 
2394     lvsr        $inpperm,0,$len         # $inpperm is no longer needed
2395     vxor        $inptail,$inptail,$inptail  # $inptail is no longer needed
2396     vspltisb    $tmp,-1
2397     vperm       $inptail,$inptail,$tmp,$inpperm
2398     vsel        $inout,$inout,$output,$inptail
2399 
2400     vxor        $rndkey0,$rndkey0,$tweak
2401     vxor        $inout,$inout,$rndkey0
2402     lvx     $rndkey0,$idx,$key1
2403     addi        $idx,$idx,16
2404 
2405     subi        r11,$out,1
2406     mtctr       $len
2407     li      $len,16
2408 Loop_xts_dec_steal:
2409     lbzu        r0,1(r11)
2410     stb     r0,16(r11)
2411     bdnz        Loop_xts_dec_steal
2412 
2413     mtctr       $rounds
2414     b       Loop_xts_dec            # one more time...
2415 
2416 Lxts_dec_done:
2417     ${UCMP}i    $ivp,0
2418     beq     Lxts_dec_ret
2419 
2420     vsrab       $tmp,$tweak,$seven      # next tweak value
2421     vaddubm     $tweak,$tweak,$tweak
2422     vsldoi      $tmp,$tmp,$tmp,15
2423     vand        $tmp,$tmp,$eighty7
2424     vxor        $tweak,$tweak,$tmp
2425 
2426     le?vperm    $tweak,$tweak,$tweak,$leperm
2427     stvx_u      $tweak,0,$ivp
2428 
2429 Lxts_dec_ret:
2430     mtspr       256,r12             # restore vrsave
2431     li      r3,0
2432     blr
2433     .long       0
2434     .byte       0,12,0x04,0,0x80,6,6,0
2435     .long       0
2436 .size   .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2437 ___
2438 #########################################################################
2439 {{  # Optimized XTS procedures                  #
2440 my $key_=$key2;
2441 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2442     $x00=0 if ($flavour =~ /osx/);
2443 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
2444 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2445 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2446 my $rndkey0="v23";  # v24-v25 rotating buffer for first round keys
2447             # v26-v31 last 6 round keys
2448 my ($keyperm)=($out0);  # aliases with "caller", redundant assignment
2449 my $taillen=$x70;
2450 
2451 $code.=<<___;
2452 .align  5
2453 _aesp8_xts_encrypt6x:
2454     $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2455     mflr        r11
2456     li      r7,`$FRAME+8*16+15`
2457     li      r3,`$FRAME+8*16+31`
2458     $PUSH       r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2459     stvx        v20,r7,$sp      # ABI says so
2460     addi        r7,r7,32
2461     stvx        v21,r3,$sp
2462     addi        r3,r3,32
2463     stvx        v22,r7,$sp
2464     addi        r7,r7,32
2465     stvx        v23,r3,$sp
2466     addi        r3,r3,32
2467     stvx        v24,r7,$sp
2468     addi        r7,r7,32
2469     stvx        v25,r3,$sp
2470     addi        r3,r3,32
2471     stvx        v26,r7,$sp
2472     addi        r7,r7,32
2473     stvx        v27,r3,$sp
2474     addi        r3,r3,32
2475     stvx        v28,r7,$sp
2476     addi        r7,r7,32
2477     stvx        v29,r3,$sp
2478     addi        r3,r3,32
2479     stvx        v30,r7,$sp
2480     stvx        v31,r3,$sp
2481     li      r0,-1
2482     stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
2483     li      $x10,0x10
2484     $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2485     li      $x20,0x20
2486     $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2487     li      $x30,0x30
2488     $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2489     li      $x40,0x40
2490     $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2491     li      $x50,0x50
2492     $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2493     li      $x60,0x60
2494     $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2495     li      $x70,0x70
2496     mtspr       256,r0
2497 
2498     subi        $rounds,$rounds,3   # -4 in total
2499 
2500     lvx     $rndkey0,$x00,$key1 # load key schedule
2501     lvx     v30,$x10,$key1
2502     addi        $key1,$key1,0x20
2503     lvx     v31,$x00,$key1
2504     ?vperm      $rndkey0,$rndkey0,v30,$keyperm
2505     addi        $key_,$sp,$FRAME+15
2506     mtctr       $rounds
2507 
2508 Load_xts_enc_key:
2509     ?vperm      v24,v30,v31,$keyperm
2510     lvx     v30,$x10,$key1
2511     addi        $key1,$key1,0x20
2512     stvx        v24,$x00,$key_      # off-load round[1]
2513     ?vperm      v25,v31,v30,$keyperm
2514     lvx     v31,$x00,$key1
2515     stvx        v25,$x10,$key_      # off-load round[2]
2516     addi        $key_,$key_,0x20
2517     bdnz        Load_xts_enc_key
2518 
2519     lvx     v26,$x10,$key1
2520     ?vperm      v24,v30,v31,$keyperm
2521     lvx     v27,$x20,$key1
2522     stvx        v24,$x00,$key_      # off-load round[3]
2523     ?vperm      v25,v31,v26,$keyperm
2524     lvx     v28,$x30,$key1
2525     stvx        v25,$x10,$key_      # off-load round[4]
2526     addi        $key_,$sp,$FRAME+15 # rewind $key_
2527     ?vperm      v26,v26,v27,$keyperm
2528     lvx     v29,$x40,$key1
2529     ?vperm      v27,v27,v28,$keyperm
2530     lvx     v30,$x50,$key1
2531     ?vperm      v28,v28,v29,$keyperm
2532     lvx     v31,$x60,$key1
2533     ?vperm      v29,v29,v30,$keyperm
2534     lvx     $twk5,$x70,$key1    # borrow $twk5
2535     ?vperm      v30,v30,v31,$keyperm
2536     lvx     v24,$x00,$key_      # pre-load round[1]
2537     ?vperm      v31,v31,$twk5,$keyperm
2538     lvx     v25,$x10,$key_      # pre-load round[2]
2539 
2540      vperm      $in0,$inout,$inptail,$inpperm
2541      subi       $inp,$inp,31        # undo "caller"
2542     vxor        $twk0,$tweak,$rndkey0
2543     vsrab       $tmp,$tweak,$seven  # next tweak value
2544     vaddubm     $tweak,$tweak,$tweak
2545     vsldoi      $tmp,$tmp,$tmp,15
2546     vand        $tmp,$tmp,$eighty7
2547      vxor       $out0,$in0,$twk0
2548     vxor        $tweak,$tweak,$tmp
2549 
2550      lvx_u      $in1,$x10,$inp
2551     vxor        $twk1,$tweak,$rndkey0
2552     vsrab       $tmp,$tweak,$seven  # next tweak value
2553     vaddubm     $tweak,$tweak,$tweak
2554     vsldoi      $tmp,$tmp,$tmp,15
2555      le?vperm   $in1,$in1,$in1,$leperm
2556     vand        $tmp,$tmp,$eighty7
2557      vxor       $out1,$in1,$twk1
2558     vxor        $tweak,$tweak,$tmp
2559 
2560      lvx_u      $in2,$x20,$inp
2561      andi.      $taillen,$len,15
2562     vxor        $twk2,$tweak,$rndkey0
2563     vsrab       $tmp,$tweak,$seven  # next tweak value
2564     vaddubm     $tweak,$tweak,$tweak
2565     vsldoi      $tmp,$tmp,$tmp,15
2566      le?vperm   $in2,$in2,$in2,$leperm
2567     vand        $tmp,$tmp,$eighty7
2568      vxor       $out2,$in2,$twk2
2569     vxor        $tweak,$tweak,$tmp
2570 
2571      lvx_u      $in3,$x30,$inp
2572      sub        $len,$len,$taillen
2573     vxor        $twk3,$tweak,$rndkey0
2574     vsrab       $tmp,$tweak,$seven  # next tweak value
2575     vaddubm     $tweak,$tweak,$tweak
2576     vsldoi      $tmp,$tmp,$tmp,15
2577      le?vperm   $in3,$in3,$in3,$leperm
2578     vand        $tmp,$tmp,$eighty7
2579      vxor       $out3,$in3,$twk3
2580     vxor        $tweak,$tweak,$tmp
2581 
2582      lvx_u      $in4,$x40,$inp
2583      subi       $len,$len,0x60
2584     vxor        $twk4,$tweak,$rndkey0
2585     vsrab       $tmp,$tweak,$seven  # next tweak value
2586     vaddubm     $tweak,$tweak,$tweak
2587     vsldoi      $tmp,$tmp,$tmp,15
2588      le?vperm   $in4,$in4,$in4,$leperm
2589     vand        $tmp,$tmp,$eighty7
2590      vxor       $out4,$in4,$twk4
2591     vxor        $tweak,$tweak,$tmp
2592 
2593      lvx_u      $in5,$x50,$inp
2594      addi       $inp,$inp,0x60
2595     vxor        $twk5,$tweak,$rndkey0
2596     vsrab       $tmp,$tweak,$seven  # next tweak value
2597     vaddubm     $tweak,$tweak,$tweak
2598     vsldoi      $tmp,$tmp,$tmp,15
2599      le?vperm   $in5,$in5,$in5,$leperm
2600     vand        $tmp,$tmp,$eighty7
2601      vxor       $out5,$in5,$twk5
2602     vxor        $tweak,$tweak,$tmp
2603 
2604     vxor        v31,v31,$rndkey0
2605     mtctr       $rounds
2606     b       Loop_xts_enc6x
2607 
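# in the loop below the instructions indented by one extra space
# (vxor, vsrab, vaddubm, vsldoi, vand) compute the six tweaks for
# the next iteration, interleaved with the AES rounds to hide their
# latency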
2608 .align  5
2609 Loop_xts_enc6x:
2610     vcipher     $out0,$out0,v24
2611     vcipher     $out1,$out1,v24
2612     vcipher     $out2,$out2,v24
2613     vcipher     $out3,$out3,v24
2614     vcipher     $out4,$out4,v24
2615     vcipher     $out5,$out5,v24
2616     lvx     v24,$x20,$key_      # round[3]
2617     addi        $key_,$key_,0x20
2618 
2619     vcipher     $out0,$out0,v25
2620     vcipher     $out1,$out1,v25
2621     vcipher     $out2,$out2,v25
2622     vcipher     $out3,$out3,v25
2623     vcipher     $out4,$out4,v25
2624     vcipher     $out5,$out5,v25
2625     lvx     v25,$x10,$key_      # round[4]
2626     bdnz        Loop_xts_enc6x
2627 
2628     subic       $len,$len,96        # $len-=96
2629      vxor       $in0,$twk0,v31      # xor with last round key
2630     vcipher     $out0,$out0,v24
2631     vcipher     $out1,$out1,v24
2632      vsrab      $tmp,$tweak,$seven  # next tweak value
2633      vxor       $twk0,$tweak,$rndkey0
2634      vaddubm    $tweak,$tweak,$tweak
2635     vcipher     $out2,$out2,v24
2636     vcipher     $out3,$out3,v24
2637      vsldoi     $tmp,$tmp,$tmp,15
2638     vcipher     $out4,$out4,v24
2639     vcipher     $out5,$out5,v24
2640 
2641     subfe.      r0,r0,r0        # borrow?-1:0
2642      vand       $tmp,$tmp,$eighty7
2643     vcipher     $out0,$out0,v25
2644     vcipher     $out1,$out1,v25
2645      vxor       $tweak,$tweak,$tmp
2646     vcipher     $out2,$out2,v25
2647     vcipher     $out3,$out3,v25
2648      vxor       $in1,$twk1,v31
2649      vsrab      $tmp,$tweak,$seven  # next tweak value
2650      vxor       $twk1,$tweak,$rndkey0
2651     vcipher     $out4,$out4,v25
2652     vcipher     $out5,$out5,v25
2653 
2654     and     r0,r0,$len
2655      vaddubm    $tweak,$tweak,$tweak
2656      vsldoi     $tmp,$tmp,$tmp,15
2657     vcipher     $out0,$out0,v26
2658     vcipher     $out1,$out1,v26
2659      vand       $tmp,$tmp,$eighty7
2660     vcipher     $out2,$out2,v26
2661     vcipher     $out3,$out3,v26
2662      vxor       $tweak,$tweak,$tmp
2663     vcipher     $out4,$out4,v26
2664     vcipher     $out5,$out5,v26
2665 
2666     add     $inp,$inp,r0        # $inp is adjusted in such a
2667                         # way that at exit from the
2668                         # loop inX..in5 are loaded
2669                         # with the last blocks
2670      vxor       $in2,$twk2,v31
2671      vsrab      $tmp,$tweak,$seven  # next tweak value
2672      vxor       $twk2,$tweak,$rndkey0
2673      vaddubm    $tweak,$tweak,$tweak
2674     vcipher     $out0,$out0,v27
2675     vcipher     $out1,$out1,v27
2676      vsldoi     $tmp,$tmp,$tmp,15
2677     vcipher     $out2,$out2,v27
2678     vcipher     $out3,$out3,v27
2679      vand       $tmp,$tmp,$eighty7
2680     vcipher     $out4,$out4,v27
2681     vcipher     $out5,$out5,v27
2682 
2683     addi        $key_,$sp,$FRAME+15 # rewind $key_
2684      vxor       $tweak,$tweak,$tmp
2685     vcipher     $out0,$out0,v28
2686     vcipher     $out1,$out1,v28
2687      vxor       $in3,$twk3,v31
2688      vsrab      $tmp,$tweak,$seven  # next tweak value
2689      vxor       $twk3,$tweak,$rndkey0
2690     vcipher     $out2,$out2,v28
2691     vcipher     $out3,$out3,v28
2692      vaddubm    $tweak,$tweak,$tweak
2693      vsldoi     $tmp,$tmp,$tmp,15
2694     vcipher     $out4,$out4,v28
2695     vcipher     $out5,$out5,v28
2696     lvx     v24,$x00,$key_      # re-pre-load round[1]
2697      vand       $tmp,$tmp,$eighty7
2698 
2699     vcipher     $out0,$out0,v29
2700     vcipher     $out1,$out1,v29
2701      vxor       $tweak,$tweak,$tmp
2702     vcipher     $out2,$out2,v29
2703     vcipher     $out3,$out3,v29
2704      vxor       $in4,$twk4,v31
2705      vsrab      $tmp,$tweak,$seven  # next tweak value
2706      vxor       $twk4,$tweak,$rndkey0
2707     vcipher     $out4,$out4,v29
2708     vcipher     $out5,$out5,v29
2709     lvx     v25,$x10,$key_      # re-pre-load round[2]
2710      vaddubm    $tweak,$tweak,$tweak
2711      vsldoi     $tmp,$tmp,$tmp,15
2712 
2713     vcipher     $out0,$out0,v30
2714     vcipher     $out1,$out1,v30
2715      vand       $tmp,$tmp,$eighty7
2716     vcipher     $out2,$out2,v30
2717     vcipher     $out3,$out3,v30
2718      vxor       $tweak,$tweak,$tmp
2719     vcipher     $out4,$out4,v30
2720     vcipher     $out5,$out5,v30
2721      vxor       $in5,$twk5,v31
2722      vsrab      $tmp,$tweak,$seven  # next tweak value
2723      vxor       $twk5,$tweak,$rndkey0
2724 
2725     vcipherlast $out0,$out0,$in0
2726      lvx_u      $in0,$x00,$inp      # load next input block
2727      vaddubm    $tweak,$tweak,$tweak
2728      vsldoi     $tmp,$tmp,$tmp,15
2729     vcipherlast $out1,$out1,$in1
2730      lvx_u      $in1,$x10,$inp
2731     vcipherlast $out2,$out2,$in2
2732      le?vperm   $in0,$in0,$in0,$leperm
2733      lvx_u      $in2,$x20,$inp
2734      vand       $tmp,$tmp,$eighty7
2735     vcipherlast $out3,$out3,$in3
2736      le?vperm   $in1,$in1,$in1,$leperm
2737      lvx_u      $in3,$x30,$inp
2738     vcipherlast $out4,$out4,$in4
2739      le?vperm   $in2,$in2,$in2,$leperm
2740      lvx_u      $in4,$x40,$inp
2741      vxor       $tweak,$tweak,$tmp
2742     vcipherlast $tmp,$out5,$in5     # last block might be needed
2743                         # in stealing mode
2744      le?vperm   $in3,$in3,$in3,$leperm
2745      lvx_u      $in5,$x50,$inp
2746      addi       $inp,$inp,0x60
2747      le?vperm   $in4,$in4,$in4,$leperm
2748      le?vperm   $in5,$in5,$in5,$leperm
2749 
2750     le?vperm    $out0,$out0,$out0,$leperm
2751     le?vperm    $out1,$out1,$out1,$leperm
2752     stvx_u      $out0,$x00,$out     # store output
2753      vxor       $out0,$in0,$twk0
2754     le?vperm    $out2,$out2,$out2,$leperm
2755     stvx_u      $out1,$x10,$out
2756      vxor       $out1,$in1,$twk1
2757     le?vperm    $out3,$out3,$out3,$leperm
2758     stvx_u      $out2,$x20,$out
2759      vxor       $out2,$in2,$twk2
2760     le?vperm    $out4,$out4,$out4,$leperm
2761     stvx_u      $out3,$x30,$out
2762      vxor       $out3,$in3,$twk3
2763     le?vperm    $out5,$tmp,$tmp,$leperm
2764     stvx_u      $out4,$x40,$out
2765      vxor       $out4,$in4,$twk4
2766     le?stvx_u   $out5,$x50,$out
2767     be?stvx_u   $tmp, $x50,$out
2768      vxor       $out5,$in5,$twk5
2769     addi        $out,$out,0x60
2770 
2771     mtctr       $rounds
2772     beq     Loop_xts_enc6x      # did $len-=96 borrow?
2773 
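# $len left the loop as (bytes remaining - 0x60); addic. restores
# the true count and the ladder below dispatches on the one to five
# remaining full blocks (beq: none, only a possible stealing tail)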
2774     addic.      $len,$len,0x60
2775     beq     Lxts_enc6x_zero
2776     cmpwi       $len,0x20
2777     blt     Lxts_enc6x_one
2778     nop
2779     beq     Lxts_enc6x_two
2780     cmpwi       $len,0x40
2781     blt     Lxts_enc6x_three
2782     nop
2783     beq     Lxts_enc6x_four
2784 
2785 Lxts_enc6x_five:
2786     vxor        $out0,$in1,$twk0
2787     vxor        $out1,$in2,$twk1
2788     vxor        $out2,$in3,$twk2
2789     vxor        $out3,$in4,$twk3
2790     vxor        $out4,$in5,$twk4
2791 
2792     bl      _aesp8_xts_enc5x
2793 
2794     le?vperm    $out0,$out0,$out0,$leperm
2795     vmr     $twk0,$twk5     # unused tweak
2796     le?vperm    $out1,$out1,$out1,$leperm
2797     stvx_u      $out0,$x00,$out     # store output
2798     le?vperm    $out2,$out2,$out2,$leperm
2799     stvx_u      $out1,$x10,$out
2800     le?vperm    $out3,$out3,$out3,$leperm
2801     stvx_u      $out2,$x20,$out
2802     vxor        $tmp,$out4,$twk5    # last block prep for stealing
2803     le?vperm    $out4,$out4,$out4,$leperm
2804     stvx_u      $out3,$x30,$out
2805     stvx_u      $out4,$x40,$out
2806     addi        $out,$out,0x50
2807     bne     Lxts_enc6x_steal
2808     b       Lxts_enc6x_done
2809 
2810 .align  4
2811 Lxts_enc6x_four:
2812     vxor        $out0,$in2,$twk0
2813     vxor        $out1,$in3,$twk1
2814     vxor        $out2,$in4,$twk2
2815     vxor        $out3,$in5,$twk3
2816     vxor        $out4,$out4,$out4
2817 
2818     bl      _aesp8_xts_enc5x
2819 
2820     le?vperm    $out0,$out0,$out0,$leperm
2821     vmr     $twk0,$twk4     # unused tweak
2822     le?vperm    $out1,$out1,$out1,$leperm
2823     stvx_u      $out0,$x00,$out     # store output
2824     le?vperm    $out2,$out2,$out2,$leperm
2825     stvx_u      $out1,$x10,$out
2826     vxor        $tmp,$out3,$twk4    # last block prep for stealing
2827     le?vperm    $out3,$out3,$out3,$leperm
2828     stvx_u      $out2,$x20,$out
2829     stvx_u      $out3,$x30,$out
2830     addi        $out,$out,0x40
2831     bne     Lxts_enc6x_steal
2832     b       Lxts_enc6x_done
2833 
2834 .align  4
2835 Lxts_enc6x_three:
2836     vxor        $out0,$in3,$twk0
2837     vxor        $out1,$in4,$twk1
2838     vxor        $out2,$in5,$twk2
2839     vxor        $out3,$out3,$out3
2840     vxor        $out4,$out4,$out4
2841 
2842     bl      _aesp8_xts_enc5x
2843 
2844     le?vperm    $out0,$out0,$out0,$leperm
2845     vmr     $twk0,$twk3     # unused tweak
2846     le?vperm    $out1,$out1,$out1,$leperm
2847     stvx_u      $out0,$x00,$out     # store output
2848     vxor        $tmp,$out2,$twk3    # last block prep for stealing
2849     le?vperm    $out2,$out2,$out2,$leperm
2850     stvx_u      $out1,$x10,$out
2851     stvx_u      $out2,$x20,$out
2852     addi        $out,$out,0x30
2853     bne     Lxts_enc6x_steal
2854     b       Lxts_enc6x_done
2855 
2856 .align  4
2857 Lxts_enc6x_two:
2858     vxor        $out0,$in4,$twk0
2859     vxor        $out1,$in5,$twk1
2860     vxor        $out2,$out2,$out2
2861     vxor        $out3,$out3,$out3
2862     vxor        $out4,$out4,$out4
2863 
2864     bl      _aesp8_xts_enc5x
2865 
2866     le?vperm    $out0,$out0,$out0,$leperm
2867     vmr     $twk0,$twk2     # unused tweak
2868     vxor        $tmp,$out1,$twk2    # last block prep for stealing
2869     le?vperm    $out1,$out1,$out1,$leperm
2870     stvx_u      $out0,$x00,$out     # store output
2871     stvx_u      $out1,$x10,$out
2872     addi        $out,$out,0x20
2873     bne     Lxts_enc6x_steal
2874     b       Lxts_enc6x_done
2875 
2876 .align  4
2877 Lxts_enc6x_one:
2878     vxor        $out0,$in5,$twk0
2879     nop
2880 Loop_xts_enc1x:
2881     vcipher     $out0,$out0,v24
2882     lvx     v24,$x20,$key_      # round[3]
2883     addi        $key_,$key_,0x20
2884 
2885     vcipher     $out0,$out0,v25
2886     lvx     v25,$x10,$key_      # round[4]
2887     bdnz        Loop_xts_enc1x
2888 
2889     add     $inp,$inp,$taillen
2890     cmpwi       $taillen,0
2891     vcipher     $out0,$out0,v24
2892 
2893     subi        $inp,$inp,16
2894     vcipher     $out0,$out0,v25
2895 
2896     lvsr        $inpperm,0,$taillen
2897     vcipher     $out0,$out0,v26
2898 
2899     lvx_u       $in0,0,$inp
2900     vcipher     $out0,$out0,v27
2901 
2902     addi        $key_,$sp,$FRAME+15 # rewind $key_
2903     vcipher     $out0,$out0,v28
2904     lvx     v24,$x00,$key_      # re-pre-load round[1]
2905 
2906     vcipher     $out0,$out0,v29
2907     lvx     v25,$x10,$key_      # re-pre-load round[2]
2908      vxor       $twk0,$twk0,v31
2909 
2910     le?vperm    $in0,$in0,$in0,$leperm
2911     vcipher     $out0,$out0,v30
2912 
2913     vperm       $in0,$in0,$in0,$inpperm
2914     vcipherlast $out0,$out0,$twk0
2915 
2916     vmr     $twk0,$twk1     # unused tweak
2917     vxor        $tmp,$out0,$twk1    # last block prep for stealing
2918     le?vperm    $out0,$out0,$out0,$leperm
2919     stvx_u      $out0,$x00,$out     # store output
2920     addi        $out,$out,0x10
2921     bne     Lxts_enc6x_steal
2922     b       Lxts_enc6x_done
2923 
2924 .align  4
2925 Lxts_enc6x_zero:
2926     cmpwi       $taillen,0
2927     beq     Lxts_enc6x_done
2928 
2929     add     $inp,$inp,$taillen
2930     subi        $inp,$inp,16
2931     lvx_u       $in0,0,$inp
2932     lvsr        $inpperm,0,$taillen # $in5 is no more
2933     le?vperm    $in0,$in0,$in0,$leperm
2934     vperm       $in0,$in0,$in0,$inpperm
2935     vxor        $tmp,$tmp,$twk0
2936 Lxts_enc6x_steal:
2937     vxor        $in0,$in0,$twk0
2938     vxor        $out0,$out0,$out0
2939     vspltisb    $out1,-1
2940     vperm       $out0,$out0,$out1,$inpperm
2941     vsel        $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
2942 
2943     subi        r30,$out,17
2944     subi        $out,$out,16
2945     mtctr       $taillen
2946 Loop_xts_enc6x_steal:
2947     lbzu        r0,1(r30)
2948     stb     r0,16(r30)
2949     bdnz        Loop_xts_enc6x_steal
2950 
2951     li      $taillen,0
2952     mtctr       $rounds
2953     b       Loop_xts_enc1x      # one more time...
2954 
2955 .align  4
2956 Lxts_enc6x_done:
2957     ${UCMP}i    $ivp,0
2958     beq     Lxts_enc6x_ret
2959 
2960     vxor        $tweak,$twk0,$rndkey0
2961     le?vperm    $tweak,$tweak,$tweak,$leperm
2962     stvx_u      $tweak,0,$ivp
2963 
2964 Lxts_enc6x_ret:
2965     mtlr        r11
2966     li      r10,`$FRAME+15`
2967     li      r11,`$FRAME+31`
2968     stvx        $seven,r10,$sp      # wipe copies of round keys
2969     addi        r10,r10,32
2970     stvx        $seven,r11,$sp
2971     addi        r11,r11,32
2972     stvx        $seven,r10,$sp
2973     addi        r10,r10,32
2974     stvx        $seven,r11,$sp
2975     addi        r11,r11,32
2976     stvx        $seven,r10,$sp
2977     addi        r10,r10,32
2978     stvx        $seven,r11,$sp
2979     addi        r11,r11,32
2980     stvx        $seven,r10,$sp
2981     addi        r10,r10,32
2982     stvx        $seven,r11,$sp
2983     addi        r11,r11,32
2984 
2985     mtspr       256,$vrsave
2986     lvx     v20,r10,$sp     # ABI says so
2987     addi        r10,r10,32
2988     lvx     v21,r11,$sp
2989     addi        r11,r11,32
2990     lvx     v22,r10,$sp
2991     addi        r10,r10,32
2992     lvx     v23,r11,$sp
2993     addi        r11,r11,32
2994     lvx     v24,r10,$sp
2995     addi        r10,r10,32
2996     lvx     v25,r11,$sp
2997     addi        r11,r11,32
2998     lvx     v26,r10,$sp
2999     addi        r10,r10,32
3000     lvx     v27,r11,$sp
3001     addi        r11,r11,32
3002     lvx     v28,r10,$sp
3003     addi        r10,r10,32
3004     lvx     v29,r11,$sp
3005     addi        r11,r11,32
3006     lvx     v30,r10,$sp
3007     lvx     v31,r11,$sp
3008     $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3009     $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3010     $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3011     $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3012     $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3013     $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3014     addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3015     blr
3016     .long       0
3017     .byte       0,12,0x04,1,0x80,6,6,0
3018     .long       0
3019 
3020 .align  5
3021 _aesp8_xts_enc5x:
3022     vcipher     $out0,$out0,v24
3023     vcipher     $out1,$out1,v24
3024     vcipher     $out2,$out2,v24
3025     vcipher     $out3,$out3,v24
3026     vcipher     $out4,$out4,v24
3027     lvx     v24,$x20,$key_      # round[3]
3028     addi        $key_,$key_,0x20
3029 
3030     vcipher     $out0,$out0,v25
3031     vcipher     $out1,$out1,v25
3032     vcipher     $out2,$out2,v25
3033     vcipher     $out3,$out3,v25
3034     vcipher     $out4,$out4,v25
3035     lvx     v25,$x10,$key_      # round[4]
3036     bdnz        _aesp8_xts_enc5x
3037 
3038     add     $inp,$inp,$taillen
3039     cmpwi       $taillen,0
3040     vcipher     $out0,$out0,v24
3041     vcipher     $out1,$out1,v24
3042     vcipher     $out2,$out2,v24
3043     vcipher     $out3,$out3,v24
3044     vcipher     $out4,$out4,v24
3045 
3046     subi        $inp,$inp,16
3047     vcipher     $out0,$out0,v25
3048     vcipher     $out1,$out1,v25
3049     vcipher     $out2,$out2,v25
3050     vcipher     $out3,$out3,v25
3051     vcipher     $out4,$out4,v25
3052      vxor       $twk0,$twk0,v31
3053 
3054     vcipher     $out0,$out0,v26
3055     lvsr        $inpperm,r0,$taillen    # $in5 is no more
3056     vcipher     $out1,$out1,v26
3057     vcipher     $out2,$out2,v26
3058     vcipher     $out3,$out3,v26
3059     vcipher     $out4,$out4,v26
3060      vxor       $in1,$twk1,v31
3061 
3062     vcipher     $out0,$out0,v27
3063     lvx_u       $in0,0,$inp
3064     vcipher     $out1,$out1,v27
3065     vcipher     $out2,$out2,v27
3066     vcipher     $out3,$out3,v27
3067     vcipher     $out4,$out4,v27
3068      vxor       $in2,$twk2,v31
3069 
3070     addi        $key_,$sp,$FRAME+15 # rewind $key_
3071     vcipher     $out0,$out0,v28
3072     vcipher     $out1,$out1,v28
3073     vcipher     $out2,$out2,v28
3074     vcipher     $out3,$out3,v28
3075     vcipher     $out4,$out4,v28
3076     lvx     v24,$x00,$key_      # re-pre-load round[1]
3077      vxor       $in3,$twk3,v31
3078 
3079     vcipher     $out0,$out0,v29
3080     le?vperm    $in0,$in0,$in0,$leperm
3081     vcipher     $out1,$out1,v29
3082     vcipher     $out2,$out2,v29
3083     vcipher     $out3,$out3,v29
3084     vcipher     $out4,$out4,v29
3085     lvx     v25,$x10,$key_      # re-pre-load round[2]
3086      vxor       $in4,$twk4,v31
3087 
3088     vcipher     $out0,$out0,v30
3089     vperm       $in0,$in0,$in0,$inpperm
3090     vcipher     $out1,$out1,v30
3091     vcipher     $out2,$out2,v30
3092     vcipher     $out3,$out3,v30
3093     vcipher     $out4,$out4,v30
3094 
3095     vcipherlast $out0,$out0,$twk0
3096     vcipherlast $out1,$out1,$in1
3097     vcipherlast $out2,$out2,$in2
3098     vcipherlast $out3,$out3,$in3
3099     vcipherlast $out4,$out4,$in4
3100     blr
3101     .long       0
3102     .byte       0,12,0x14,0,0,0,0,0
3103 
3104 .align  5
3105 _aesp8_xts_decrypt6x:
3106     $STU        $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3107     mflr        r11
3108     li      r7,`$FRAME+8*16+15`
3109     li      r3,`$FRAME+8*16+31`
3110     $PUSH       r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3111     stvx        v20,r7,$sp      # ABI says so
3112     addi        r7,r7,32
3113     stvx        v21,r3,$sp
3114     addi        r3,r3,32
3115     stvx        v22,r7,$sp
3116     addi        r7,r7,32
3117     stvx        v23,r3,$sp
3118     addi        r3,r3,32
3119     stvx        v24,r7,$sp
3120     addi        r7,r7,32
3121     stvx        v25,r3,$sp
3122     addi        r3,r3,32
3123     stvx        v26,r7,$sp
3124     addi        r7,r7,32
3125     stvx        v27,r3,$sp
3126     addi        r3,r3,32
3127     stvx        v28,r7,$sp
3128     addi        r7,r7,32
3129     stvx        v29,r3,$sp
3130     addi        r3,r3,32
3131     stvx        v30,r7,$sp
3132     stvx        v31,r3,$sp
3133     li      r0,-1
3134     stw     $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
3135     li      $x10,0x10
3136     $PUSH       r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3137     li      $x20,0x20
3138     $PUSH       r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3139     li      $x30,0x30
3140     $PUSH       r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3141     li      $x40,0x40
3142     $PUSH       r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3143     li      $x50,0x50
3144     $PUSH       r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3145     li      $x60,0x60
3146     $PUSH       r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3147     li      $x70,0x70
3148     mtspr       256,r0
3149 
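# The prologue above saves the non-volatile v20-v31 and r26-r31 as the
# ABI requires, preserves the caller's vrsave and then sets it to -1 to
# claim every vector register for the duration of the routine.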
3150     subi        $rounds,$rounds,3   # -4 in total
3151 
3152     lvx     $rndkey0,$x00,$key1 # load key schedule
3153     lvx     v30,$x10,$key1
3154     addi        $key1,$key1,0x20
3155     lvx     v31,$x00,$key1
3156     ?vperm      $rndkey0,$rndkey0,v30,$keyperm
3157     addi        $key_,$sp,$FRAME+15
3158     mtctr       $rounds
3159 
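# Load_xts_dec_key aligns the (possibly unaligned) key schedule by
# vperm-ing adjacent quadwords and off-loads the inner rounds to the
# aligned scratch area at $key_; the main loop below then cycles
# v24/v25 through this stack copy two rounds at a time with plain
# aligned lvx loads.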
3160 Load_xts_dec_key:
3161     ?vperm      v24,v30,v31,$keyperm
3162     lvx     v30,$x10,$key1
3163     addi        $key1,$key1,0x20
3164     stvx        v24,$x00,$key_      # off-load round[1]
3165     ?vperm      v25,v31,v30,$keyperm
3166     lvx     v31,$x00,$key1
3167     stvx        v25,$x10,$key_      # off-load round[2]
3168     addi        $key_,$key_,0x20
3169     bdnz        Load_xts_dec_key
3170 
3171     lvx     v26,$x10,$key1
3172     ?vperm      v24,v30,v31,$keyperm
3173     lvx     v27,$x20,$key1
3174     stvx        v24,$x00,$key_      # off-load round[3]
3175     ?vperm      v25,v31,v26,$keyperm
3176     lvx     v28,$x30,$key1
3177     stvx        v25,$x10,$key_      # off-load round[4]
3178     addi        $key_,$sp,$FRAME+15 # rewind $key_
3179     ?vperm      v26,v26,v27,$keyperm
3180     lvx     v29,$x40,$key1
3181     ?vperm      v27,v27,v28,$keyperm
3182     lvx     v30,$x50,$key1
3183     ?vperm      v28,v28,v29,$keyperm
3184     lvx     v31,$x60,$key1
3185     ?vperm      v29,v29,v30,$keyperm
3186     lvx     $twk5,$x70,$key1    # borrow $twk5
3187     ?vperm      v30,v30,v31,$keyperm
3188     lvx     v24,$x00,$key_      # pre-load round[1]
3189     ?vperm      v31,v31,$twk5,$keyperm
3190     lvx     v25,$x10,$key_      # pre-load round[2]
3191 
3192      vperm      $in0,$inout,$inptail,$inpperm
3193      subi       $inp,$inp,31        # undo "caller"
3194     vxor        $twk0,$tweak,$rndkey0
3195     vsrab       $tmp,$tweak,$seven  # next tweak value
3196     vaddubm     $tweak,$tweak,$tweak
3197     vsldoi      $tmp,$tmp,$tmp,15
3198     vand        $tmp,$tmp,$eighty7
3199      vxor       $out0,$in0,$twk0
3200     vxor        $tweak,$tweak,$tmp
3201 
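# The vsrab/vaddubm/vsldoi/vand/vxor cluster above computes the next
# XTS tweak, i.e. multiplication by alpha (x) in GF(2^128): vaddubm
# doubles every byte, while the other four instructions build a mask
# that propagates the per-byte carries and folds a carry out of the
# top bit back in via the reduction polynomial x^128+x^7+x^2+x+1
# (0x87).  A C-style sketch of the same operation, assuming the
# 16-byte tweak t[] is held least-significant byte first:
#
#     carry = 0;
#     for (i = 0; i < 16; i++) {
#         b = (t[i] << 1) | carry;   /* double this byte          */
#         carry = b >> 8;            /* carry into the next byte  */
#         t[i] = b & 0xff;
#     }
#     if (carry)
#         t[0] ^= 0x87;              /* reduce mod x^128+x^7+x^2+x+1 */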
3202      lvx_u      $in1,$x10,$inp
3203     vxor        $twk1,$tweak,$rndkey0
3204     vsrab       $tmp,$tweak,$seven  # next tweak value
3205     vaddubm     $tweak,$tweak,$tweak
3206     vsldoi      $tmp,$tmp,$tmp,15
3207      le?vperm   $in1,$in1,$in1,$leperm
3208     vand        $tmp,$tmp,$eighty7
3209      vxor       $out1,$in1,$twk1
3210     vxor        $tweak,$tweak,$tmp
3211 
3212      lvx_u      $in2,$x20,$inp
3213      andi.      $taillen,$len,15
3214     vxor        $twk2,$tweak,$rndkey0
3215     vsrab       $tmp,$tweak,$seven  # next tweak value
3216     vaddubm     $tweak,$tweak,$tweak
3217     vsldoi      $tmp,$tmp,$tmp,15
3218      le?vperm   $in2,$in2,$in2,$leperm
3219     vand        $tmp,$tmp,$eighty7
3220      vxor       $out2,$in2,$twk2
3221     vxor        $tweak,$tweak,$tmp
3222 
3223      lvx_u      $in3,$x30,$inp
3224      sub        $len,$len,$taillen
3225     vxor        $twk3,$tweak,$rndkey0
3226     vsrab       $tmp,$tweak,$seven  # next tweak value
3227     vaddubm     $tweak,$tweak,$tweak
3228     vsldoi      $tmp,$tmp,$tmp,15
3229      le?vperm   $in3,$in3,$in3,$leperm
3230     vand        $tmp,$tmp,$eighty7
3231      vxor       $out3,$in3,$twk3
3232     vxor        $tweak,$tweak,$tmp
3233 
3234      lvx_u      $in4,$x40,$inp
3235      subi       $len,$len,0x60
3236     vxor        $twk4,$tweak,$rndkey0
3237     vsrab       $tmp,$tweak,$seven  # next tweak value
3238     vaddubm     $tweak,$tweak,$tweak
3239     vsldoi      $tmp,$tmp,$tmp,15
3240      le?vperm   $in4,$in4,$in4,$leperm
3241     vand        $tmp,$tmp,$eighty7
3242      vxor       $out4,$in4,$twk4
3243     vxor        $tweak,$tweak,$tmp
3244 
3245      lvx_u      $in5,$x50,$inp
3246      addi       $inp,$inp,0x60
3247     vxor        $twk5,$tweak,$rndkey0
3248     vsrab       $tmp,$tweak,$seven  # next tweak value
3249     vaddubm     $tweak,$tweak,$tweak
3250     vsldoi      $tmp,$tmp,$tmp,15
3251      le?vperm   $in5,$in5,$in5,$leperm
3252     vand        $tmp,$tmp,$eighty7
3253      vxor       $out5,$in5,$twk5
3254     vxor        $tweak,$tweak,$tmp
3255 
3256     vxor        v31,v31,$rndkey0
3257     mtctr       $rounds
3258     b       Loop_xts_dec6x
3259 
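# The loop below runs two rounds per bdnz iteration out of the stack
# copy of the key schedule (cycling v24/v25); the last seven rounds
# plus vncipherlast are peeled off after it and interleaved with
# next-tweak generation, the loads of the next six blocks and the
# stores of the previous results.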
3260 .align  5
3261 Loop_xts_dec6x:
3262     vncipher    $out0,$out0,v24
3263     vncipher    $out1,$out1,v24
3264     vncipher    $out2,$out2,v24
3265     vncipher    $out3,$out3,v24
3266     vncipher    $out4,$out4,v24
3267     vncipher    $out5,$out5,v24
3268     lvx     v24,$x20,$key_      # round[3]
3269     addi        $key_,$key_,0x20
3270 
3271     vncipher    $out0,$out0,v25
3272     vncipher    $out1,$out1,v25
3273     vncipher    $out2,$out2,v25
3274     vncipher    $out3,$out3,v25
3275     vncipher    $out4,$out4,v25
3276     vncipher    $out5,$out5,v25
3277     lvx     v25,$x10,$key_      # round[4]
3278     bdnz        Loop_xts_dec6x
3279 
3280     subic       $len,$len,96        # $len-=96
3281      vxor       $in0,$twk0,v31      # xor with last round key
3282     vncipher    $out0,$out0,v24
3283     vncipher    $out1,$out1,v24
3284      vsrab      $tmp,$tweak,$seven  # next tweak value
3285      vxor       $twk0,$tweak,$rndkey0
3286      vaddubm    $tweak,$tweak,$tweak
3287     vncipher    $out2,$out2,v24
3288     vncipher    $out3,$out3,v24
3289      vsldoi     $tmp,$tmp,$tmp,15
3290     vncipher    $out4,$out4,v24
3291     vncipher    $out5,$out5,v24
3292 
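# Branchless last-pass handling: subic set the carry from $len-96 and
# subfe. below turns a borrow into r0 = -1 (else 0).  Masking that
# with the now-negative $len and adding it to $inp slides the trailing
# loads back so they stay within the input buffer instead of running
# past its end.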
3293     subfe.      r0,r0,r0        # borrow?-1:0
3294      vand       $tmp,$tmp,$eighty7
3295     vncipher    $out0,$out0,v25
3296     vncipher    $out1,$out1,v25
3297      vxor       $tweak,$tweak,$tmp
3298     vncipher    $out2,$out2,v25
3299     vncipher    $out3,$out3,v25
3300      vxor       $in1,$twk1,v31
3301      vsrab      $tmp,$tweak,$seven  # next tweak value
3302      vxor       $twk1,$tweak,$rndkey0
3303     vncipher    $out4,$out4,v25
3304     vncipher    $out5,$out5,v25
3305 
3306     and     r0,r0,$len
3307      vaddubm    $tweak,$tweak,$tweak
3308      vsldoi     $tmp,$tmp,$tmp,15
3309     vncipher    $out0,$out0,v26
3310     vncipher    $out1,$out1,v26
3311      vand       $tmp,$tmp,$eighty7
3312     vncipher    $out2,$out2,v26
3313     vncipher    $out3,$out3,v26
3314      vxor       $tweak,$tweak,$tmp
3315     vncipher    $out4,$out4,v26
3316     vncipher    $out5,$out5,v26
3317 
3318     add     $inp,$inp,r0        # $inp is adjusted in such a
3319                         # way that at exit from the
3320                         # loop inX-in5 are loaded
3321                         # with the last "words"
3322      vxor       $in2,$twk2,v31
3323      vsrab      $tmp,$tweak,$seven  # next tweak value
3324      vxor       $twk2,$tweak,$rndkey0
3325      vaddubm    $tweak,$tweak,$tweak
3326     vncipher    $out0,$out0,v27
3327     vncipher    $out1,$out1,v27
3328      vsldoi     $tmp,$tmp,$tmp,15
3329     vncipher    $out2,$out2,v27
3330     vncipher    $out3,$out3,v27
3331      vand       $tmp,$tmp,$eighty7
3332     vncipher    $out4,$out4,v27
3333     vncipher    $out5,$out5,v27
3334 
3335     addi        $key_,$sp,$FRAME+15 # rewind $key_
3336      vxor       $tweak,$tweak,$tmp
3337     vncipher    $out0,$out0,v28
3338     vncipher    $out1,$out1,v28
3339      vxor       $in3,$twk3,v31
3340      vsrab      $tmp,$tweak,$seven  # next tweak value
3341      vxor       $twk3,$tweak,$rndkey0
3342     vncipher    $out2,$out2,v28
3343     vncipher    $out3,$out3,v28
3344      vaddubm    $tweak,$tweak,$tweak
3345      vsldoi     $tmp,$tmp,$tmp,15
3346     vncipher    $out4,$out4,v28
3347     vncipher    $out5,$out5,v28
3348     lvx     v24,$x00,$key_      # re-pre-load round[1]
3349      vand       $tmp,$tmp,$eighty7
3350 
3351     vncipher    $out0,$out0,v29
3352     vncipher    $out1,$out1,v29
3353      vxor       $tweak,$tweak,$tmp
3354     vncipher    $out2,$out2,v29
3355     vncipher    $out3,$out3,v29
3356      vxor       $in4,$twk4,v31
3357      vsrab      $tmp,$tweak,$seven  # next tweak value
3358      vxor       $twk4,$tweak,$rndkey0
3359     vncipher    $out4,$out4,v29
3360     vncipher    $out5,$out5,v29
3361     lvx     v25,$x10,$key_      # re-pre-load round[2]
3362      vaddubm    $tweak,$tweak,$tweak
3363      vsldoi     $tmp,$tmp,$tmp,15
3364 
3365     vncipher    $out0,$out0,v30
3366     vncipher    $out1,$out1,v30
3367      vand       $tmp,$tmp,$eighty7
3368     vncipher    $out2,$out2,v30
3369     vncipher    $out3,$out3,v30
3370      vxor       $tweak,$tweak,$tmp
3371     vncipher    $out4,$out4,v30
3372     vncipher    $out5,$out5,v30
3373      vxor       $in5,$twk5,v31
3374      vsrab      $tmp,$tweak,$seven  # next tweak value
3375      vxor       $twk5,$tweak,$rndkey0
3376 
3377     vncipherlast    $out0,$out0,$in0
3378      lvx_u      $in0,$x00,$inp      # load next input block
3379      vaddubm    $tweak,$tweak,$tweak
3380      vsldoi     $tmp,$tmp,$tmp,15
3381     vncipherlast    $out1,$out1,$in1
3382      lvx_u      $in1,$x10,$inp
3383     vncipherlast    $out2,$out2,$in2
3384      le?vperm   $in0,$in0,$in0,$leperm
3385      lvx_u      $in2,$x20,$inp
3386      vand       $tmp,$tmp,$eighty7
3387     vncipherlast    $out3,$out3,$in3
3388      le?vperm   $in1,$in1,$in1,$leperm
3389      lvx_u      $in3,$x30,$inp
3390     vncipherlast    $out4,$out4,$in4
3391      le?vperm   $in2,$in2,$in2,$leperm
3392      lvx_u      $in4,$x40,$inp
3393      vxor       $tweak,$tweak,$tmp
3394     vncipherlast    $out5,$out5,$in5
3395      le?vperm   $in3,$in3,$in3,$leperm
3396      lvx_u      $in5,$x50,$inp
3397      addi       $inp,$inp,0x60
3398      le?vperm   $in4,$in4,$in4,$leperm
3399      le?vperm   $in5,$in5,$in5,$leperm
3400 
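# Store/reprime: results are byte-swapped back on little-endian and
# stored, while each freshly loaded input block is xored with its
# tweak to become the next iteration's round-0 state.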
3401     le?vperm    $out0,$out0,$out0,$leperm
3402     le?vperm    $out1,$out1,$out1,$leperm
3403     stvx_u      $out0,$x00,$out     # store output
3404      vxor       $out0,$in0,$twk0
3405     le?vperm    $out2,$out2,$out2,$leperm
3406     stvx_u      $out1,$x10,$out
3407      vxor       $out1,$in1,$twk1
3408     le?vperm    $out3,$out3,$out3,$leperm
3409     stvx_u      $out2,$x20,$out
3410      vxor       $out2,$in2,$twk2
3411     le?vperm    $out4,$out4,$out4,$leperm
3412     stvx_u      $out3,$x30,$out
3413      vxor       $out3,$in3,$twk3
3414     le?vperm    $out5,$out5,$out5,$leperm
3415     stvx_u      $out4,$x40,$out
3416      vxor       $out4,$in4,$twk4
3417     stvx_u      $out5,$x50,$out
3418      vxor       $out5,$in5,$twk5
3419     addi        $out,$out,0x60
3420 
3421     mtctr       $rounds
3422     beq     Loop_xts_dec6x      # did $len-=96 borrow?
3423 
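# Loop exit: addic. restores the remaining byte count, leaving 0x00,
# 0x10, ..., 0x50 bytes of whole blocks to dispatch on.  Because the
# loop's final loads were slid back, the valid data sits at the high
# end of in0-in5 (e.g. in1-in5 when five blocks remain).  Each handler
# decrypts its leftovers (mostly via the shared _aesp8_xts_dec5x) and
# then branches to the ciphertext-stealing path if a partial block
# remains, or to Lxts_dec6x_done otherwise.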
3424     addic.      $len,$len,0x60
3425     beq     Lxts_dec6x_zero
3426     cmpwi       $len,0x20
3427     blt     Lxts_dec6x_one
3428     nop
3429     beq     Lxts_dec6x_two
3430     cmpwi       $len,0x40
3431     blt     Lxts_dec6x_three
3432     nop
3433     beq     Lxts_dec6x_four
3434 
3435 Lxts_dec6x_five:
3436     vxor        $out0,$in1,$twk0
3437     vxor        $out1,$in2,$twk1
3438     vxor        $out2,$in3,$twk2
3439     vxor        $out3,$in4,$twk3
3440     vxor        $out4,$in5,$twk4
3441 
3442     bl      _aesp8_xts_dec5x
3443 
3444     le?vperm    $out0,$out0,$out0,$leperm
3445     vmr     $twk0,$twk5     # unused tweak
3446     vxor        $twk1,$tweak,$rndkey0
3447     le?vperm    $out1,$out1,$out1,$leperm
3448     stvx_u      $out0,$x00,$out     # store output
3449     vxor        $out0,$in0,$twk1
3450     le?vperm    $out2,$out2,$out2,$leperm
3451     stvx_u      $out1,$x10,$out
3452     le?vperm    $out3,$out3,$out3,$leperm
3453     stvx_u      $out2,$x20,$out
3454     le?vperm    $out4,$out4,$out4,$leperm
3455     stvx_u      $out3,$x30,$out
3456     stvx_u      $out4,$x40,$out
3457     addi        $out,$out,0x50
3458     bne     Lxts_dec6x_steal
3459     b       Lxts_dec6x_done
3460 
3461 .align  4
3462 Lxts_dec6x_four:
3463     vxor        $out0,$in2,$twk0
3464     vxor        $out1,$in3,$twk1
3465     vxor        $out2,$in4,$twk2
3466     vxor        $out3,$in5,$twk3
3467     vxor        $out4,$out4,$out4
3468 
3469     bl      _aesp8_xts_dec5x
3470 
3471     le?vperm    $out0,$out0,$out0,$leperm
3472     vmr     $twk0,$twk4     # unused tweak
3473     vmr     $twk1,$twk5
3474     le?vperm    $out1,$out1,$out1,$leperm
3475     stvx_u      $out0,$x00,$out     # store output
3476     vxor        $out0,$in0,$twk5
3477     le?vperm    $out2,$out2,$out2,$leperm
3478     stvx_u      $out1,$x10,$out
3479     le?vperm    $out3,$out3,$out3,$leperm
3480     stvx_u      $out2,$x20,$out
3481     stvx_u      $out3,$x30,$out
3482     addi        $out,$out,0x40
3483     bne     Lxts_dec6x_steal
3484     b       Lxts_dec6x_done
3485 
3486 .align  4
3487 Lxts_dec6x_three:
3488     vxor        $out0,$in3,$twk0
3489     vxor        $out1,$in4,$twk1
3490     vxor        $out2,$in5,$twk2
3491     vxor        $out3,$out3,$out3
3492     vxor        $out4,$out4,$out4
3493 
3494     bl      _aesp8_xts_dec5x
3495 
3496     le?vperm    $out0,$out0,$out0,$leperm
3497     vmr     $twk0,$twk3     # unused tweak
3498     vmr     $twk1,$twk4
3499     le?vperm    $out1,$out1,$out1,$leperm
3500     stvx_u      $out0,$x00,$out     # store output
3501     vxor        $out0,$in0,$twk4
3502     le?vperm    $out2,$out2,$out2,$leperm
3503     stvx_u      $out1,$x10,$out
3504     stvx_u      $out2,$x20,$out
3505     addi        $out,$out,0x30
3506     bne     Lxts_dec6x_steal
3507     b       Lxts_dec6x_done
3508 
3509 .align  4
3510 Lxts_dec6x_two:
3511     vxor        $out0,$in4,$twk0
3512     vxor        $out1,$in5,$twk1
3513     vxor        $out2,$out2,$out2
3514     vxor        $out3,$out3,$out3
3515     vxor        $out4,$out4,$out4
3516 
3517     bl      _aesp8_xts_dec5x
3518 
3519     le?vperm    $out0,$out0,$out0,$leperm
3520     vmr     $twk0,$twk2     # unused tweak
3521     vmr     $twk1,$twk3
3522     le?vperm    $out1,$out1,$out1,$leperm
3523     stvx_u      $out0,$x00,$out     # store output
3524     vxor        $out0,$in0,$twk3
3525     stvx_u      $out1,$x10,$out
3526     addi        $out,$out,0x20
3527     bne     Lxts_dec6x_steal
3528     b       Lxts_dec6x_done
3529 
3530 .align  4
3531 Lxts_dec6x_one:
3532     vxor        $out0,$in5,$twk0
3533     nop
3534 Loop_xts_dec1x:
3535     vncipher    $out0,$out0,v24
3536     lvx     v24,$x20,$key_      # round[3]
3537     addi        $key_,$key_,0x20
3538 
3539     vncipher    $out0,$out0,v25
3540     lvx     v25,$x10,$key_      # round[4]
3541     bdnz        Loop_xts_dec1x
3542 
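# Branchless tail probe below: (taillen-1) & 16 is 16 exactly when
# $taillen is 0, so $inp is backed up one whole block in that case;
# either way the lvx_u picks up the data needed by the stealing path
# without taking a conditional branch.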
3543     subi        r0,$taillen,1
3544     vncipher    $out0,$out0,v24
3545 
3546     andi.       r0,r0,16
3547     cmpwi       $taillen,0
3548     vncipher    $out0,$out0,v25
3549 
3550     sub     $inp,$inp,r0
3551     vncipher    $out0,$out0,v26
3552 
3553     lvx_u       $in0,0,$inp
3554     vncipher    $out0,$out0,v27
3555 
3556     addi        $key_,$sp,$FRAME+15 # rewind $key_
3557     vncipher    $out0,$out0,v28
3558     lvx     v24,$x00,$key_      # re-pre-load round[1]
3559 
3560     vncipher    $out0,$out0,v29
3561     lvx     v25,$x10,$key_      # re-pre-load round[2]
3562      vxor       $twk0,$twk0,v31
3563 
3564     le?vperm    $in0,$in0,$in0,$leperm
3565     vncipher    $out0,$out0,v30
3566 
3567     mtctr       $rounds
3568     vncipherlast    $out0,$out0,$twk0
3569 
3570     vmr     $twk0,$twk1     # unused tweak
3571     vmr     $twk1,$twk2
3572     le?vperm    $out0,$out0,$out0,$leperm
3573     stvx_u      $out0,$x00,$out     # store output
3574     addi        $out,$out,0x10
3575     vxor        $out0,$in0,$twk2
3576     bne     Lxts_dec6x_steal
3577     b       Lxts_dec6x_done
3578 
3579 .align  4
3580 Lxts_dec6x_zero:
3581     cmpwi       $taillen,0
3582     beq     Lxts_dec6x_done
3583 
3584     lvx_u       $in0,0,$inp
3585     le?vperm    $in0,$in0,$in0,$leperm
3586     vxor        $out0,$in0,$twk1
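# Ciphertext stealing for a ragged tail.  The last full ciphertext
# block is decrypted with the later tweak (twk1); its first taillen
# bytes become the short final plaintext block (copied forward by
# Loop_xts_dec6x_steal below), its remaining bytes are glued to the
# taillen tail bytes of ciphertext, and the combined block goes back
# through Loop_xts_dec1x with twk0.  C-style sketch:
#
#     buf    = Dec(K, C[m-1] xor T1) xor T1;
#     P[m]   = buf[0 .. taillen-1];                  /* short block */
#     merged = C[m][0 .. taillen-1] || buf[taillen .. 15];
#     P[m-1] = Dec(K, merged xor T0) xor T0;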
3587 Lxts_dec6x_steal:
3588     vncipher    $out0,$out0,v24
3589     lvx     v24,$x20,$key_      # round[3]
3590     addi        $key_,$key_,0x20
3591 
3592     vncipher    $out0,$out0,v25
3593     lvx     v25,$x10,$key_      # round[4]
3594     bdnz        Lxts_dec6x_steal
3595 
3596     add     $inp,$inp,$taillen
3597     vncipher    $out0,$out0,v24
3598 
3599     cmpwi       $taillen,0
3600     vncipher    $out0,$out0,v25
3601 
3602     lvx_u       $in0,0,$inp
3603     vncipher    $out0,$out0,v26
3604 
3605     lvsr        $inpperm,0,$taillen # $in5 is no more
3606     vncipher    $out0,$out0,v27
3607 
3608     addi        $key_,$sp,$FRAME+15 # rewind $key_
3609     vncipher    $out0,$out0,v28
3610     lvx     v24,$x00,$key_      # re-pre-load round[1]
3611 
3612     vncipher    $out0,$out0,v29
3613     lvx     v25,$x10,$key_      # re-pre-load round[2]
3614      vxor       $twk1,$twk1,v31
3615 
3616     le?vperm    $in0,$in0,$in0,$leperm
3617     vncipher    $out0,$out0,v30
3618 
3619     vperm       $in0,$in0,$in0,$inpperm
3620     vncipherlast    $tmp,$out0,$twk1
3621 
3622     le?vperm    $out0,$tmp,$tmp,$leperm
3623     le?stvx_u   $out0,0,$out
3624     be?stvx_u   $tmp,0,$out
3625 
3626     vxor        $out0,$out0,$out0
3627     vspltisb    $out1,-1
3628     vperm       $out0,$out0,$out1,$inpperm
3629     vsel        $out0,$in0,$tmp,$out0
3630     vxor        $out0,$out0,$twk0
3631 
3632     subi        r30,$out,1
3633     mtctr       $taillen
3634 Loop_xts_dec6x_steal:
3635     lbzu        r0,1(r30)
3636     stb     r0,16(r30)
3637     bdnz        Loop_xts_dec6x_steal
3638 
3639     li      $taillen,0
3640     mtctr       $rounds
3641     b       Loop_xts_dec1x      # one more time...
3642 
3643 .align  4
3644 Lxts_dec6x_done:
3645     ${UCMP}i    $ivp,0
3646     beq     Lxts_dec6x_ret
3647 
3648     vxor        $tweak,$twk0,$rndkey0
3649     le?vperm    $tweak,$tweak,$tweak,$leperm
3650     stvx_u      $tweak,0,$ivp
3651 
3652 Lxts_dec6x_ret:
3653     mtlr        r11
3654     li      r10,`$FRAME+15`
3655     li      r11,`$FRAME+31`
3656     stvx        $seven,r10,$sp      # wipe copies of round keys
3657     addi        r10,r10,32
3658     stvx        $seven,r11,$sp
3659     addi        r11,r11,32
3660     stvx        $seven,r10,$sp
3661     addi        r10,r10,32
3662     stvx        $seven,r11,$sp
3663     addi        r11,r11,32
3664     stvx        $seven,r10,$sp
3665     addi        r10,r10,32
3666     stvx        $seven,r11,$sp
3667     addi        r11,r11,32
3668     stvx        $seven,r10,$sp
3669     addi        r10,r10,32
3670     stvx        $seven,r11,$sp
3671     addi        r11,r11,32
3672 
3673     mtspr       256,$vrsave
3674     lvx     v20,r10,$sp     # ABI says so
3675     addi        r10,r10,32
3676     lvx     v21,r11,$sp
3677     addi        r11,r11,32
3678     lvx     v22,r10,$sp
3679     addi        r10,r10,32
3680     lvx     v23,r11,$sp
3681     addi        r11,r11,32
3682     lvx     v24,r10,$sp
3683     addi        r10,r10,32
3684     lvx     v25,r11,$sp
3685     addi        r11,r11,32
3686     lvx     v26,r10,$sp
3687     addi        r10,r10,32
3688     lvx     v27,r11,$sp
3689     addi        r11,r11,32
3690     lvx     v28,r10,$sp
3691     addi        r10,r10,32
3692     lvx     v29,r11,$sp
3693     addi        r11,r11,32
3694     lvx     v30,r10,$sp
3695     lvx     v31,r11,$sp
3696     $POP        r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3697     $POP        r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3698     $POP        r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3699     $POP        r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3700     $POP        r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3701     $POP        r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3702     addi        $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3703     blr
3704     .long       0
3705     .byte       0,12,0x04,1,0x80,6,6,0
3706     .long       0
3707 
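# _aesp8_xts_dec5x is the shared tail subroutine: it carries up to five
# prepared blocks (callers zero the unused slots) through the full
# round sequence, folding the last round key into the tweaks on the
# fly just as the main loop does, and leaves v24/v25 and the counter
# reloaded in case the stealing path needs another single-block pass.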
3708 .align  5
3709 _aesp8_xts_dec5x:
3710     vncipher    $out0,$out0,v24
3711     vncipher    $out1,$out1,v24
3712     vncipher    $out2,$out2,v24
3713     vncipher    $out3,$out3,v24
3714     vncipher    $out4,$out4,v24
3715     lvx     v24,$x20,$key_      # round[3]
3716     addi        $key_,$key_,0x20
3717 
3718     vncipher    $out0,$out0,v25
3719     vncipher    $out1,$out1,v25
3720     vncipher    $out2,$out2,v25
3721     vncipher    $out3,$out3,v25
3722     vncipher    $out4,$out4,v25
3723     lvx     v25,$x10,$key_      # round[4]
3724     bdnz        _aesp8_xts_dec5x
3725 
3726     subi        r0,$taillen,1
3727     vncipher    $out0,$out0,v24
3728     vncipher    $out1,$out1,v24
3729     vncipher    $out2,$out2,v24
3730     vncipher    $out3,$out3,v24
3731     vncipher    $out4,$out4,v24
3732 
3733     andi.       r0,r0,16
3734     cmpwi       $taillen,0
3735     vncipher    $out0,$out0,v25
3736     vncipher    $out1,$out1,v25
3737     vncipher    $out2,$out2,v25
3738     vncipher    $out3,$out3,v25
3739     vncipher    $out4,$out4,v25
3740      vxor       $twk0,$twk0,v31
3741 
3742     sub     $inp,$inp,r0
3743     vncipher    $out0,$out0,v26
3744     vncipher    $out1,$out1,v26
3745     vncipher    $out2,$out2,v26
3746     vncipher    $out3,$out3,v26
3747     vncipher    $out4,$out4,v26
3748      vxor       $in1,$twk1,v31
3749 
3750     vncipher    $out0,$out0,v27
3751     lvx_u       $in0,0,$inp
3752     vncipher    $out1,$out1,v27
3753     vncipher    $out2,$out2,v27
3754     vncipher    $out3,$out3,v27
3755     vncipher    $out4,$out4,v27
3756      vxor       $in2,$twk2,v31
3757 
3758     addi        $key_,$sp,$FRAME+15 # rewind $key_
3759     vncipher    $out0,$out0,v28
3760     vncipher    $out1,$out1,v28
3761     vncipher    $out2,$out2,v28
3762     vncipher    $out3,$out3,v28
3763     vncipher    $out4,$out4,v28
3764     lvx     v24,$x00,$key_      # re-pre-load round[1]
3765      vxor       $in3,$twk3,v31
3766 
3767     vncipher    $out0,$out0,v29
3768     le?vperm    $in0,$in0,$in0,$leperm
3769     vncipher    $out1,$out1,v29
3770     vncipher    $out2,$out2,v29
3771     vncipher    $out3,$out3,v29
3772     vncipher    $out4,$out4,v29
3773     lvx     v25,$x10,$key_      # re-pre-load round[2]
3774      vxor       $in4,$twk4,v31
3775 
3776     vncipher    $out0,$out0,v30
3777     vncipher    $out1,$out1,v30
3778     vncipher    $out2,$out2,v30
3779     vncipher    $out3,$out3,v30
3780     vncipher    $out4,$out4,v30
3781 
3782     vncipherlast    $out0,$out0,$twk0
3783     vncipherlast    $out1,$out1,$in1
3784     vncipherlast    $out2,$out2,$in2
3785     vncipherlast    $out3,$out3,$in3
3786     vncipherlast    $out4,$out4,$in4
3787     mtctr       $rounds
3788     blr
3789         .long       0
3790         .byte       0,12,0x14,0,0,0,0,0
3791 ___
3792 }}  }}}
3793 
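# Final emission pass over the generated text: backtick expressions
# are evaluated, the constants table is expanded into endian-correct
# .byte runs, and instructions carrying a '?' prefix are rewritten for
# the target endianness.  On little-endian flavours, for instance, the
# two source operands of a marked vperm are swapped:
#
#   ?vperm v24,v30,v31,v7   becomes   vperm v24,v31,v30,v7
#
# and ?lvsr/?lvsl swap identities; on big-endian the '?' is simply
# dropped.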
3794 my $consts=1;
3795 foreach(split("\n",$code)) {
3796         s/\`([^\`]*)\`/eval($1)/geo;
3797 
3798     # constants table endian-specific conversion
3799     if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3800         my $conv=$3;
3801         my @bytes=();
3802 
3803         # convert to endian-agnostic format
3804         if ($1 eq "long") {
3805           foreach (split(/,\s*/,$2)) {
3806         my $l = /^0/?oct:int;
3807         push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3808           }
3809         } else {
3810         @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3811         }
3812 
3813         # little-endian conversion
3814         if ($flavour =~ /le$/o) {
3815         SWITCH: for($conv)  {
3816             /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
3817             /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
3818         }
3819         }
3820 
3821         # emit
3822         print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3823         next;
3824     }
3825     $consts=0 if (m/Lconsts:/o);    # end of table
3826 
3827     # instructions prefixed with '?' are endian-specific and need
3828     # to be adjusted accordingly...
3829     if ($flavour =~ /le$/o) {   # little-endian
3830         s/le\?//o       or
3831         s/be\?/#be#/o   or
3832         s/\?lvsr/lvsl/o or
3833         s/\?lvsl/lvsr/o or
3834         s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3835         s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3836         s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3837     } else {            # big-endian
3838         s/le\?/#le#/o   or
3839         s/be\?//o       or
3840         s/\?([a-z]+)/$1/o;
3841     }
3842 
3843         print $_,"\n";
3844 }
3845 
3846 close STDOUT or die "error closing STDOUT: $!";