      1 #! /usr/bin/env perl
      2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
      3 #
      4 # Licensed under the OpenSSL license (the "License").  You may not use
      5 # this file except in compliance with the License.  You can obtain a copy
      6 # in the file LICENSE in the source distribution or at
      7 # https://www.openssl.org/source/license.html
      8 
      9 #
     10 # ====================================================================
# Written by Andy Polyakov <appro\@openssl.org> for the OpenSSL
     12 # project. The module is, however, dual licensed under OpenSSL and
     13 # CRYPTOGAMS licenses depending on where you obtain it. For further
     14 # details see http://www.openssl.org/~appro/cryptogams/.
     15 # ====================================================================
     16 #
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which requires the MSR.VSX flag to
# be set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [data aligned programmatically, which in turn guarantees exception-
# free execution], but that turned out to hamper performance when
# vcipher instructions are interleaved. It's reckoned that occasional
# misalignment penalties at page boundaries are on average lower than
# the additional overhead of the pure AltiVec approach.
     30 #
     31 # May 2016
     32 #
# Added XTS subroutine; a 9x improvement on little-endian and 12x on
# big-endian systems was measured.
     35 #
     36 ######################################################################
     37 # Current large-block performance in cycles per byte processed with
     38 # 128-bit key (less is better).
     39 #
     40 #		CBC en-/decrypt	CTR	XTS
     41 # POWER8[le]	3.96/0.72	0.74	1.1
     42 # POWER8[be]	3.75/0.65	0.66	1.0
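#
######################################################################
# For reference, a sketch of the C prototypes these entry points are
# expected to match. These declarations are inferred from the register
# usage below and are illustrative, not authoritative:
#
#	int  aes_hw_set_encrypt_key(const unsigned char *inp, int bits,
#	                            AES_KEY *key);
#	int  aes_hw_set_decrypt_key(const unsigned char *inp, int bits,
#	                            AES_KEY *key);
#	void aes_hw_encrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_hw_decrypt(const unsigned char *in, unsigned char *out,
#	                    const AES_KEY *key);
#	void aes_hw_cbc_encrypt(const unsigned char *in, unsigned char *out,
#	                        size_t length, const AES_KEY *key,
#	                        unsigned char *ivec, int enc);
#	void aes_hw_ctr32_encrypt_blocks(const unsigned char *in,
#	                                 unsigned char *out, size_t blocks,
#	                                 const AES_KEY *key,
#	                                 const unsigned char ivec[16]);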
     43 
     44 $flavour = shift;
     45 
     46 if ($flavour =~ /64/) {
     47 	$SIZE_T	=8;
     48 	$LRSAVE	=2*$SIZE_T;
     49 	$STU	="stdu";
     50 	$POP	="ld";
     51 	$PUSH	="std";
     52 	$UCMP	="cmpld";
     53 	$SHL	="sldi";
     54 } elsif ($flavour =~ /32/) {
     55 	$SIZE_T	=4;
     56 	$LRSAVE	=$SIZE_T;
     57 	$STU	="stwu";
     58 	$POP	="lwz";
     59 	$PUSH	="stw";
     60 	$UCMP	="cmplw";
     61 	$SHL	="slwi";
     62 } else { die "nonsense $flavour"; }
     63 
     64 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
     65 
     66 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
     67 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
     68 ( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
     69 die "can't locate ppc-xlate.pl";
     70 
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
     72 
     73 $FRAME=8*$SIZE_T;
     74 $prefix="aes_hw";
     75 
     76 $sp="r1";
     77 $vrsave="r12";
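
# Two notational conventions recur below: instructions prefixed with
# "le?" are emitted only when generating little-endian code, and
# instructions prefixed with a bare "?" are endian-sensitive and get
# rewritten (flipped or operand-swapped) for little-endian output.
# Likewise the "?rev"/"?asis" tags on data words mark values that do
# or do not need byte reversal.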
     78 
     79 #########################################################################
     80 {{{	# Key setup procedures						#
     81 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
     82 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
     83 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
     84 
     85 $code.=<<___;
     86 .machine	"any"
     87 
     88 .text
     89 
     90 .align	7
     91 Lrcon:
     92 .long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
     93 .long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
     94 .long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
     95 .long	0,0,0,0						?asis
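# PowerPC has no PC-relative load, so Lconsts materializes the address
# of Lrcon via the bcl 20,31 idiom: a branch-and-link to the very next
# instruction, encoded so it doesn't disturb the link-register
# prediction stack, followed by mflr. The link register then points
# 0x48 bytes past Lrcon (0x40 bytes of table data plus two
# instructions), hence the addi adjustment.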
     96 Lconsts:
     97 	mflr	r0
     98 	bcl	20,31,\$+4
	mflr	$ptr		 # address right past the bcl; Lrcon is 0x48 bytes back
    100 	addi	$ptr,$ptr,-0x48
    101 	mtlr	r0
    102 	blr
    103 	.long	0
    104 	.byte	0,12,0x14,0,0,0,0,0
    105 .asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
    106 
    107 .globl	.${prefix}_set_encrypt_key
    108 .align	5
    109 .${prefix}_set_encrypt_key:
    110 Lset_encrypt_key:
    111 	mflr		r11
    112 	$PUSH		r11,$LRSAVE($sp)
    113 
    114 	li		$ptr,-1
    115 	${UCMP}i	$inp,0
    116 	beq-		Lenc_key_abort		# if ($inp==0) return -1;
    117 	${UCMP}i	$out,0
    118 	beq-		Lenc_key_abort		# if ($out==0) return -1;
    119 	li		$ptr,-2
    120 	cmpwi		$bits,128
    121 	blt-		Lenc_key_abort
    122 	cmpwi		$bits,256
    123 	bgt-		Lenc_key_abort
    124 	andi.		r0,$bits,0x3f
    125 	bne-		Lenc_key_abort
    126 
    127 	lis		r0,0xfff0
    128 	mfspr		$vrsave,256
    129 	mtspr		256,r0
    130 
    131 	bl		Lconsts
    132 	mtlr		r11
    133 
    134 	neg		r9,$inp
    135 	lvx		$in0,0,$inp
    136 	addi		$inp,$inp,15		# 15 is not typo
    137 	lvsr		$key,0,r9		# borrow $key
    138 	li		r8,0x20
    139 	cmpwi		$bits,192
    140 	lvx		$in1,0,$inp
    141 	le?vspltisb	$mask,0x0f		# borrow $mask
    142 	lvx		$rcon,0,$ptr
    143 	le?vxor		$key,$key,$mask		# adjust for byte swap
    144 	lvx		$mask,r8,$ptr
    145 	addi		$ptr,$ptr,0x10
    146 	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
    147 	li		$cnt,8
    148 	vxor		$zero,$zero,$zero
    149 	mtctr		$cnt
    150 
    151 	?lvsr		$outperm,0,$out
    152 	vspltisb	$outmask,-1
    153 	lvx		$outhead,0,$out
    154 	?vperm		$outmask,$zero,$outmask,$outperm
    155 
    156 	blt		Loop128
    157 	addi		$inp,$inp,8
    158 	beq		L192
    159 	addi		$inp,$inp,8
    160 	b		L256
    161 
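# Each Loop128 iteration derives the next 128-bit round key. The vperm
# with the 0x0d0e0f0c mask rotates the last word and splats it across
# all four words; ShiftRows is a no-op on a splatted vector, so
# vcipherlast amounts to SubBytes plus the xor with the splatted rcon.
# The vsldoi/vxor chain then folds the running xor through the
# remaining words, per the AES key schedule.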
    162 .align	4
    163 Loop128:
    164 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    165 	vsldoi		$tmp,$zero,$in0,12	# >>32
    166 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    167 	 vsel		$stage,$outhead,$outtail,$outmask
    168 	 vmr		$outhead,$outtail
    169 	vcipherlast	$key,$key,$rcon
    170 	 stvx		$stage,0,$out
    171 	 addi		$out,$out,16
    172 
    173 	vxor		$in0,$in0,$tmp
    174 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    175 	vxor		$in0,$in0,$tmp
    176 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    177 	vxor		$in0,$in0,$tmp
    178 	 vadduwm	$rcon,$rcon,$rcon
    179 	vxor		$in0,$in0,$key
    180 	bdnz		Loop128
    181 
    182 	lvx		$rcon,0,$ptr		# last two round keys
    183 
    184 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    185 	vsldoi		$tmp,$zero,$in0,12	# >>32
    186 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    187 	 vsel		$stage,$outhead,$outtail,$outmask
    188 	 vmr		$outhead,$outtail
    189 	vcipherlast	$key,$key,$rcon
    190 	 stvx		$stage,0,$out
    191 	 addi		$out,$out,16
    192 
    193 	vxor		$in0,$in0,$tmp
    194 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    195 	vxor		$in0,$in0,$tmp
    196 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    197 	vxor		$in0,$in0,$tmp
    198 	 vadduwm	$rcon,$rcon,$rcon
    199 	vxor		$in0,$in0,$key
    200 
    201 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
    202 	vsldoi		$tmp,$zero,$in0,12	# >>32
    203 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    204 	 vsel		$stage,$outhead,$outtail,$outmask
    205 	 vmr		$outhead,$outtail
    206 	vcipherlast	$key,$key,$rcon
    207 	 stvx		$stage,0,$out
    208 	 addi		$out,$out,16
    209 
    210 	vxor		$in0,$in0,$tmp
    211 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    212 	vxor		$in0,$in0,$tmp
    213 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    214 	vxor		$in0,$in0,$tmp
    215 	vxor		$in0,$in0,$key
    216 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    217 	 vsel		$stage,$outhead,$outtail,$outmask
    218 	 vmr		$outhead,$outtail
    219 	 stvx		$stage,0,$out
    220 
    221 	addi		$inp,$out,15		# 15 is not typo
    222 	addi		$out,$out,0x50
    223 
    224 	li		$rounds,10
    225 	b		Ldone
    226 
    227 .align	4
    228 L192:
    229 	lvx		$tmp,0,$inp
    230 	li		$cnt,4
    231 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    232 	 vsel		$stage,$outhead,$outtail,$outmask
    233 	 vmr		$outhead,$outtail
    234 	 stvx		$stage,0,$out
    235 	 addi		$out,$out,16
    236 	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
    237 	vspltisb	$key,8			# borrow $key
    238 	mtctr		$cnt
    239 	vsububm		$mask,$mask,$key	# adjust the mask
    240 
    241 Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    243 	vsldoi		$tmp,$zero,$in0,12	# >>32
    244 	vcipherlast	$key,$key,$rcon
    245 
    246 	vxor		$in0,$in0,$tmp
    247 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    248 	vxor		$in0,$in0,$tmp
    249 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    250 	vxor		$in0,$in0,$tmp
    251 
    252 	 vsldoi		$stage,$zero,$in1,8
    253 	vspltw		$tmp,$in0,3
    254 	vxor		$tmp,$tmp,$in1
    255 	vsldoi		$in1,$zero,$in1,12	# >>32
    256 	 vadduwm	$rcon,$rcon,$rcon
    257 	vxor		$in1,$in1,$tmp
    258 	vxor		$in0,$in0,$key
    259 	vxor		$in1,$in1,$key
    260 	 vsldoi		$stage,$stage,$in0,8
    261 
    262 	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    263 	vsldoi		$tmp,$zero,$in0,12	# >>32
    264 	 vperm		$outtail,$stage,$stage,$outperm	# rotate
    265 	 vsel		$stage,$outhead,$outtail,$outmask
    266 	 vmr		$outhead,$outtail
    267 	vcipherlast	$key,$key,$rcon
    268 	 stvx		$stage,0,$out
    269 	 addi		$out,$out,16
    270 
    271 	 vsldoi		$stage,$in0,$in1,8
    272 	vxor		$in0,$in0,$tmp
    273 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    274 	 vperm		$outtail,$stage,$stage,$outperm	# rotate
    275 	 vsel		$stage,$outhead,$outtail,$outmask
    276 	 vmr		$outhead,$outtail
    277 	vxor		$in0,$in0,$tmp
    278 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    279 	vxor		$in0,$in0,$tmp
    280 	 stvx		$stage,0,$out
    281 	 addi		$out,$out,16
    282 
    283 	vspltw		$tmp,$in0,3
    284 	vxor		$tmp,$tmp,$in1
    285 	vsldoi		$in1,$zero,$in1,12	# >>32
    286 	 vadduwm	$rcon,$rcon,$rcon
    287 	vxor		$in1,$in1,$tmp
    288 	vxor		$in0,$in0,$key
    289 	vxor		$in1,$in1,$key
    290 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    291 	 vsel		$stage,$outhead,$outtail,$outmask
    292 	 vmr		$outhead,$outtail
    293 	 stvx		$stage,0,$out
    294 	 addi		$inp,$out,15		# 15 is not typo
    295 	 addi		$out,$out,16
    296 	bdnz		Loop192
    297 
    298 	li		$rounds,12
    299 	addi		$out,$out,0x20
    300 	b		Ldone
    301 
    302 .align	4
    303 L256:
    304 	lvx		$tmp,0,$inp
    305 	li		$cnt,7
    306 	li		$rounds,14
    307 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    308 	 vsel		$stage,$outhead,$outtail,$outmask
    309 	 vmr		$outhead,$outtail
    310 	 stvx		$stage,0,$out
    311 	 addi		$out,$out,16
    312 	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
    313 	mtctr		$cnt
    314 
    315 Loop256:
    316 	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
    317 	vsldoi		$tmp,$zero,$in0,12	# >>32
    318 	 vperm		$outtail,$in1,$in1,$outperm	# rotate
    319 	 vsel		$stage,$outhead,$outtail,$outmask
    320 	 vmr		$outhead,$outtail
    321 	vcipherlast	$key,$key,$rcon
    322 	 stvx		$stage,0,$out
    323 	 addi		$out,$out,16
    324 
    325 	vxor		$in0,$in0,$tmp
    326 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    327 	vxor		$in0,$in0,$tmp
    328 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    329 	vxor		$in0,$in0,$tmp
    330 	 vadduwm	$rcon,$rcon,$rcon
    331 	vxor		$in0,$in0,$key
    332 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
    333 	 vsel		$stage,$outhead,$outtail,$outmask
    334 	 vmr		$outhead,$outtail
    335 	 stvx		$stage,0,$out
    336 	 addi		$inp,$out,15		# 15 is not typo
    337 	 addi		$out,$out,16
    338 	bdz		Ldone
    339 
    340 	vspltw		$key,$in0,3		# just splat
    341 	vsldoi		$tmp,$zero,$in1,12	# >>32
    342 	vsbox		$key,$key
    343 
    344 	vxor		$in1,$in1,$tmp
    345 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    346 	vxor		$in1,$in1,$tmp
    347 	vsldoi		$tmp,$zero,$tmp,12	# >>32
    348 	vxor		$in1,$in1,$tmp
    349 
    350 	vxor		$in1,$in1,$key
    351 	b		Loop256
    352 
    353 .align	4
    354 Ldone:
    355 	lvx		$in1,0,$inp		# redundant in aligned case
    356 	vsel		$in1,$outhead,$in1,$outmask
    357 	stvx		$in1,0,$inp
    358 	li		$ptr,0
    359 	mtspr		256,$vrsave
    360 	stw		$rounds,0($out)
    361 
    362 Lenc_key_abort:
    363 	mr		r3,$ptr
    364 	blr
    365 	.long		0
    366 	.byte		0,12,0x14,1,0,0,3,0
    367 	.long		0
    368 .size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
    369 
    370 .globl	.${prefix}_set_decrypt_key
    371 .align	5
    372 .${prefix}_set_decrypt_key:
    373 	$STU		$sp,-$FRAME($sp)
    374 	mflr		r10
    375 	$PUSH		r10,`$FRAME+$LRSAVE`($sp)
    376 	bl		Lset_encrypt_key
    377 	mtlr		r10
    378 
    379 	cmpwi		r3,0
    380 	bne-		Ldec_key_abort
    381 
    382 	slwi		$cnt,$rounds,4
    383 	subi		$inp,$out,240		# first round key
    384 	srwi		$rounds,$rounds,1
    385 	add		$out,$inp,$cnt		# last round key
    386 	mtctr		$rounds
    387 
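# vncipher consumes the regular key schedule in reverse order, so the
# decrypt schedule is simply the encrypt schedule with the round keys
# swapped end-for-end. The loop below exchanges them in 16-byte pairs,
# working from both ends toward the middle.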
    388 Ldeckey:
    389 	lwz		r0, 0($inp)
    390 	lwz		r6, 4($inp)
    391 	lwz		r7, 8($inp)
    392 	lwz		r8, 12($inp)
    393 	addi		$inp,$inp,16
    394 	lwz		r9, 0($out)
    395 	lwz		r10,4($out)
    396 	lwz		r11,8($out)
    397 	lwz		r12,12($out)
    398 	stw		r0, 0($out)
    399 	stw		r6, 4($out)
    400 	stw		r7, 8($out)
    401 	stw		r8, 12($out)
    402 	subi		$out,$out,16
    403 	stw		r9, -16($inp)
    404 	stw		r10,-12($inp)
    405 	stw		r11,-8($inp)
    406 	stw		r12,-4($inp)
    407 	bdnz		Ldeckey
    408 
    409 	xor		r3,r3,r3		# return value
    410 Ldec_key_abort:
    411 	addi		$sp,$sp,$FRAME
    412 	blr
    413 	.long		0
    414 	.byte		0,12,4,1,0x80,0,3,0
    415 	.long		0
    416 .size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
    417 ___
    418 }}}
    419 #########################################################################
    420 {{{	# Single block en- and decrypt procedures			#
    421 sub gen_block () {
    422 my $dir = shift;
    423 my $n   = $dir eq "de" ? "n" : "";
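# $n turns v${n}cipher/v${n}cipherlast into vncipher/vncipherlast for
# the "de" flavour; encryption and decryption otherwise share this code.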
    424 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
    425 
    426 $code.=<<___;
    427 .globl	.${prefix}_${dir}crypt
    428 .align	5
    429 .${prefix}_${dir}crypt:
    430 	lwz		$rounds,240($key)
    431 	lis		r0,0xfc00
    432 	mfspr		$vrsave,256
    433 	li		$idx,15			# 15 is not typo
    434 	mtspr		256,r0
    435 
    436 	lvx		v0,0,$inp
    437 	neg		r11,$out
    438 	lvx		v1,$idx,$inp
    439 	lvsl		v2,0,$inp		# inpperm
    440 	le?vspltisb	v4,0x0f
    441 	?lvsl		v3,0,r11		# outperm
    442 	le?vxor		v2,v2,v4
    443 	li		$idx,16
    444 	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
    445 	lvx		v1,0,$key
    446 	?lvsl		v5,0,$key		# keyperm
    447 	srwi		$rounds,$rounds,1
    448 	lvx		v2,$idx,$key
    449 	addi		$idx,$idx,16
    450 	subi		$rounds,$rounds,1
    451 	?vperm		v1,v1,v2,v5		# align round key
    452 
    453 	vxor		v0,v0,v1
    454 	lvx		v1,$idx,$key
    455 	addi		$idx,$idx,16
    456 	mtctr		$rounds
    457 
    458 Loop_${dir}c:
    459 	?vperm		v2,v2,v1,v5
    460 	v${n}cipher	v0,v0,v2
    461 	lvx		v2,$idx,$key
    462 	addi		$idx,$idx,16
    463 	?vperm		v1,v1,v2,v5
    464 	v${n}cipher	v0,v0,v1
    465 	lvx		v1,$idx,$key
    466 	addi		$idx,$idx,16
    467 	bdnz		Loop_${dir}c
    468 
    469 	?vperm		v2,v2,v1,v5
    470 	v${n}cipher	v0,v0,v2
    471 	lvx		v2,$idx,$key
    472 	?vperm		v1,v1,v2,v5
    473 	v${n}cipherlast	v0,v0,v1
    474 
    475 	vspltisb	v2,-1
    476 	vxor		v1,v1,v1
    477 	li		$idx,15			# 15 is not typo
    478 	?vperm		v2,v1,v2,v3		# outmask
    479 	le?vxor		v3,v3,v4
    480 	lvx		v1,0,$out		# outhead
    481 	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
    482 	vsel		v1,v1,v0,v2
    483 	lvx		v4,$idx,$out
    484 	stvx		v1,0,$out
    485 	vsel		v0,v0,v4,v2
    486 	stvx		v0,$idx,$out
    487 
    488 	mtspr		256,$vrsave
    489 	blr
    490 	.long		0
    491 	.byte		0,12,0x14,0,0,0,3,0
    492 	.long		0
    493 .size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
    494 ___
    495 }
    496 &gen_block("en");
    497 &gen_block("de");
    498 }}}
    499 #########################################################################
    500 {{{	# CBC en- and decrypt procedures				#
    501 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
    502 my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
    503 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
    504 						map("v$_",(4..10));
    505 $code.=<<___;
    506 .globl	.${prefix}_cbc_encrypt
    507 .align	5
    508 .${prefix}_cbc_encrypt:
    509 	${UCMP}i	$len,16
    510 	bltlr-
    511 
    512 	cmpwi		$enc,0			# test direction
    513 	lis		r0,0xffe0
    514 	mfspr		$vrsave,256
    515 	mtspr		256,r0
    516 
    517 	li		$idx,15
    518 	vxor		$rndkey0,$rndkey0,$rndkey0
    519 	le?vspltisb	$tmp,0x0f
    520 
    521 	lvx		$ivec,0,$ivp		# load [unaligned] iv
    522 	lvsl		$inpperm,0,$ivp
    523 	lvx		$inptail,$idx,$ivp
    524 	le?vxor		$inpperm,$inpperm,$tmp
    525 	vperm		$ivec,$ivec,$inptail,$inpperm
    526 
    527 	neg		r11,$inp
    528 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
    529 	lwz		$rounds,240($key)
    530 
    531 	lvsr		$inpperm,0,r11		# prepare for unaligned load
    532 	lvx		$inptail,0,$inp
    533 	addi		$inp,$inp,15		# 15 is not typo
    534 	le?vxor		$inpperm,$inpperm,$tmp
    535 
    536 	?lvsr		$outperm,0,$out		# prepare for unaligned store
    537 	vspltisb	$outmask,-1
    538 	lvx		$outhead,0,$out
    539 	?vperm		$outmask,$rndkey0,$outmask,$outperm
    540 	le?vxor		$outperm,$outperm,$tmp
    541 
    542 	srwi		$rounds,$rounds,1
    543 	li		$idx,16
    544 	subi		$rounds,$rounds,1
    545 	beq		Lcbc_dec
    546 
    547 Lcbc_enc:
    548 	vmr		$inout,$inptail
    549 	lvx		$inptail,0,$inp
    550 	addi		$inp,$inp,16
    551 	mtctr		$rounds
    552 	subi		$len,$len,16		# len-=16
    553 
    554 	lvx		$rndkey0,0,$key
    555 	 vperm		$inout,$inout,$inptail,$inpperm
    556 	lvx		$rndkey1,$idx,$key
    557 	addi		$idx,$idx,16
    558 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    559 	vxor		$inout,$inout,$rndkey0
    560 	lvx		$rndkey0,$idx,$key
    561 	addi		$idx,$idx,16
    562 	vxor		$inout,$inout,$ivec
    563 
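# Note $rounds was halved and decremented above: each Loop_cbc_enc
# iteration applies two rounds, with the final vcipher/vcipherlast pair
# peeled off after the loop.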
    564 Loop_cbc_enc:
    565 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    566 	vcipher		$inout,$inout,$rndkey1
    567 	lvx		$rndkey1,$idx,$key
    568 	addi		$idx,$idx,16
    569 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    570 	vcipher		$inout,$inout,$rndkey0
    571 	lvx		$rndkey0,$idx,$key
    572 	addi		$idx,$idx,16
    573 	bdnz		Loop_cbc_enc
    574 
    575 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    576 	vcipher		$inout,$inout,$rndkey1
    577 	lvx		$rndkey1,$idx,$key
    578 	li		$idx,16
    579 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    580 	vcipherlast	$ivec,$inout,$rndkey0
    581 	${UCMP}i	$len,16
    582 
    583 	vperm		$tmp,$ivec,$ivec,$outperm
    584 	vsel		$inout,$outhead,$tmp,$outmask
    585 	vmr		$outhead,$tmp
    586 	stvx		$inout,0,$out
    587 	addi		$out,$out,16
    588 	bge		Lcbc_enc
    589 
    590 	b		Lcbc_done
    591 
    592 .align	4
    593 Lcbc_dec:
    594 	${UCMP}i	$len,128
    595 	bge		_aesp8_cbc_decrypt8x
    596 	vmr		$tmp,$inptail
    597 	lvx		$inptail,0,$inp
    598 	addi		$inp,$inp,16
    599 	mtctr		$rounds
    600 	subi		$len,$len,16		# len-=16
    601 
    602 	lvx		$rndkey0,0,$key
    603 	 vperm		$tmp,$tmp,$inptail,$inpperm
    604 	lvx		$rndkey1,$idx,$key
    605 	addi		$idx,$idx,16
    606 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    607 	vxor		$inout,$tmp,$rndkey0
    608 	lvx		$rndkey0,$idx,$key
    609 	addi		$idx,$idx,16
    610 
    611 Loop_cbc_dec:
    612 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    613 	vncipher	$inout,$inout,$rndkey1
    614 	lvx		$rndkey1,$idx,$key
    615 	addi		$idx,$idx,16
    616 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    617 	vncipher	$inout,$inout,$rndkey0
    618 	lvx		$rndkey0,$idx,$key
    619 	addi		$idx,$idx,16
    620 	bdnz		Loop_cbc_dec
    621 
    622 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
    623 	vncipher	$inout,$inout,$rndkey1
    624 	lvx		$rndkey1,$idx,$key
    625 	li		$idx,16
    626 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
    627 	vncipherlast	$inout,$inout,$rndkey0
    628 	${UCMP}i	$len,16
    629 
    630 	vxor		$inout,$inout,$ivec
    631 	vmr		$ivec,$tmp
    632 	vperm		$tmp,$inout,$inout,$outperm
    633 	vsel		$inout,$outhead,$tmp,$outmask
    634 	vmr		$outhead,$tmp
    635 	stvx		$inout,0,$out
    636 	addi		$out,$out,16
    637 	bge		Lcbc_dec
    638 
    639 Lcbc_done:
    640 	addi		$out,$out,-1
    641 	lvx		$inout,0,$out		# redundant in aligned case
    642 	vsel		$inout,$outhead,$inout,$outmask
    643 	stvx		$inout,0,$out
    644 
    645 	neg		$enc,$ivp		# write [unaligned] iv
    646 	li		$idx,15			# 15 is not typo
    647 	vxor		$rndkey0,$rndkey0,$rndkey0
    648 	vspltisb	$outmask,-1
    649 	le?vspltisb	$tmp,0x0f
    650 	?lvsl		$outperm,0,$enc
    651 	?vperm		$outmask,$rndkey0,$outmask,$outperm
    652 	le?vxor		$outperm,$outperm,$tmp
    653 	lvx		$outhead,0,$ivp
    654 	vperm		$ivec,$ivec,$ivec,$outperm
    655 	vsel		$inout,$outhead,$ivec,$outmask
    656 	lvx		$inptail,$idx,$ivp
    657 	stvx		$inout,0,$ivp
    658 	vsel		$inout,$ivec,$inptail,$outmask
    659 	stvx		$inout,$idx,$ivp
    660 
    661 	mtspr		256,$vrsave
    662 	blr
    663 	.long		0
    664 	.byte		0,12,0x14,0,0,0,6,0
    665 	.long		0
    666 ___
    667 #########################################################################
    668 {{	# Optimized CBC decrypt procedure				#
    669 my $key_="r11";
    670 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
    671     $x00=0 if ($flavour =~ /osx/);
    672 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
    673 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
    676 my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
    677 
    678 $code.=<<___;
    679 .align	5
    680 _aesp8_cbc_decrypt8x:
    681 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
    682 	li		r10,`$FRAME+8*16+15`
    683 	li		r11,`$FRAME+8*16+31`
    684 	stvx		v20,r10,$sp		# ABI says so
    685 	addi		r10,r10,32
    686 	stvx		v21,r11,$sp
    687 	addi		r11,r11,32
    688 	stvx		v22,r10,$sp
    689 	addi		r10,r10,32
    690 	stvx		v23,r11,$sp
    691 	addi		r11,r11,32
    692 	stvx		v24,r10,$sp
    693 	addi		r10,r10,32
    694 	stvx		v25,r11,$sp
    695 	addi		r11,r11,32
    696 	stvx		v26,r10,$sp
    697 	addi		r10,r10,32
    698 	stvx		v27,r11,$sp
    699 	addi		r11,r11,32
    700 	stvx		v28,r10,$sp
    701 	addi		r10,r10,32
    702 	stvx		v29,r11,$sp
    703 	addi		r11,r11,32
    704 	stvx		v30,r10,$sp
    705 	stvx		v31,r11,$sp
    706 	li		r0,-1
    707 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
    708 	li		$x10,0x10
    709 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
    710 	li		$x20,0x20
    711 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
    712 	li		$x30,0x30
    713 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
    714 	li		$x40,0x40
    715 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
    716 	li		$x50,0x50
    717 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
    718 	li		$x60,0x60
    719 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
    720 	li		$x70,0x70
    721 	mtspr		256,r0
    722 
    723 	subi		$rounds,$rounds,3	# -4 in total
    724 	subi		$len,$len,128		# bias
    725 
    726 	lvx		$rndkey0,$x00,$key	# load key schedule
    727 	lvx		v30,$x10,$key
    728 	addi		$key,$key,0x20
    729 	lvx		v31,$x00,$key
    730 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
    731 	addi		$key_,$sp,`$FRAME+15`
    732 	mtctr		$rounds
    733 
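# The whole schedule is aligned via vperm once and stashed on the
# stack; the leading round keys then rotate through v24/v25 from that
# stack copy, while the last six round keys stay resident in v26-v31.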
    734 Load_cbc_dec_key:
    735 	?vperm		v24,v30,v31,$keyperm
    736 	lvx		v30,$x10,$key
    737 	addi		$key,$key,0x20
    738 	stvx		v24,$x00,$key_		# off-load round[1]
    739 	?vperm		v25,v31,v30,$keyperm
    740 	lvx		v31,$x00,$key
    741 	stvx		v25,$x10,$key_		# off-load round[2]
    742 	addi		$key_,$key_,0x20
    743 	bdnz		Load_cbc_dec_key
    744 
    745 	lvx		v26,$x10,$key
    746 	?vperm		v24,v30,v31,$keyperm
    747 	lvx		v27,$x20,$key
    748 	stvx		v24,$x00,$key_		# off-load round[3]
    749 	?vperm		v25,v31,v26,$keyperm
    750 	lvx		v28,$x30,$key
    751 	stvx		v25,$x10,$key_		# off-load round[4]
    752 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
    753 	?vperm		v26,v26,v27,$keyperm
    754 	lvx		v29,$x40,$key
    755 	?vperm		v27,v27,v28,$keyperm
    756 	lvx		v30,$x50,$key
    757 	?vperm		v28,v28,v29,$keyperm
    758 	lvx		v31,$x60,$key
    759 	?vperm		v29,v29,v30,$keyperm
    760 	lvx		$out0,$x70,$key		# borrow $out0
    761 	?vperm		v30,v30,v31,$keyperm
    762 	lvx		v24,$x00,$key_		# pre-load round[1]
    763 	?vperm		v31,v31,$out0,$keyperm
    764 	lvx		v25,$x10,$key_		# pre-load round[2]
    765 
    766 	#lvx		$inptail,0,$inp		# "caller" already did this
    767 	#addi		$inp,$inp,15		# 15 is not typo
    768 	subi		$inp,$inp,15		# undo "caller"
    769 
    770 	 le?li		$idx,8
    771 	lvx_u		$in0,$x00,$inp		# load first 8 "words"
    772 	 le?lvsl	$inpperm,0,$idx
    773 	 le?vspltisb	$tmp,0x0f
    774 	lvx_u		$in1,$x10,$inp
    775 	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
    776 	lvx_u		$in2,$x20,$inp
    777 	 le?vperm	$in0,$in0,$in0,$inpperm
    778 	lvx_u		$in3,$x30,$inp
    779 	 le?vperm	$in1,$in1,$in1,$inpperm
    780 	lvx_u		$in4,$x40,$inp
    781 	 le?vperm	$in2,$in2,$in2,$inpperm
    782 	vxor		$out0,$in0,$rndkey0
    783 	lvx_u		$in5,$x50,$inp
    784 	 le?vperm	$in3,$in3,$in3,$inpperm
    785 	vxor		$out1,$in1,$rndkey0
    786 	lvx_u		$in6,$x60,$inp
    787 	 le?vperm	$in4,$in4,$in4,$inpperm
    788 	vxor		$out2,$in2,$rndkey0
    789 	lvx_u		$in7,$x70,$inp
    790 	addi		$inp,$inp,0x80
    791 	 le?vperm	$in5,$in5,$in5,$inpperm
    792 	vxor		$out3,$in3,$rndkey0
    793 	 le?vperm	$in6,$in6,$in6,$inpperm
    794 	vxor		$out4,$in4,$rndkey0
    795 	 le?vperm	$in7,$in7,$in7,$inpperm
    796 	vxor		$out5,$in5,$rndkey0
    797 	vxor		$out6,$in6,$rndkey0
    798 	vxor		$out7,$in7,$rndkey0
    799 
    800 	mtctr		$rounds
    801 	b		Loop_cbc_dec8x
    802 .align	5
    803 Loop_cbc_dec8x:
    804 	vncipher	$out0,$out0,v24
    805 	vncipher	$out1,$out1,v24
    806 	vncipher	$out2,$out2,v24
    807 	vncipher	$out3,$out3,v24
    808 	vncipher	$out4,$out4,v24
    809 	vncipher	$out5,$out5,v24
    810 	vncipher	$out6,$out6,v24
    811 	vncipher	$out7,$out7,v24
    812 	lvx		v24,$x20,$key_		# round[3]
    813 	addi		$key_,$key_,0x20
    814 
    815 	vncipher	$out0,$out0,v25
    816 	vncipher	$out1,$out1,v25
    817 	vncipher	$out2,$out2,v25
    818 	vncipher	$out3,$out3,v25
    819 	vncipher	$out4,$out4,v25
    820 	vncipher	$out5,$out5,v25
    821 	vncipher	$out6,$out6,v25
    822 	vncipher	$out7,$out7,v25
    823 	lvx		v25,$x10,$key_		# round[4]
    824 	bdnz		Loop_cbc_dec8x
    825 
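# Branchless tail handling: subic sets CA only if another full
# 128-byte batch remains, subfe turns that into r0 = 0 (continue) or
# -1 (last batch), and the and/add below then pulls $inp back so the
# final lvx_u pass re-reads the last blocks instead of overshooting
# the input.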
    826 	subic		$len,$len,128		# $len-=128
    827 	vncipher	$out0,$out0,v24
    828 	vncipher	$out1,$out1,v24
    829 	vncipher	$out2,$out2,v24
    830 	vncipher	$out3,$out3,v24
    831 	vncipher	$out4,$out4,v24
    832 	vncipher	$out5,$out5,v24
    833 	vncipher	$out6,$out6,v24
    834 	vncipher	$out7,$out7,v24
    835 
    836 	subfe.		r0,r0,r0		# borrow?-1:0
    837 	vncipher	$out0,$out0,v25
    838 	vncipher	$out1,$out1,v25
    839 	vncipher	$out2,$out2,v25
    840 	vncipher	$out3,$out3,v25
    841 	vncipher	$out4,$out4,v25
    842 	vncipher	$out5,$out5,v25
    843 	vncipher	$out6,$out6,v25
    844 	vncipher	$out7,$out7,v25
    845 
    846 	and		r0,r0,$len
    847 	vncipher	$out0,$out0,v26
    848 	vncipher	$out1,$out1,v26
    849 	vncipher	$out2,$out2,v26
    850 	vncipher	$out3,$out3,v26
    851 	vncipher	$out4,$out4,v26
    852 	vncipher	$out5,$out5,v26
    853 	vncipher	$out6,$out6,v26
    854 	vncipher	$out7,$out7,v26
    855 
	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in7 are loaded
						# with the last "words"
    860 	vncipher	$out0,$out0,v27
    861 	vncipher	$out1,$out1,v27
    862 	vncipher	$out2,$out2,v27
    863 	vncipher	$out3,$out3,v27
    864 	vncipher	$out4,$out4,v27
    865 	vncipher	$out5,$out5,v27
    866 	vncipher	$out6,$out6,v27
    867 	vncipher	$out7,$out7,v27
    868 
    869 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
    870 	vncipher	$out0,$out0,v28
    871 	vncipher	$out1,$out1,v28
    872 	vncipher	$out2,$out2,v28
    873 	vncipher	$out3,$out3,v28
    874 	vncipher	$out4,$out4,v28
    875 	vncipher	$out5,$out5,v28
    876 	vncipher	$out6,$out6,v28
    877 	vncipher	$out7,$out7,v28
    878 	lvx		v24,$x00,$key_		# re-pre-load round[1]
    879 
    880 	vncipher	$out0,$out0,v29
    881 	vncipher	$out1,$out1,v29
    882 	vncipher	$out2,$out2,v29
    883 	vncipher	$out3,$out3,v29
    884 	vncipher	$out4,$out4,v29
    885 	vncipher	$out5,$out5,v29
    886 	vncipher	$out6,$out6,v29
    887 	vncipher	$out7,$out7,v29
    888 	lvx		v25,$x10,$key_		# re-pre-load round[2]
    889 
    890 	vncipher	$out0,$out0,v30
    891 	 vxor		$ivec,$ivec,v31		# xor with last round key
    892 	vncipher	$out1,$out1,v30
    893 	 vxor		$in0,$in0,v31
    894 	vncipher	$out2,$out2,v30
    895 	 vxor		$in1,$in1,v31
    896 	vncipher	$out3,$out3,v30
    897 	 vxor		$in2,$in2,v31
    898 	vncipher	$out4,$out4,v30
    899 	 vxor		$in3,$in3,v31
    900 	vncipher	$out5,$out5,v30
    901 	 vxor		$in4,$in4,v31
    902 	vncipher	$out6,$out6,v30
    903 	 vxor		$in5,$in5,v31
    904 	vncipher	$out7,$out7,v30
    905 	 vxor		$in6,$in6,v31
    906 
    907 	vncipherlast	$out0,$out0,$ivec
    908 	vncipherlast	$out1,$out1,$in0
    909 	 lvx_u		$in0,$x00,$inp		# load next input block
    910 	vncipherlast	$out2,$out2,$in1
    911 	 lvx_u		$in1,$x10,$inp
    912 	vncipherlast	$out3,$out3,$in2
    913 	 le?vperm	$in0,$in0,$in0,$inpperm
    914 	 lvx_u		$in2,$x20,$inp
    915 	vncipherlast	$out4,$out4,$in3
    916 	 le?vperm	$in1,$in1,$in1,$inpperm
    917 	 lvx_u		$in3,$x30,$inp
    918 	vncipherlast	$out5,$out5,$in4
    919 	 le?vperm	$in2,$in2,$in2,$inpperm
    920 	 lvx_u		$in4,$x40,$inp
    921 	vncipherlast	$out6,$out6,$in5
    922 	 le?vperm	$in3,$in3,$in3,$inpperm
    923 	 lvx_u		$in5,$x50,$inp
    924 	vncipherlast	$out7,$out7,$in6
    925 	 le?vperm	$in4,$in4,$in4,$inpperm
    926 	 lvx_u		$in6,$x60,$inp
    927 	vmr		$ivec,$in7
    928 	 le?vperm	$in5,$in5,$in5,$inpperm
    929 	 lvx_u		$in7,$x70,$inp
    930 	 addi		$inp,$inp,0x80
    931 
    932 	le?vperm	$out0,$out0,$out0,$inpperm
    933 	le?vperm	$out1,$out1,$out1,$inpperm
    934 	stvx_u		$out0,$x00,$out
    935 	 le?vperm	$in6,$in6,$in6,$inpperm
    936 	 vxor		$out0,$in0,$rndkey0
    937 	le?vperm	$out2,$out2,$out2,$inpperm
    938 	stvx_u		$out1,$x10,$out
    939 	 le?vperm	$in7,$in7,$in7,$inpperm
    940 	 vxor		$out1,$in1,$rndkey0
    941 	le?vperm	$out3,$out3,$out3,$inpperm
    942 	stvx_u		$out2,$x20,$out
    943 	 vxor		$out2,$in2,$rndkey0
    944 	le?vperm	$out4,$out4,$out4,$inpperm
    945 	stvx_u		$out3,$x30,$out
    946 	 vxor		$out3,$in3,$rndkey0
    947 	le?vperm	$out5,$out5,$out5,$inpperm
    948 	stvx_u		$out4,$x40,$out
    949 	 vxor		$out4,$in4,$rndkey0
    950 	le?vperm	$out6,$out6,$out6,$inpperm
    951 	stvx_u		$out5,$x50,$out
    952 	 vxor		$out5,$in5,$rndkey0
    953 	le?vperm	$out7,$out7,$out7,$inpperm
    954 	stvx_u		$out6,$x60,$out
    955 	 vxor		$out6,$in6,$rndkey0
    956 	stvx_u		$out7,$x70,$out
    957 	addi		$out,$out,0x80
    958 	 vxor		$out7,$in7,$rndkey0
    959 
    960 	mtctr		$rounds
    961 	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
    962 
    963 	addic.		$len,$len,128
    964 	beq		Lcbc_dec8x_done
    965 	nop
    966 	nop
    967 
    968 Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
    969 	vncipher	$out1,$out1,v24
    970 	vncipher	$out2,$out2,v24
    971 	vncipher	$out3,$out3,v24
    972 	vncipher	$out4,$out4,v24
    973 	vncipher	$out5,$out5,v24
    974 	vncipher	$out6,$out6,v24
    975 	vncipher	$out7,$out7,v24
    976 	lvx		v24,$x20,$key_		# round[3]
    977 	addi		$key_,$key_,0x20
    978 
    979 	vncipher	$out1,$out1,v25
    980 	vncipher	$out2,$out2,v25
    981 	vncipher	$out3,$out3,v25
    982 	vncipher	$out4,$out4,v25
    983 	vncipher	$out5,$out5,v25
    984 	vncipher	$out6,$out6,v25
    985 	vncipher	$out7,$out7,v25
    986 	lvx		v25,$x10,$key_		# round[4]
    987 	bdnz		Loop_cbc_dec8x_tail
    988 
    989 	vncipher	$out1,$out1,v24
    990 	vncipher	$out2,$out2,v24
    991 	vncipher	$out3,$out3,v24
    992 	vncipher	$out4,$out4,v24
    993 	vncipher	$out5,$out5,v24
    994 	vncipher	$out6,$out6,v24
    995 	vncipher	$out7,$out7,v24
    996 
    997 	vncipher	$out1,$out1,v25
    998 	vncipher	$out2,$out2,v25
    999 	vncipher	$out3,$out3,v25
   1000 	vncipher	$out4,$out4,v25
   1001 	vncipher	$out5,$out5,v25
   1002 	vncipher	$out6,$out6,v25
   1003 	vncipher	$out7,$out7,v25
   1004 
   1005 	vncipher	$out1,$out1,v26
   1006 	vncipher	$out2,$out2,v26
   1007 	vncipher	$out3,$out3,v26
   1008 	vncipher	$out4,$out4,v26
   1009 	vncipher	$out5,$out5,v26
   1010 	vncipher	$out6,$out6,v26
   1011 	vncipher	$out7,$out7,v26
   1012 
   1013 	vncipher	$out1,$out1,v27
   1014 	vncipher	$out2,$out2,v27
   1015 	vncipher	$out3,$out3,v27
   1016 	vncipher	$out4,$out4,v27
   1017 	vncipher	$out5,$out5,v27
   1018 	vncipher	$out6,$out6,v27
   1019 	vncipher	$out7,$out7,v27
   1020 
   1021 	vncipher	$out1,$out1,v28
   1022 	vncipher	$out2,$out2,v28
   1023 	vncipher	$out3,$out3,v28
   1024 	vncipher	$out4,$out4,v28
   1025 	vncipher	$out5,$out5,v28
   1026 	vncipher	$out6,$out6,v28
   1027 	vncipher	$out7,$out7,v28
   1028 
   1029 	vncipher	$out1,$out1,v29
   1030 	vncipher	$out2,$out2,v29
   1031 	vncipher	$out3,$out3,v29
   1032 	vncipher	$out4,$out4,v29
   1033 	vncipher	$out5,$out5,v29
   1034 	vncipher	$out6,$out6,v29
   1035 	vncipher	$out7,$out7,v29
   1036 
   1037 	vncipher	$out1,$out1,v30
   1038 	 vxor		$ivec,$ivec,v31		# last round key
   1039 	vncipher	$out2,$out2,v30
   1040 	 vxor		$in1,$in1,v31
   1041 	vncipher	$out3,$out3,v30
   1042 	 vxor		$in2,$in2,v31
   1043 	vncipher	$out4,$out4,v30
   1044 	 vxor		$in3,$in3,v31
   1045 	vncipher	$out5,$out5,v30
   1046 	 vxor		$in4,$in4,v31
   1047 	vncipher	$out6,$out6,v30
   1048 	 vxor		$in5,$in5,v31
   1049 	vncipher	$out7,$out7,v30
   1050 	 vxor		$in6,$in6,v31
   1051 
   1052 	cmplwi		$len,32			# switch($len)
   1053 	blt		Lcbc_dec8x_one
   1054 	nop
   1055 	beq		Lcbc_dec8x_two
   1056 	cmplwi		$len,64
   1057 	blt		Lcbc_dec8x_three
   1058 	nop
   1059 	beq		Lcbc_dec8x_four
   1060 	cmplwi		$len,96
   1061 	blt		Lcbc_dec8x_five
   1062 	nop
   1063 	beq		Lcbc_dec8x_six
   1064 
   1065 Lcbc_dec8x_seven:
   1066 	vncipherlast	$out1,$out1,$ivec
   1067 	vncipherlast	$out2,$out2,$in1
   1068 	vncipherlast	$out3,$out3,$in2
   1069 	vncipherlast	$out4,$out4,$in3
   1070 	vncipherlast	$out5,$out5,$in4
   1071 	vncipherlast	$out6,$out6,$in5
   1072 	vncipherlast	$out7,$out7,$in6
   1073 	vmr		$ivec,$in7
   1074 
   1075 	le?vperm	$out1,$out1,$out1,$inpperm
   1076 	le?vperm	$out2,$out2,$out2,$inpperm
   1077 	stvx_u		$out1,$x00,$out
   1078 	le?vperm	$out3,$out3,$out3,$inpperm
   1079 	stvx_u		$out2,$x10,$out
   1080 	le?vperm	$out4,$out4,$out4,$inpperm
   1081 	stvx_u		$out3,$x20,$out
   1082 	le?vperm	$out5,$out5,$out5,$inpperm
   1083 	stvx_u		$out4,$x30,$out
   1084 	le?vperm	$out6,$out6,$out6,$inpperm
   1085 	stvx_u		$out5,$x40,$out
   1086 	le?vperm	$out7,$out7,$out7,$inpperm
   1087 	stvx_u		$out6,$x50,$out
   1088 	stvx_u		$out7,$x60,$out
   1089 	addi		$out,$out,0x70
   1090 	b		Lcbc_dec8x_done
   1091 
   1092 .align	5
   1093 Lcbc_dec8x_six:
   1094 	vncipherlast	$out2,$out2,$ivec
   1095 	vncipherlast	$out3,$out3,$in2
   1096 	vncipherlast	$out4,$out4,$in3
   1097 	vncipherlast	$out5,$out5,$in4
   1098 	vncipherlast	$out6,$out6,$in5
   1099 	vncipherlast	$out7,$out7,$in6
   1100 	vmr		$ivec,$in7
   1101 
   1102 	le?vperm	$out2,$out2,$out2,$inpperm
   1103 	le?vperm	$out3,$out3,$out3,$inpperm
   1104 	stvx_u		$out2,$x00,$out
   1105 	le?vperm	$out4,$out4,$out4,$inpperm
   1106 	stvx_u		$out3,$x10,$out
   1107 	le?vperm	$out5,$out5,$out5,$inpperm
   1108 	stvx_u		$out4,$x20,$out
   1109 	le?vperm	$out6,$out6,$out6,$inpperm
   1110 	stvx_u		$out5,$x30,$out
   1111 	le?vperm	$out7,$out7,$out7,$inpperm
   1112 	stvx_u		$out6,$x40,$out
   1113 	stvx_u		$out7,$x50,$out
   1114 	addi		$out,$out,0x60
   1115 	b		Lcbc_dec8x_done
   1116 
   1117 .align	5
   1118 Lcbc_dec8x_five:
   1119 	vncipherlast	$out3,$out3,$ivec
   1120 	vncipherlast	$out4,$out4,$in3
   1121 	vncipherlast	$out5,$out5,$in4
   1122 	vncipherlast	$out6,$out6,$in5
   1123 	vncipherlast	$out7,$out7,$in6
   1124 	vmr		$ivec,$in7
   1125 
   1126 	le?vperm	$out3,$out3,$out3,$inpperm
   1127 	le?vperm	$out4,$out4,$out4,$inpperm
   1128 	stvx_u		$out3,$x00,$out
   1129 	le?vperm	$out5,$out5,$out5,$inpperm
   1130 	stvx_u		$out4,$x10,$out
   1131 	le?vperm	$out6,$out6,$out6,$inpperm
   1132 	stvx_u		$out5,$x20,$out
   1133 	le?vperm	$out7,$out7,$out7,$inpperm
   1134 	stvx_u		$out6,$x30,$out
   1135 	stvx_u		$out7,$x40,$out
   1136 	addi		$out,$out,0x50
   1137 	b		Lcbc_dec8x_done
   1138 
   1139 .align	5
   1140 Lcbc_dec8x_four:
   1141 	vncipherlast	$out4,$out4,$ivec
   1142 	vncipherlast	$out5,$out5,$in4
   1143 	vncipherlast	$out6,$out6,$in5
   1144 	vncipherlast	$out7,$out7,$in6
   1145 	vmr		$ivec,$in7
   1146 
   1147 	le?vperm	$out4,$out4,$out4,$inpperm
   1148 	le?vperm	$out5,$out5,$out5,$inpperm
   1149 	stvx_u		$out4,$x00,$out
   1150 	le?vperm	$out6,$out6,$out6,$inpperm
   1151 	stvx_u		$out5,$x10,$out
   1152 	le?vperm	$out7,$out7,$out7,$inpperm
   1153 	stvx_u		$out6,$x20,$out
   1154 	stvx_u		$out7,$x30,$out
   1155 	addi		$out,$out,0x40
   1156 	b		Lcbc_dec8x_done
   1157 
   1158 .align	5
   1159 Lcbc_dec8x_three:
   1160 	vncipherlast	$out5,$out5,$ivec
   1161 	vncipherlast	$out6,$out6,$in5
   1162 	vncipherlast	$out7,$out7,$in6
   1163 	vmr		$ivec,$in7
   1164 
   1165 	le?vperm	$out5,$out5,$out5,$inpperm
   1166 	le?vperm	$out6,$out6,$out6,$inpperm
   1167 	stvx_u		$out5,$x00,$out
   1168 	le?vperm	$out7,$out7,$out7,$inpperm
   1169 	stvx_u		$out6,$x10,$out
   1170 	stvx_u		$out7,$x20,$out
   1171 	addi		$out,$out,0x30
   1172 	b		Lcbc_dec8x_done
   1173 
   1174 .align	5
   1175 Lcbc_dec8x_two:
   1176 	vncipherlast	$out6,$out6,$ivec
   1177 	vncipherlast	$out7,$out7,$in6
   1178 	vmr		$ivec,$in7
   1179 
   1180 	le?vperm	$out6,$out6,$out6,$inpperm
   1181 	le?vperm	$out7,$out7,$out7,$inpperm
   1182 	stvx_u		$out6,$x00,$out
   1183 	stvx_u		$out7,$x10,$out
   1184 	addi		$out,$out,0x20
   1185 	b		Lcbc_dec8x_done
   1186 
   1187 .align	5
   1188 Lcbc_dec8x_one:
   1189 	vncipherlast	$out7,$out7,$ivec
   1190 	vmr		$ivec,$in7
   1191 
   1192 	le?vperm	$out7,$out7,$out7,$inpperm
   1193 	stvx_u		$out7,0,$out
   1194 	addi		$out,$out,0x10
   1195 
   1196 Lcbc_dec8x_done:
   1197 	le?vperm	$ivec,$ivec,$ivec,$inpperm
   1198 	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
   1199 
   1200 	li		r10,`$FRAME+15`
   1201 	li		r11,`$FRAME+31`
   1202 	stvx		$inpperm,r10,$sp	# wipe copies of round keys
   1203 	addi		r10,r10,32
   1204 	stvx		$inpperm,r11,$sp
   1205 	addi		r11,r11,32
   1206 	stvx		$inpperm,r10,$sp
   1207 	addi		r10,r10,32
   1208 	stvx		$inpperm,r11,$sp
   1209 	addi		r11,r11,32
   1210 	stvx		$inpperm,r10,$sp
   1211 	addi		r10,r10,32
   1212 	stvx		$inpperm,r11,$sp
   1213 	addi		r11,r11,32
   1214 	stvx		$inpperm,r10,$sp
   1215 	addi		r10,r10,32
   1216 	stvx		$inpperm,r11,$sp
   1217 	addi		r11,r11,32
   1218 
   1219 	mtspr		256,$vrsave
   1220 	lvx		v20,r10,$sp		# ABI says so
   1221 	addi		r10,r10,32
   1222 	lvx		v21,r11,$sp
   1223 	addi		r11,r11,32
   1224 	lvx		v22,r10,$sp
   1225 	addi		r10,r10,32
   1226 	lvx		v23,r11,$sp
   1227 	addi		r11,r11,32
   1228 	lvx		v24,r10,$sp
   1229 	addi		r10,r10,32
   1230 	lvx		v25,r11,$sp
   1231 	addi		r11,r11,32
   1232 	lvx		v26,r10,$sp
   1233 	addi		r10,r10,32
   1234 	lvx		v27,r11,$sp
   1235 	addi		r11,r11,32
   1236 	lvx		v28,r10,$sp
   1237 	addi		r10,r10,32
   1238 	lvx		v29,r11,$sp
   1239 	addi		r11,r11,32
   1240 	lvx		v30,r10,$sp
   1241 	lvx		v31,r11,$sp
   1242 	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1243 	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1244 	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1245 	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1246 	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1247 	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1248 	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   1249 	blr
   1250 	.long		0
   1251 	.byte		0,12,0x04,0,0x80,6,6,0
   1252 	.long		0
   1253 .size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
   1254 ___
   1255 }}	}}}
   1256 
   1257 #########################################################################
   1258 {{{	# CTR procedure[s]						#
   1259 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
   1260 my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
   1261 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
   1262 						map("v$_",(4..11));
   1263 my $dat=$tmp;
   1264 
   1265 $code.=<<___;
   1266 .globl	.${prefix}_ctr32_encrypt_blocks
   1267 .align	5
   1268 .${prefix}_ctr32_encrypt_blocks:
   1269 	${UCMP}i	$len,1
   1270 	bltlr-
   1271 
   1272 	lis		r0,0xfff0
   1273 	mfspr		$vrsave,256
   1274 	mtspr		256,r0
   1275 
   1276 	li		$idx,15
   1277 	vxor		$rndkey0,$rndkey0,$rndkey0
   1278 	le?vspltisb	$tmp,0x0f
   1279 
   1280 	lvx		$ivec,0,$ivp		# load [unaligned] iv
   1281 	lvsl		$inpperm,0,$ivp
   1282 	lvx		$inptail,$idx,$ivp
   1283 	 vspltisb	$one,1
   1284 	le?vxor		$inpperm,$inpperm,$tmp
   1285 	vperm		$ivec,$ivec,$inptail,$inpperm
   1286 	 vsldoi		$one,$rndkey0,$one,1
   1287 
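# $one now holds the 128-bit constant 1: vspltisb set every byte to 1
# and vsldoi kept only the least significant one, so vadduwm below
# increments the low-order 32-bit word of the counter.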
   1288 	neg		r11,$inp
   1289 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
   1290 	lwz		$rounds,240($key)
   1291 
   1292 	lvsr		$inpperm,0,r11		# prepare for unaligned load
   1293 	lvx		$inptail,0,$inp
   1294 	addi		$inp,$inp,15		# 15 is not typo
   1295 	le?vxor		$inpperm,$inpperm,$tmp
   1296 
   1297 	srwi		$rounds,$rounds,1
   1298 	li		$idx,16
   1299 	subi		$rounds,$rounds,1
   1300 
   1301 	${UCMP}i	$len,8
   1302 	bge		_aesp8_ctr32_encrypt8x
   1303 
   1304 	?lvsr		$outperm,0,$out		# prepare for unaligned store
   1305 	vspltisb	$outmask,-1
   1306 	lvx		$outhead,0,$out
   1307 	?vperm		$outmask,$rndkey0,$outmask,$outperm
   1308 	le?vxor		$outperm,$outperm,$tmp
   1309 
   1310 	lvx		$rndkey0,0,$key
   1311 	mtctr		$rounds
   1312 	lvx		$rndkey1,$idx,$key
   1313 	addi		$idx,$idx,16
   1314 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1315 	vxor		$inout,$ivec,$rndkey0
   1316 	lvx		$rndkey0,$idx,$key
   1317 	addi		$idx,$idx,16
   1318 	b		Loop_ctr32_enc
   1319 
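# Scalar CTR path, one block per iteration but software-pipelined: the
# next counter block is primed while the current one is finished off.
# The plaintext is xored into the last round key before vcipherlast,
# which merges the CTR keystream xor into the final AES round.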
   1320 .align	5
   1321 Loop_ctr32_enc:
   1322 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1323 	vcipher		$inout,$inout,$rndkey1
   1324 	lvx		$rndkey1,$idx,$key
   1325 	addi		$idx,$idx,16
   1326 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1327 	vcipher		$inout,$inout,$rndkey0
   1328 	lvx		$rndkey0,$idx,$key
   1329 	addi		$idx,$idx,16
   1330 	bdnz		Loop_ctr32_enc
   1331 
   1332 	vadduwm		$ivec,$ivec,$one
   1333 	 vmr		$dat,$inptail
   1334 	 lvx		$inptail,0,$inp
   1335 	 addi		$inp,$inp,16
   1336 	 subic.		$len,$len,1		# blocks--
   1337 
   1338 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1339 	vcipher		$inout,$inout,$rndkey1
   1340 	lvx		$rndkey1,$idx,$key
   1341 	 vperm		$dat,$dat,$inptail,$inpperm
   1342 	 li		$idx,16
   1343 	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
   1344 	 lvx		$rndkey0,0,$key
   1345 	vxor		$dat,$dat,$rndkey1	# last round key
   1346 	vcipherlast	$inout,$inout,$dat
   1347 
   1348 	 lvx		$rndkey1,$idx,$key
   1349 	 addi		$idx,$idx,16
   1350 	vperm		$inout,$inout,$inout,$outperm
   1351 	vsel		$dat,$outhead,$inout,$outmask
   1352 	 mtctr		$rounds
   1353 	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1354 	vmr		$outhead,$inout
   1355 	 vxor		$inout,$ivec,$rndkey0
   1356 	 lvx		$rndkey0,$idx,$key
   1357 	 addi		$idx,$idx,16
   1358 	stvx		$dat,0,$out
   1359 	addi		$out,$out,16
   1360 	bne		Loop_ctr32_enc
   1361 
   1362 	addi		$out,$out,-1
   1363 	lvx		$inout,0,$out		# redundant in aligned case
   1364 	vsel		$inout,$outhead,$inout,$outmask
   1365 	stvx		$inout,0,$out
   1366 
   1367 	mtspr		256,$vrsave
   1368 	blr
   1369 	.long		0
   1370 	.byte		0,12,0x14,0,0,0,6,0
   1371 	.long		0
   1372 ___
   1373 #########################################################################
   1374 {{	# Optimized CTR procedure					#
   1375 my $key_="r11";
   1376 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
   1377     $x00=0 if ($flavour =~ /osx/);
   1378 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
   1379 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
   1382 my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
   1383 my ($two,$three,$four)=($outhead,$outperm,$outmask);
   1384 
   1385 $code.=<<___;
   1386 .align	5
   1387 _aesp8_ctr32_encrypt8x:
   1388 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   1389 	li		r10,`$FRAME+8*16+15`
   1390 	li		r11,`$FRAME+8*16+31`
   1391 	stvx		v20,r10,$sp		# ABI says so
   1392 	addi		r10,r10,32
   1393 	stvx		v21,r11,$sp
   1394 	addi		r11,r11,32
   1395 	stvx		v22,r10,$sp
   1396 	addi		r10,r10,32
   1397 	stvx		v23,r11,$sp
   1398 	addi		r11,r11,32
   1399 	stvx		v24,r10,$sp
   1400 	addi		r10,r10,32
   1401 	stvx		v25,r11,$sp
   1402 	addi		r11,r11,32
   1403 	stvx		v26,r10,$sp
   1404 	addi		r10,r10,32
   1405 	stvx		v27,r11,$sp
   1406 	addi		r11,r11,32
   1407 	stvx		v28,r10,$sp
   1408 	addi		r10,r10,32
   1409 	stvx		v29,r11,$sp
   1410 	addi		r11,r11,32
   1411 	stvx		v30,r10,$sp
   1412 	stvx		v31,r11,$sp
   1413 	li		r0,-1
   1414 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   1415 	li		$x10,0x10
   1416 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1417 	li		$x20,0x20
   1418 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1419 	li		$x30,0x30
   1420 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1421 	li		$x40,0x40
   1422 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1423 	li		$x50,0x50
   1424 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1425 	li		$x60,0x60
   1426 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1427 	li		$x70,0x70
   1428 	mtspr		256,r0
   1429 
   1430 	subi		$rounds,$rounds,3	# -4 in total
   1431 
   1432 	lvx		$rndkey0,$x00,$key	# load key schedule
   1433 	lvx		v30,$x10,$key
   1434 	addi		$key,$key,0x20
   1435 	lvx		v31,$x00,$key
   1436 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   1437 	addi		$key_,$sp,`$FRAME+15`
   1438 	mtctr		$rounds
   1439 
   1440 Load_ctr32_enc_key:
   1441 	?vperm		v24,v30,v31,$keyperm
   1442 	lvx		v30,$x10,$key
   1443 	addi		$key,$key,0x20
   1444 	stvx		v24,$x00,$key_		# off-load round[1]
   1445 	?vperm		v25,v31,v30,$keyperm
   1446 	lvx		v31,$x00,$key
   1447 	stvx		v25,$x10,$key_		# off-load round[2]
   1448 	addi		$key_,$key_,0x20
   1449 	bdnz		Load_ctr32_enc_key
   1450 
   1451 	lvx		v26,$x10,$key
   1452 	?vperm		v24,v30,v31,$keyperm
   1453 	lvx		v27,$x20,$key
   1454 	stvx		v24,$x00,$key_		# off-load round[3]
   1455 	?vperm		v25,v31,v26,$keyperm
   1456 	lvx		v28,$x30,$key
   1457 	stvx		v25,$x10,$key_		# off-load round[4]
   1458 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   1459 	?vperm		v26,v26,v27,$keyperm
   1460 	lvx		v29,$x40,$key
   1461 	?vperm		v27,v27,v28,$keyperm
   1462 	lvx		v30,$x50,$key
   1463 	?vperm		v28,v28,v29,$keyperm
   1464 	lvx		v31,$x60,$key
   1465 	?vperm		v29,v29,v30,$keyperm
   1466 	lvx		$out0,$x70,$key		# borrow $out0
   1467 	?vperm		v30,v30,v31,$keyperm
   1468 	lvx		v24,$x00,$key_		# pre-load round[1]
   1469 	?vperm		v31,v31,$out0,$keyperm
   1470 	lvx		v25,$x10,$key_		# pre-load round[2]
   1471 
   1472 	vadduwm		$two,$one,$one
   1473 	subi		$inp,$inp,15		# undo "caller"
   1474 	$SHL		$len,$len,4
   1475 
   1476 	vadduwm		$out1,$ivec,$one	# counter values ...
   1477 	vadduwm		$out2,$ivec,$two
   1478 	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
   1479 	 le?li		$idx,8
   1480 	vadduwm		$out3,$out1,$two
   1481 	vxor		$out1,$out1,$rndkey0
   1482 	 le?lvsl	$inpperm,0,$idx
   1483 	vadduwm		$out4,$out2,$two
   1484 	vxor		$out2,$out2,$rndkey0
   1485 	 le?vspltisb	$tmp,0x0f
   1486 	vadduwm		$out5,$out3,$two
   1487 	vxor		$out3,$out3,$rndkey0
   1488 	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
   1489 	vadduwm		$out6,$out4,$two
   1490 	vxor		$out4,$out4,$rndkey0
   1491 	vadduwm		$out7,$out5,$two
   1492 	vxor		$out5,$out5,$rndkey0
   1493 	vadduwm		$ivec,$out6,$two	# next counter value
   1494 	vxor		$out6,$out6,$rndkey0
   1495 	vxor		$out7,$out7,$rndkey0
   1496 
   1497 	mtctr		$rounds
   1498 	b		Loop_ctr32_enc8x
   1499 .align	5
   1500 Loop_ctr32_enc8x:
   1501 	vcipher 	$out0,$out0,v24
   1502 	vcipher 	$out1,$out1,v24
   1503 	vcipher 	$out2,$out2,v24
   1504 	vcipher 	$out3,$out3,v24
   1505 	vcipher 	$out4,$out4,v24
   1506 	vcipher 	$out5,$out5,v24
   1507 	vcipher 	$out6,$out6,v24
   1508 	vcipher 	$out7,$out7,v24
   1509 Loop_ctr32_enc8x_middle:
   1510 	lvx		v24,$x20,$key_		# round[3]
   1511 	addi		$key_,$key_,0x20
   1512 
   1513 	vcipher 	$out0,$out0,v25
   1514 	vcipher 	$out1,$out1,v25
   1515 	vcipher 	$out2,$out2,v25
   1516 	vcipher 	$out3,$out3,v25
   1517 	vcipher 	$out4,$out4,v25
   1518 	vcipher 	$out5,$out5,v25
   1519 	vcipher 	$out6,$out6,v25
   1520 	vcipher 	$out7,$out7,v25
   1521 	lvx		v25,$x10,$key_		# round[4]
   1522 	bdnz		Loop_ctr32_enc8x
   1523 
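# Same borrow trick as the CBC path: r0 is 0 while at least 256 bytes
# (two full batches) remain and -1 otherwise, so the add further down
# rewinds $inp for the last pass instead of reading past the input.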
   1524 	subic		r11,$len,256		# $len-256, borrow $key_
   1525 	vcipher 	$out0,$out0,v24
   1526 	vcipher 	$out1,$out1,v24
   1527 	vcipher 	$out2,$out2,v24
   1528 	vcipher 	$out3,$out3,v24
   1529 	vcipher 	$out4,$out4,v24
   1530 	vcipher 	$out5,$out5,v24
   1531 	vcipher 	$out6,$out6,v24
   1532 	vcipher 	$out7,$out7,v24
   1533 
   1534 	subfe		r0,r0,r0		# borrow?-1:0
   1535 	vcipher 	$out0,$out0,v25
   1536 	vcipher 	$out1,$out1,v25
   1537 	vcipher 	$out2,$out2,v25
   1538 	vcipher 	$out3,$out3,v25
   1539 	vcipher 	$out4,$out4,v25
   1540 	vcipher		$out5,$out5,v25
   1541 	vcipher		$out6,$out6,v25
   1542 	vcipher		$out7,$out7,v25
   1543 
   1544 	and		r0,r0,r11
   1545 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   1546 	vcipher		$out0,$out0,v26
   1547 	vcipher		$out1,$out1,v26
   1548 	vcipher		$out2,$out2,v26
   1549 	vcipher		$out3,$out3,v26
   1550 	vcipher		$out4,$out4,v26
   1551 	vcipher		$out5,$out5,v26
   1552 	vcipher		$out6,$out6,v26
   1553 	vcipher		$out7,$out7,v26
   1554 	lvx		v24,$x00,$key_		# re-pre-load round[1]
   1555 
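# 129 rather than 128, so that a remainder of exactly 128 bytes also
# borrows: that last full batch must take the Lctr32_enc8x_break path,
# as there is no further input to prefetch. The addi corrects the
# result back to $len-=128.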
   1556 	subic		$len,$len,129		# $len-=129
   1557 	vcipher		$out0,$out0,v27
   1558 	addi		$len,$len,1		# $len-=128 really
   1559 	vcipher		$out1,$out1,v27
   1560 	vcipher		$out2,$out2,v27
   1561 	vcipher		$out3,$out3,v27
   1562 	vcipher		$out4,$out4,v27
   1563 	vcipher		$out5,$out5,v27
   1564 	vcipher		$out6,$out6,v27
   1565 	vcipher		$out7,$out7,v27
   1566 	lvx		v25,$x10,$key_		# re-pre-load round[2]
   1567 
   1568 	vcipher		$out0,$out0,v28
   1569 	 lvx_u		$in0,$x00,$inp		# load input
   1570 	vcipher		$out1,$out1,v28
   1571 	 lvx_u		$in1,$x10,$inp
   1572 	vcipher		$out2,$out2,v28
   1573 	 lvx_u		$in2,$x20,$inp
   1574 	vcipher		$out3,$out3,v28
   1575 	 lvx_u		$in3,$x30,$inp
   1576 	vcipher		$out4,$out4,v28
   1577 	 lvx_u		$in4,$x40,$inp
   1578 	vcipher		$out5,$out5,v28
   1579 	 lvx_u		$in5,$x50,$inp
   1580 	vcipher		$out6,$out6,v28
   1581 	 lvx_u		$in6,$x60,$inp
   1582 	vcipher		$out7,$out7,v28
   1583 	 lvx_u		$in7,$x70,$inp
   1584 	 addi		$inp,$inp,0x80
   1585 
   1586 	vcipher		$out0,$out0,v29
   1587 	 le?vperm	$in0,$in0,$in0,$inpperm
   1588 	vcipher		$out1,$out1,v29
   1589 	 le?vperm	$in1,$in1,$in1,$inpperm
   1590 	vcipher		$out2,$out2,v29
   1591 	 le?vperm	$in2,$in2,$in2,$inpperm
   1592 	vcipher		$out3,$out3,v29
   1593 	 le?vperm	$in3,$in3,$in3,$inpperm
   1594 	vcipher		$out4,$out4,v29
   1595 	 le?vperm	$in4,$in4,$in4,$inpperm
   1596 	vcipher		$out5,$out5,v29
   1597 	 le?vperm	$in5,$in5,$in5,$inpperm
   1598 	vcipher		$out6,$out6,v29
   1599 	 le?vperm	$in6,$in6,$in6,$inpperm
   1600 	vcipher		$out7,$out7,v29
   1601 	 le?vperm	$in7,$in7,$in7,$inpperm
   1602 
	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in7 are loaded
						# with the last "words"
   1607 	subfe.		r0,r0,r0		# borrow?-1:0
   1608 	vcipher		$out0,$out0,v30
   1609 	 vxor		$in0,$in0,v31		# xor with last round key
   1610 	vcipher		$out1,$out1,v30
   1611 	 vxor		$in1,$in1,v31
   1612 	vcipher		$out2,$out2,v30
   1613 	 vxor		$in2,$in2,v31
   1614 	vcipher		$out3,$out3,v30
   1615 	 vxor		$in3,$in3,v31
   1616 	vcipher		$out4,$out4,v30
   1617 	 vxor		$in4,$in4,v31
   1618 	vcipher		$out5,$out5,v30
   1619 	 vxor		$in5,$in5,v31
   1620 	vcipher		$out6,$out6,v30
   1621 	 vxor		$in6,$in6,v31
   1622 	vcipher		$out7,$out7,v30
   1623 	 vxor		$in7,$in7,v31
   1624 
   1625 	bne		Lctr32_enc8x_break	# did $len-129 borrow?
   1626 
   1627 	vcipherlast	$in0,$out0,$in0
   1628 	vcipherlast	$in1,$out1,$in1
   1629 	 vadduwm	$out1,$ivec,$one	# counter values ...
   1630 	vcipherlast	$in2,$out2,$in2
   1631 	 vadduwm	$out2,$ivec,$two
   1632 	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
   1633 	vcipherlast	$in3,$out3,$in3
   1634 	 vadduwm	$out3,$out1,$two
   1635 	 vxor		$out1,$out1,$rndkey0
   1636 	vcipherlast	$in4,$out4,$in4
   1637 	 vadduwm	$out4,$out2,$two
   1638 	 vxor		$out2,$out2,$rndkey0
   1639 	vcipherlast	$in5,$out5,$in5
   1640 	 vadduwm	$out5,$out3,$two
   1641 	 vxor		$out3,$out3,$rndkey0
   1642 	vcipherlast	$in6,$out6,$in6
   1643 	 vadduwm	$out6,$out4,$two
   1644 	 vxor		$out4,$out4,$rndkey0
   1645 	vcipherlast	$in7,$out7,$in7
   1646 	 vadduwm	$out7,$out5,$two
   1647 	 vxor		$out5,$out5,$rndkey0
   1648 	le?vperm	$in0,$in0,$in0,$inpperm
   1649 	 vadduwm	$ivec,$out6,$two	# next counter value
   1650 	 vxor		$out6,$out6,$rndkey0
   1651 	le?vperm	$in1,$in1,$in1,$inpperm
   1652 	 vxor		$out7,$out7,$rndkey0
   1653 	mtctr		$rounds
   1654 
   1655 	 vcipher	$out0,$out0,v24
   1656 	stvx_u		$in0,$x00,$out
   1657 	le?vperm	$in2,$in2,$in2,$inpperm
   1658 	 vcipher	$out1,$out1,v24
   1659 	stvx_u		$in1,$x10,$out
   1660 	le?vperm	$in3,$in3,$in3,$inpperm
   1661 	 vcipher	$out2,$out2,v24
   1662 	stvx_u		$in2,$x20,$out
   1663 	le?vperm	$in4,$in4,$in4,$inpperm
   1664 	 vcipher	$out3,$out3,v24
   1665 	stvx_u		$in3,$x30,$out
   1666 	le?vperm	$in5,$in5,$in5,$inpperm
   1667 	 vcipher	$out4,$out4,v24
   1668 	stvx_u		$in4,$x40,$out
   1669 	le?vperm	$in6,$in6,$in6,$inpperm
   1670 	 vcipher	$out5,$out5,v24
   1671 	stvx_u		$in5,$x50,$out
   1672 	le?vperm	$in7,$in7,$in7,$inpperm
   1673 	 vcipher	$out6,$out6,v24
   1674 	stvx_u		$in6,$x60,$out
   1675 	 vcipher	$out7,$out7,v24
   1676 	stvx_u		$in7,$x70,$out
   1677 	addi		$out,$out,0x80
   1678 
   1679 	b		Loop_ctr32_enc8x_middle
   1680 
   1681 .align	5
   1682 Lctr32_enc8x_break:
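	# dispatch on the residual byte count; $len is biased by -128
	# here, so -0x70..0x00 corresponds to 1..8 remaining blocks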
   1683 	cmpwi		$len,-0x60
   1684 	blt		Lctr32_enc8x_one
   1685 	nop
   1686 	beq		Lctr32_enc8x_two
   1687 	cmpwi		$len,-0x40
   1688 	blt		Lctr32_enc8x_three
   1689 	nop
   1690 	beq		Lctr32_enc8x_four
   1691 	cmpwi		$len,-0x20
   1692 	blt		Lctr32_enc8x_five
   1693 	nop
   1694 	beq		Lctr32_enc8x_six
   1695 	cmpwi		$len,0x00
   1696 	blt		Lctr32_enc8x_seven
   1697 
   1698 Lctr32_enc8x_eight:
   1699 	vcipherlast	$out0,$out0,$in0
   1700 	vcipherlast	$out1,$out1,$in1
   1701 	vcipherlast	$out2,$out2,$in2
   1702 	vcipherlast	$out3,$out3,$in3
   1703 	vcipherlast	$out4,$out4,$in4
   1704 	vcipherlast	$out5,$out5,$in5
   1705 	vcipherlast	$out6,$out6,$in6
   1706 	vcipherlast	$out7,$out7,$in7
   1707 
   1708 	le?vperm	$out0,$out0,$out0,$inpperm
   1709 	le?vperm	$out1,$out1,$out1,$inpperm
   1710 	stvx_u		$out0,$x00,$out
   1711 	le?vperm	$out2,$out2,$out2,$inpperm
   1712 	stvx_u		$out1,$x10,$out
   1713 	le?vperm	$out3,$out3,$out3,$inpperm
   1714 	stvx_u		$out2,$x20,$out
   1715 	le?vperm	$out4,$out4,$out4,$inpperm
   1716 	stvx_u		$out3,$x30,$out
   1717 	le?vperm	$out5,$out5,$out5,$inpperm
   1718 	stvx_u		$out4,$x40,$out
   1719 	le?vperm	$out6,$out6,$out6,$inpperm
   1720 	stvx_u		$out5,$x50,$out
   1721 	le?vperm	$out7,$out7,$out7,$inpperm
   1722 	stvx_u		$out6,$x60,$out
   1723 	stvx_u		$out7,$x70,$out
   1724 	addi		$out,$out,0x80
   1725 	b		Lctr32_enc8x_done
   1726 
   1727 .align	5
   1728 Lctr32_enc8x_seven:
   1729 	vcipherlast	$out0,$out0,$in1
   1730 	vcipherlast	$out1,$out1,$in2
   1731 	vcipherlast	$out2,$out2,$in3
   1732 	vcipherlast	$out3,$out3,$in4
   1733 	vcipherlast	$out4,$out4,$in5
   1734 	vcipherlast	$out5,$out5,$in6
   1735 	vcipherlast	$out6,$out6,$in7
   1736 
   1737 	le?vperm	$out0,$out0,$out0,$inpperm
   1738 	le?vperm	$out1,$out1,$out1,$inpperm
   1739 	stvx_u		$out0,$x00,$out
   1740 	le?vperm	$out2,$out2,$out2,$inpperm
   1741 	stvx_u		$out1,$x10,$out
   1742 	le?vperm	$out3,$out3,$out3,$inpperm
   1743 	stvx_u		$out2,$x20,$out
   1744 	le?vperm	$out4,$out4,$out4,$inpperm
   1745 	stvx_u		$out3,$x30,$out
   1746 	le?vperm	$out5,$out5,$out5,$inpperm
   1747 	stvx_u		$out4,$x40,$out
   1748 	le?vperm	$out6,$out6,$out6,$inpperm
   1749 	stvx_u		$out5,$x50,$out
   1750 	stvx_u		$out6,$x60,$out
   1751 	addi		$out,$out,0x70
   1752 	b		Lctr32_enc8x_done
   1753 
   1754 .align	5
   1755 Lctr32_enc8x_six:
   1756 	vcipherlast	$out0,$out0,$in2
   1757 	vcipherlast	$out1,$out1,$in3
   1758 	vcipherlast	$out2,$out2,$in4
   1759 	vcipherlast	$out3,$out3,$in5
   1760 	vcipherlast	$out4,$out4,$in6
   1761 	vcipherlast	$out5,$out5,$in7
   1762 
   1763 	le?vperm	$out0,$out0,$out0,$inpperm
   1764 	le?vperm	$out1,$out1,$out1,$inpperm
   1765 	stvx_u		$out0,$x00,$out
   1766 	le?vperm	$out2,$out2,$out2,$inpperm
   1767 	stvx_u		$out1,$x10,$out
   1768 	le?vperm	$out3,$out3,$out3,$inpperm
   1769 	stvx_u		$out2,$x20,$out
   1770 	le?vperm	$out4,$out4,$out4,$inpperm
   1771 	stvx_u		$out3,$x30,$out
   1772 	le?vperm	$out5,$out5,$out5,$inpperm
   1773 	stvx_u		$out4,$x40,$out
   1774 	stvx_u		$out5,$x50,$out
   1775 	addi		$out,$out,0x60
   1776 	b		Lctr32_enc8x_done
   1777 
   1778 .align	5
   1779 Lctr32_enc8x_five:
   1780 	vcipherlast	$out0,$out0,$in3
   1781 	vcipherlast	$out1,$out1,$in4
   1782 	vcipherlast	$out2,$out2,$in5
   1783 	vcipherlast	$out3,$out3,$in6
   1784 	vcipherlast	$out4,$out4,$in7
   1785 
   1786 	le?vperm	$out0,$out0,$out0,$inpperm
   1787 	le?vperm	$out1,$out1,$out1,$inpperm
   1788 	stvx_u		$out0,$x00,$out
   1789 	le?vperm	$out2,$out2,$out2,$inpperm
   1790 	stvx_u		$out1,$x10,$out
   1791 	le?vperm	$out3,$out3,$out3,$inpperm
   1792 	stvx_u		$out2,$x20,$out
   1793 	le?vperm	$out4,$out4,$out4,$inpperm
   1794 	stvx_u		$out3,$x30,$out
   1795 	stvx_u		$out4,$x40,$out
   1796 	addi		$out,$out,0x50
   1797 	b		Lctr32_enc8x_done
   1798 
   1799 .align	5
   1800 Lctr32_enc8x_four:
   1801 	vcipherlast	$out0,$out0,$in4
   1802 	vcipherlast	$out1,$out1,$in5
   1803 	vcipherlast	$out2,$out2,$in6
   1804 	vcipherlast	$out3,$out3,$in7
   1805 
   1806 	le?vperm	$out0,$out0,$out0,$inpperm
   1807 	le?vperm	$out1,$out1,$out1,$inpperm
   1808 	stvx_u		$out0,$x00,$out
   1809 	le?vperm	$out2,$out2,$out2,$inpperm
   1810 	stvx_u		$out1,$x10,$out
   1811 	le?vperm	$out3,$out3,$out3,$inpperm
   1812 	stvx_u		$out2,$x20,$out
   1813 	stvx_u		$out3,$x30,$out
   1814 	addi		$out,$out,0x40
   1815 	b		Lctr32_enc8x_done
   1816 
   1817 .align	5
   1818 Lctr32_enc8x_three:
   1819 	vcipherlast	$out0,$out0,$in5
   1820 	vcipherlast	$out1,$out1,$in6
   1821 	vcipherlast	$out2,$out2,$in7
   1822 
   1823 	le?vperm	$out0,$out0,$out0,$inpperm
   1824 	le?vperm	$out1,$out1,$out1,$inpperm
   1825 	stvx_u		$out0,$x00,$out
   1826 	le?vperm	$out2,$out2,$out2,$inpperm
   1827 	stvx_u		$out1,$x10,$out
   1828 	stvx_u		$out2,$x20,$out
   1829 	addi		$out,$out,0x30
	b		Lctr32_enc8x_done
   1831 
   1832 .align	5
   1833 Lctr32_enc8x_two:
   1834 	vcipherlast	$out0,$out0,$in6
   1835 	vcipherlast	$out1,$out1,$in7
   1836 
   1837 	le?vperm	$out0,$out0,$out0,$inpperm
   1838 	le?vperm	$out1,$out1,$out1,$inpperm
   1839 	stvx_u		$out0,$x00,$out
   1840 	stvx_u		$out1,$x10,$out
   1841 	addi		$out,$out,0x20
	b		Lctr32_enc8x_done
   1843 
   1844 .align	5
   1845 Lctr32_enc8x_one:
   1846 	vcipherlast	$out0,$out0,$in7
   1847 
   1848 	le?vperm	$out0,$out0,$out0,$inpperm
   1849 	stvx_u		$out0,0,$out
   1850 	addi		$out,$out,0x10
   1851 
   1852 Lctr32_enc8x_done:
   1853 	li		r10,`$FRAME+15`
   1854 	li		r11,`$FRAME+31`
   1855 	stvx		$inpperm,r10,$sp	# wipe copies of round keys
   1856 	addi		r10,r10,32
   1857 	stvx		$inpperm,r11,$sp
   1858 	addi		r11,r11,32
   1859 	stvx		$inpperm,r10,$sp
   1860 	addi		r10,r10,32
   1861 	stvx		$inpperm,r11,$sp
   1862 	addi		r11,r11,32
   1863 	stvx		$inpperm,r10,$sp
   1864 	addi		r10,r10,32
   1865 	stvx		$inpperm,r11,$sp
   1866 	addi		r11,r11,32
   1867 	stvx		$inpperm,r10,$sp
   1868 	addi		r10,r10,32
   1869 	stvx		$inpperm,r11,$sp
   1870 	addi		r11,r11,32
   1871 
   1872 	mtspr		256,$vrsave
   1873 	lvx		v20,r10,$sp		# ABI says so
   1874 	addi		r10,r10,32
   1875 	lvx		v21,r11,$sp
   1876 	addi		r11,r11,32
   1877 	lvx		v22,r10,$sp
   1878 	addi		r10,r10,32
   1879 	lvx		v23,r11,$sp
   1880 	addi		r11,r11,32
   1881 	lvx		v24,r10,$sp
   1882 	addi		r10,r10,32
   1883 	lvx		v25,r11,$sp
   1884 	addi		r11,r11,32
   1885 	lvx		v26,r10,$sp
   1886 	addi		r10,r10,32
   1887 	lvx		v27,r11,$sp
   1888 	addi		r11,r11,32
   1889 	lvx		v28,r10,$sp
   1890 	addi		r10,r10,32
   1891 	lvx		v29,r11,$sp
   1892 	addi		r11,r11,32
   1893 	lvx		v30,r10,$sp
   1894 	lvx		v31,r11,$sp
   1895 	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   1896 	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   1897 	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   1898 	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   1899 	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   1900 	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   1901 	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   1902 	blr
   1903 	.long		0
   1904 	.byte		0,12,0x04,0,0x80,6,6,0
   1905 	.long		0
   1906 .size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
   1907 ___
   1908 }}	}}}
   1909 
   1910 #########################################################################
   1911 {{{	# XTS procedures						#
# int ${prefix}_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                                const AES_KEY *key1, const AES_KEY *key2,	#
#                                [const] unsigned char iv[16]);		#
# If $key2 is NULL, "tweak chaining" mode is engaged, in which the	#
# input tweak value is assumed to be already encrypted, and the last	#
# tweak value, suitable for a consecutive call on the same chunk of	#
# data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#
   1920 
   1921 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
   1922 my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
   1923 my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
   1924 my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
   1925 my $taillen = $key2;
   1926 
   1927    ($inp,$idx) = ($idx,$inp);				# reassign
   1928 
   1929 $code.=<<___;
   1930 .globl	.${prefix}_xts_encrypt
   1931 .align	5
   1932 .${prefix}_xts_encrypt:
   1933 	mr		$inp,r3				# reassign
   1934 	li		r3,-1
   1935 	${UCMP}i	$len,16
   1936 	bltlr-
   1937 
   1938 	lis		r0,0xfff0
   1939 	mfspr		r12,256				# save vrsave
   1940 	li		r11,0
   1941 	mtspr		256,r0
   1942 
   1943 	vspltisb	$seven,0x07			# 0x070707..07
   1944 	le?lvsl		$leperm,r11,r11
   1945 	le?vspltisb	$tmp,0x0f
   1946 	le?vxor		$leperm,$leperm,$seven
   1947 
   1948 	li		$idx,15
   1949 	lvx		$tweak,0,$ivp			# load [unaligned] iv
   1950 	lvsl		$inpperm,0,$ivp
   1951 	lvx		$inptail,$idx,$ivp
   1952 	le?vxor		$inpperm,$inpperm,$tmp
   1953 	vperm		$tweak,$tweak,$inptail,$inpperm
   1954 
   1955 	neg		r11,$inp
   1956 	lvsr		$inpperm,0,r11			# prepare for unaligned load
   1957 	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not a typo
   1959 	le?vxor		$inpperm,$inpperm,$tmp
   1960 
   1961 	${UCMP}i	$key2,0				# key2==NULL?
   1962 	beq		Lxts_enc_no_key2
   1963 
   1964 	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
   1965 	lwz		$rounds,240($key2)
   1966 	srwi		$rounds,$rounds,1
   1967 	subi		$rounds,$rounds,1
   1968 	li		$idx,16
   1969 
   1970 	lvx		$rndkey0,0,$key2
   1971 	lvx		$rndkey1,$idx,$key2
   1972 	addi		$idx,$idx,16
   1973 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1974 	vxor		$tweak,$tweak,$rndkey0
   1975 	lvx		$rndkey0,$idx,$key2
   1976 	addi		$idx,$idx,16
   1977 	mtctr		$rounds
   1978 
   1979 Ltweak_xts_enc:
   1980 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1981 	vcipher		$tweak,$tweak,$rndkey1
   1982 	lvx		$rndkey1,$idx,$key2
   1983 	addi		$idx,$idx,16
   1984 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1985 	vcipher		$tweak,$tweak,$rndkey0
   1986 	lvx		$rndkey0,$idx,$key2
   1987 	addi		$idx,$idx,16
   1988 	bdnz		Ltweak_xts_enc
   1989 
   1990 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   1991 	vcipher		$tweak,$tweak,$rndkey1
   1992 	lvx		$rndkey1,$idx,$key2
   1993 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   1994 	vcipherlast	$tweak,$tweak,$rndkey0
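	# $tweak now holds the iv encrypted under key2, i.e. the initial
	# XTS tweak as specified by IEEE P1619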
   1995 
   1996 	li		$ivp,0				# don't chain the tweak
   1997 	b		Lxts_enc
   1998 
   1999 Lxts_enc_no_key2:
   2000 	li		$idx,-16
   2001 	and		$len,$len,$idx			# in "tweak chaining"
   2002 							# mode only complete
   2003 							# blocks are processed
   2004 Lxts_enc:
   2005 	lvx		$inptail,0,$inp
   2006 	addi		$inp,$inp,16
   2007 
   2008 	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
   2009 	lwz		$rounds,240($key1)
   2010 	srwi		$rounds,$rounds,1
   2011 	subi		$rounds,$rounds,1
   2012 	li		$idx,16
   2013 
   2014 	vslb		$eighty7,$seven,$seven		# 0x808080..80
   2015 	vor		$eighty7,$eighty7,$seven	# 0x878787..87
   2016 	vspltisb	$tmp,1				# 0x010101..01
   2017 	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
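	# The vsrab/vaddubm/vsldoi/vand/vxor sequences below double the
	# tweak in GF(2^128), i.e. multiply it by x modulo the XTS
	# polynomial x^128+x^7+x^2+x+1, without branching. As a rough
	# per-byte sketch:
	#	mask  = sign-extend every byte of tweak	(vsrab by 7)
	#	tweak = tweak + tweak, per byte		(vaddubm)
	#	mask  = rotate mask by one byte		(vsldoi)
	#	tweak ^= mask & 0x870101..01		(vand, vxor)
	# so each byte's lost top bit re-enters its neighbour as 0x01 and
	# the 128-bit overflow folds back in as the 0x87 reduction term.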
   2018 
   2019 	${UCMP}i	$len,96
   2020 	bge		_aesp8_xts_encrypt6x
   2021 
   2022 	andi.		$taillen,$len,15
   2023 	subic		r0,$len,32
   2024 	subi		$taillen,$taillen,16
   2025 	subfe		r0,r0,r0
   2026 	and		r0,r0,$taillen
   2027 	add		$inp,$inp,r0
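	# Branch-free guard for short inputs: subic sets CA iff $len>=32,
	# subfe then yields 0 or -1, and ANDing that with the negative
	# $taillen backs $inp up just enough that subsequent loads for the
	# stolen tail stay inside the input buffer.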
   2028 
   2029 	lvx		$rndkey0,0,$key1
   2030 	lvx		$rndkey1,$idx,$key1
   2031 	addi		$idx,$idx,16
   2032 	vperm		$inout,$inout,$inptail,$inpperm
   2033 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2034 	vxor		$inout,$inout,$tweak
   2035 	vxor		$inout,$inout,$rndkey0
   2036 	lvx		$rndkey0,$idx,$key1
   2037 	addi		$idx,$idx,16
   2038 	mtctr		$rounds
   2039 	b		Loop_xts_enc
   2040 
   2041 .align	5
   2042 Loop_xts_enc:
   2043 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2044 	vcipher		$inout,$inout,$rndkey1
   2045 	lvx		$rndkey1,$idx,$key1
   2046 	addi		$idx,$idx,16
   2047 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2048 	vcipher		$inout,$inout,$rndkey0
   2049 	lvx		$rndkey0,$idx,$key1
   2050 	addi		$idx,$idx,16
   2051 	bdnz		Loop_xts_enc
   2052 
   2053 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2054 	vcipher		$inout,$inout,$rndkey1
   2055 	lvx		$rndkey1,$idx,$key1
   2056 	li		$idx,16
   2057 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2058 	vxor		$rndkey0,$rndkey0,$tweak
   2059 	vcipherlast	$output,$inout,$rndkey0
   2060 
   2061 	le?vperm	$tmp,$output,$output,$leperm
   2062 	be?nop
   2063 	le?stvx_u	$tmp,0,$out
   2064 	be?stvx_u	$output,0,$out
   2065 	addi		$out,$out,16
   2066 
   2067 	subic.		$len,$len,16
   2068 	beq		Lxts_enc_done
   2069 
   2070 	vmr		$inout,$inptail
   2071 	lvx		$inptail,0,$inp
   2072 	addi		$inp,$inp,16
   2073 	lvx		$rndkey0,0,$key1
   2074 	lvx		$rndkey1,$idx,$key1
   2075 	addi		$idx,$idx,16
   2076 
   2077 	subic		r0,$len,32
   2078 	subfe		r0,r0,r0
   2079 	and		r0,r0,$taillen
   2080 	add		$inp,$inp,r0
   2081 
   2082 	vsrab		$tmp,$tweak,$seven		# next tweak value
   2083 	vaddubm		$tweak,$tweak,$tweak
   2084 	vsldoi		$tmp,$tmp,$tmp,15
   2085 	vand		$tmp,$tmp,$eighty7
   2086 	vxor		$tweak,$tweak,$tmp
   2087 
   2088 	vperm		$inout,$inout,$inptail,$inpperm
   2089 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2090 	vxor		$inout,$inout,$tweak
   2091 	vxor		$output,$output,$rndkey0	# just in case $len<16
   2092 	vxor		$inout,$inout,$rndkey0
   2093 	lvx		$rndkey0,$idx,$key1
   2094 	addi		$idx,$idx,16
   2095 
   2096 	mtctr		$rounds
   2097 	${UCMP}i	$len,16
   2098 	bge		Loop_xts_enc
   2099 
   2100 	vxor		$output,$output,$tweak
   2101 	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
   2102 	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
   2103 	vspltisb	$tmp,-1
   2104 	vperm		$inptail,$inptail,$tmp,$inpperm
   2105 	vsel		$inout,$inout,$output,$inptail
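	# Ciphertext stealing: lvsr/vperm build a byte mask of $len zeros
	# followed by 16-$len ones, so vsel keeps the $len plaintext tail
	# bytes and borrows the remainder from the previous ciphertext
	# block held in $output.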
   2106 
   2107 	subi		r11,$out,17
   2108 	subi		$out,$out,16
   2109 	mtctr		$len
   2110 	li		$len,16
   2111 Loop_xts_enc_steal:
   2112 	lbzu		r0,1(r11)
   2113 	stb		r0,16(r11)
   2114 	bdnz		Loop_xts_enc_steal
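	# Move the leading $len bytes of the ciphertext block just stored
	# into the final partial-block position; $out was rewound by 16 so
	# the extra Loop_xts_enc pass below overwrites the last full-block
	# slot with the encryption of the merged block.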
   2115 
   2116 	mtctr		$rounds
   2117 	b		Loop_xts_enc			# one more time...
   2118 
   2119 Lxts_enc_done:
   2120 	${UCMP}i	$ivp,0
   2121 	beq		Lxts_enc_ret
   2122 
   2123 	vsrab		$tmp,$tweak,$seven		# next tweak value
   2124 	vaddubm		$tweak,$tweak,$tweak
   2125 	vsldoi		$tmp,$tmp,$tmp,15
   2126 	vand		$tmp,$tmp,$eighty7
   2127 	vxor		$tweak,$tweak,$tmp
   2128 
   2129 	le?vperm	$tweak,$tweak,$tweak,$leperm
   2130 	stvx_u		$tweak,0,$ivp
   2131 
   2132 Lxts_enc_ret:
   2133 	mtspr		256,r12				# restore vrsave
   2134 	li		r3,0
   2135 	blr
   2136 	.long		0
   2137 	.byte		0,12,0x04,0,0x80,6,6,0
   2138 	.long		0
   2139 .size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
   2140 
   2141 .globl	.${prefix}_xts_decrypt
   2142 .align	5
   2143 .${prefix}_xts_decrypt:
   2144 	mr		$inp,r3				# reassign
   2145 	li		r3,-1
   2146 	${UCMP}i	$len,16
   2147 	bltlr-
   2148 
   2149 	lis		r0,0xfff8
   2150 	mfspr		r12,256				# save vrsave
   2151 	li		r11,0
   2152 	mtspr		256,r0
   2153 
   2154 	andi.		r0,$len,15
   2155 	neg		r0,r0
   2156 	andi.		r0,r0,16
   2157 	sub		$len,$len,r0
   2158 
   2159 	vspltisb	$seven,0x07			# 0x070707..07
   2160 	le?lvsl		$leperm,r11,r11
   2161 	le?vspltisb	$tmp,0x0f
   2162 	le?vxor		$leperm,$leperm,$seven
   2163 
   2164 	li		$idx,15
   2165 	lvx		$tweak,0,$ivp			# load [unaligned] iv
   2166 	lvsl		$inpperm,0,$ivp
   2167 	lvx		$inptail,$idx,$ivp
   2168 	le?vxor		$inpperm,$inpperm,$tmp
   2169 	vperm		$tweak,$tweak,$inptail,$inpperm
   2170 
   2171 	neg		r11,$inp
   2172 	lvsr		$inpperm,0,r11			# prepare for unaligned load
   2173 	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not a typo
   2175 	le?vxor		$inpperm,$inpperm,$tmp
   2176 
   2177 	${UCMP}i	$key2,0				# key2==NULL?
   2178 	beq		Lxts_dec_no_key2
   2179 
   2180 	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
   2181 	lwz		$rounds,240($key2)
   2182 	srwi		$rounds,$rounds,1
   2183 	subi		$rounds,$rounds,1
   2184 	li		$idx,16
   2185 
   2186 	lvx		$rndkey0,0,$key2
   2187 	lvx		$rndkey1,$idx,$key2
   2188 	addi		$idx,$idx,16
   2189 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2190 	vxor		$tweak,$tweak,$rndkey0
   2191 	lvx		$rndkey0,$idx,$key2
   2192 	addi		$idx,$idx,16
   2193 	mtctr		$rounds
   2194 
   2195 Ltweak_xts_dec:
   2196 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2197 	vcipher		$tweak,$tweak,$rndkey1
   2198 	lvx		$rndkey1,$idx,$key2
   2199 	addi		$idx,$idx,16
   2200 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2201 	vcipher		$tweak,$tweak,$rndkey0
   2202 	lvx		$rndkey0,$idx,$key2
   2203 	addi		$idx,$idx,16
   2204 	bdnz		Ltweak_xts_dec
   2205 
   2206 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2207 	vcipher		$tweak,$tweak,$rndkey1
   2208 	lvx		$rndkey1,$idx,$key2
   2209 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2210 	vcipherlast	$tweak,$tweak,$rndkey0
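	# as in the encrypt path, $tweak now holds the iv encrypted under
	# key2 (the initial XTS tweak)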
   2211 
   2212 	li		$ivp,0				# don't chain the tweak
   2213 	b		Lxts_dec
   2214 
   2215 Lxts_dec_no_key2:
   2216 	neg		$idx,$len
   2217 	andi.		$idx,$idx,15
   2218 	add		$len,$len,$idx			# in "tweak chaining"
   2219 							# mode only complete
   2220 							# blocks are processed
   2221 Lxts_dec:
   2222 	lvx		$inptail,0,$inp
   2223 	addi		$inp,$inp,16
   2224 
   2225 	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
   2226 	lwz		$rounds,240($key1)
   2227 	srwi		$rounds,$rounds,1
   2228 	subi		$rounds,$rounds,1
   2229 	li		$idx,16
   2230 
   2231 	vslb		$eighty7,$seven,$seven		# 0x808080..80
   2232 	vor		$eighty7,$eighty7,$seven	# 0x878787..87
   2233 	vspltisb	$tmp,1				# 0x010101..01
   2234 	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
   2235 
   2236 	${UCMP}i	$len,96
   2237 	bge		_aesp8_xts_decrypt6x
   2238 
   2239 	lvx		$rndkey0,0,$key1
   2240 	lvx		$rndkey1,$idx,$key1
   2241 	addi		$idx,$idx,16
   2242 	vperm		$inout,$inout,$inptail,$inpperm
   2243 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2244 	vxor		$inout,$inout,$tweak
   2245 	vxor		$inout,$inout,$rndkey0
   2246 	lvx		$rndkey0,$idx,$key1
   2247 	addi		$idx,$idx,16
   2248 	mtctr		$rounds
   2249 
   2250 	${UCMP}i	$len,16
   2251 	blt		Ltail_xts_dec
   2252 	be?b		Loop_xts_dec
   2253 
   2254 .align	5
   2255 Loop_xts_dec:
   2256 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2257 	vncipher	$inout,$inout,$rndkey1
   2258 	lvx		$rndkey1,$idx,$key1
   2259 	addi		$idx,$idx,16
   2260 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2261 	vncipher	$inout,$inout,$rndkey0
   2262 	lvx		$rndkey0,$idx,$key1
   2263 	addi		$idx,$idx,16
   2264 	bdnz		Loop_xts_dec
   2265 
   2266 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2267 	vncipher	$inout,$inout,$rndkey1
   2268 	lvx		$rndkey1,$idx,$key1
   2269 	li		$idx,16
   2270 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2271 	vxor		$rndkey0,$rndkey0,$tweak
   2272 	vncipherlast	$output,$inout,$rndkey0
   2273 
   2274 	le?vperm	$tmp,$output,$output,$leperm
   2275 	be?nop
   2276 	le?stvx_u	$tmp,0,$out
   2277 	be?stvx_u	$output,0,$out
   2278 	addi		$out,$out,16
   2279 
   2280 	subic.		$len,$len,16
   2281 	beq		Lxts_dec_done
   2282 
   2283 	vmr		$inout,$inptail
   2284 	lvx		$inptail,0,$inp
   2285 	addi		$inp,$inp,16
   2286 	lvx		$rndkey0,0,$key1
   2287 	lvx		$rndkey1,$idx,$key1
   2288 	addi		$idx,$idx,16
   2289 
   2290 	vsrab		$tmp,$tweak,$seven		# next tweak value
   2291 	vaddubm		$tweak,$tweak,$tweak
   2292 	vsldoi		$tmp,$tmp,$tmp,15
   2293 	vand		$tmp,$tmp,$eighty7
   2294 	vxor		$tweak,$tweak,$tmp
   2295 
   2296 	vperm		$inout,$inout,$inptail,$inpperm
   2297 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2298 	vxor		$inout,$inout,$tweak
   2299 	vxor		$inout,$inout,$rndkey0
   2300 	lvx		$rndkey0,$idx,$key1
   2301 	addi		$idx,$idx,16
   2302 
   2303 	mtctr		$rounds
   2304 	${UCMP}i	$len,16
   2305 	bge		Loop_xts_dec
   2306 
   2307 Ltail_xts_dec:
   2308 	vsrab		$tmp,$tweak,$seven		# next tweak value
   2309 	vaddubm		$tweak1,$tweak,$tweak
   2310 	vsldoi		$tmp,$tmp,$tmp,15
   2311 	vand		$tmp,$tmp,$eighty7
   2312 	vxor		$tweak1,$tweak1,$tmp
   2313 
   2314 	subi		$inp,$inp,16
   2315 	add		$inp,$inp,$len
   2316 
   2317 	vxor		$inout,$inout,$tweak		# :-(
   2318 	vxor		$inout,$inout,$tweak1		# :-)
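	# Decrypt-side stealing consumes tweaks out of order: the last
	# full ciphertext block is processed with the *next* tweak
	# ($tweak1), while the partial tail later reuses the current one.
	# The two vxor's above strip the already-applied $tweak and apply
	# $tweak1 instead.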
   2319 
   2320 Loop_xts_dec_short:
   2321 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2322 	vncipher	$inout,$inout,$rndkey1
   2323 	lvx		$rndkey1,$idx,$key1
   2324 	addi		$idx,$idx,16
   2325 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2326 	vncipher	$inout,$inout,$rndkey0
   2327 	lvx		$rndkey0,$idx,$key1
   2328 	addi		$idx,$idx,16
   2329 	bdnz		Loop_xts_dec_short
   2330 
   2331 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
   2332 	vncipher	$inout,$inout,$rndkey1
   2333 	lvx		$rndkey1,$idx,$key1
   2334 	li		$idx,16
   2335 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2336 	vxor		$rndkey0,$rndkey0,$tweak1
   2337 	vncipherlast	$output,$inout,$rndkey0
   2338 
   2339 	le?vperm	$tmp,$output,$output,$leperm
   2340 	be?nop
   2341 	le?stvx_u	$tmp,0,$out
   2342 	be?stvx_u	$output,0,$out
   2343 
   2344 	vmr		$inout,$inptail
   2345 	lvx		$inptail,0,$inp
   2346 	#addi		$inp,$inp,16
   2347 	lvx		$rndkey0,0,$key1
   2348 	lvx		$rndkey1,$idx,$key1
   2349 	addi		$idx,$idx,16
   2350 	vperm		$inout,$inout,$inptail,$inpperm
   2351 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
   2352 
   2353 	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
   2354 	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
   2355 	vspltisb	$tmp,-1
   2356 	vperm		$inptail,$inptail,$tmp,$inpperm
   2357 	vsel		$inout,$inout,$output,$inptail
   2358 
   2359 	vxor		$rndkey0,$rndkey0,$tweak
   2360 	vxor		$inout,$inout,$rndkey0
   2361 	lvx		$rndkey0,$idx,$key1
   2362 	addi		$idx,$idx,16
   2363 
   2364 	subi		r11,$out,1
   2365 	mtctr		$len
   2366 	li		$len,16
   2367 Loop_xts_dec_steal:
   2368 	lbzu		r0,1(r11)
   2369 	stb		r0,16(r11)
   2370 	bdnz		Loop_xts_dec_steal
   2371 
   2372 	mtctr		$rounds
   2373 	b		Loop_xts_dec			# one more time...
   2374 
   2375 Lxts_dec_done:
   2376 	${UCMP}i	$ivp,0
   2377 	beq		Lxts_dec_ret
   2378 
   2379 	vsrab		$tmp,$tweak,$seven		# next tweak value
   2380 	vaddubm		$tweak,$tweak,$tweak
   2381 	vsldoi		$tmp,$tmp,$tmp,15
   2382 	vand		$tmp,$tmp,$eighty7
   2383 	vxor		$tweak,$tweak,$tmp
   2384 
   2385 	le?vperm	$tweak,$tweak,$tweak,$leperm
   2386 	stvx_u		$tweak,0,$ivp
   2387 
   2388 Lxts_dec_ret:
   2389 	mtspr		256,r12				# restore vrsave
   2390 	li		r3,0
   2391 	blr
   2392 	.long		0
   2393 	.byte		0,12,0x04,0,0x80,6,6,0
   2394 	.long		0
   2395 .size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
   2396 ___
   2397 #########################################################################
   2398 {{	# Optimized XTS procedures					#
   2399 my $key_=$key2;
   2400 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
   2401     $x00=0 if ($flavour =~ /osx/);
   2402 my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
   2403 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
   2404 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
   2406 			# v26-v31 last 6 round keys
   2407 my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
   2408 my $taillen=$x70;
   2409 
   2410 $code.=<<___;
   2411 .align	5
   2412 _aesp8_xts_encrypt6x:
   2413 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   2414 	mflr		r11
   2415 	li		r7,`$FRAME+8*16+15`
   2416 	li		r3,`$FRAME+8*16+31`
   2417 	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
   2418 	stvx		v20,r7,$sp		# ABI says so
   2419 	addi		r7,r7,32
   2420 	stvx		v21,r3,$sp
   2421 	addi		r3,r3,32
   2422 	stvx		v22,r7,$sp
   2423 	addi		r7,r7,32
   2424 	stvx		v23,r3,$sp
   2425 	addi		r3,r3,32
   2426 	stvx		v24,r7,$sp
   2427 	addi		r7,r7,32
   2428 	stvx		v25,r3,$sp
   2429 	addi		r3,r3,32
   2430 	stvx		v26,r7,$sp
   2431 	addi		r7,r7,32
   2432 	stvx		v27,r3,$sp
   2433 	addi		r3,r3,32
   2434 	stvx		v28,r7,$sp
   2435 	addi		r7,r7,32
   2436 	stvx		v29,r3,$sp
   2437 	addi		r3,r3,32
   2438 	stvx		v30,r7,$sp
   2439 	stvx		v31,r3,$sp
   2440 	li		r0,-1
   2441 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   2442 	li		$x10,0x10
   2443 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   2444 	li		$x20,0x20
   2445 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   2446 	li		$x30,0x30
   2447 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   2448 	li		$x40,0x40
   2449 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   2450 	li		$x50,0x50
   2451 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   2452 	li		$x60,0x60
   2453 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   2454 	li		$x70,0x70
   2455 	mtspr		256,r0
   2456 
   2457 	subi		$rounds,$rounds,3	# -4 in total
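	# $rounds arrives as rounds/2-1 (set by the caller); three more
	# come off because the rounds whose keys stay resident in
	# registers never cycle through the stack buffer below.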
   2458 
   2459 	lvx		$rndkey0,$x00,$key1	# load key schedule
   2460 	lvx		v30,$x10,$key1
   2461 	addi		$key1,$key1,0x20
   2462 	lvx		v31,$x00,$key1
   2463 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   2464 	addi		$key_,$sp,`$FRAME+15`
   2465 	mtctr		$rounds
   2466 
   2467 Load_xts_enc_key:
   2468 	?vperm		v24,v30,v31,$keyperm
   2469 	lvx		v30,$x10,$key1
   2470 	addi		$key1,$key1,0x20
   2471 	stvx		v24,$x00,$key_		# off-load round[1]
   2472 	?vperm		v25,v31,v30,$keyperm
   2473 	lvx		v31,$x00,$key1
   2474 	stvx		v25,$x10,$key_		# off-load round[2]
   2475 	addi		$key_,$key_,0x20
   2476 	bdnz		Load_xts_enc_key
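	# Only v24/v25 rotate through the bulk of the key schedule (staged
	# on the stack above); the last six rounds' keys stay pinned in
	# v26-v31, freeing vector registers for the six data blocks.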
   2477 
   2478 	lvx		v26,$x10,$key1
   2479 	?vperm		v24,v30,v31,$keyperm
   2480 	lvx		v27,$x20,$key1
   2481 	stvx		v24,$x00,$key_		# off-load round[3]
   2482 	?vperm		v25,v31,v26,$keyperm
   2483 	lvx		v28,$x30,$key1
   2484 	stvx		v25,$x10,$key_		# off-load round[4]
   2485 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   2486 	?vperm		v26,v26,v27,$keyperm
   2487 	lvx		v29,$x40,$key1
   2488 	?vperm		v27,v27,v28,$keyperm
   2489 	lvx		v30,$x50,$key1
   2490 	?vperm		v28,v28,v29,$keyperm
   2491 	lvx		v31,$x60,$key1
   2492 	?vperm		v29,v29,v30,$keyperm
   2493 	lvx		$twk5,$x70,$key1	# borrow $twk5
   2494 	?vperm		v30,v30,v31,$keyperm
   2495 	lvx		v24,$x00,$key_		# pre-load round[1]
   2496 	?vperm		v31,v31,$twk5,$keyperm
   2497 	lvx		v25,$x10,$key_		# pre-load round[2]
   2498 
   2499 	 vperm		$in0,$inout,$inptail,$inpperm
   2500 	 subi		$inp,$inp,31		# undo "caller"
   2501 	vxor		$twk0,$tweak,$rndkey0
   2502 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2503 	vaddubm		$tweak,$tweak,$tweak
   2504 	vsldoi		$tmp,$tmp,$tmp,15
   2505 	vand		$tmp,$tmp,$eighty7
   2506 	 vxor		$out0,$in0,$twk0
   2507 	vxor		$tweak,$tweak,$tmp
   2508 
   2509 	 lvx_u		$in1,$x10,$inp
   2510 	vxor		$twk1,$tweak,$rndkey0
   2511 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2512 	vaddubm		$tweak,$tweak,$tweak
   2513 	vsldoi		$tmp,$tmp,$tmp,15
   2514 	 le?vperm	$in1,$in1,$in1,$leperm
   2515 	vand		$tmp,$tmp,$eighty7
   2516 	 vxor		$out1,$in1,$twk1
   2517 	vxor		$tweak,$tweak,$tmp
   2518 
   2519 	 lvx_u		$in2,$x20,$inp
   2520 	 andi.		$taillen,$len,15
   2521 	vxor		$twk2,$tweak,$rndkey0
   2522 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2523 	vaddubm		$tweak,$tweak,$tweak
   2524 	vsldoi		$tmp,$tmp,$tmp,15
   2525 	 le?vperm	$in2,$in2,$in2,$leperm
   2526 	vand		$tmp,$tmp,$eighty7
   2527 	 vxor		$out2,$in2,$twk2
   2528 	vxor		$tweak,$tweak,$tmp
   2529 
   2530 	 lvx_u		$in3,$x30,$inp
   2531 	 sub		$len,$len,$taillen
   2532 	vxor		$twk3,$tweak,$rndkey0
   2533 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2534 	vaddubm		$tweak,$tweak,$tweak
   2535 	vsldoi		$tmp,$tmp,$tmp,15
   2536 	 le?vperm	$in3,$in3,$in3,$leperm
   2537 	vand		$tmp,$tmp,$eighty7
   2538 	 vxor		$out3,$in3,$twk3
   2539 	vxor		$tweak,$tweak,$tmp
   2540 
   2541 	 lvx_u		$in4,$x40,$inp
   2542 	 subi		$len,$len,0x60
   2543 	vxor		$twk4,$tweak,$rndkey0
   2544 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2545 	vaddubm		$tweak,$tweak,$tweak
   2546 	vsldoi		$tmp,$tmp,$tmp,15
   2547 	 le?vperm	$in4,$in4,$in4,$leperm
   2548 	vand		$tmp,$tmp,$eighty7
   2549 	 vxor		$out4,$in4,$twk4
   2550 	vxor		$tweak,$tweak,$tmp
   2551 
   2552 	 lvx_u		$in5,$x50,$inp
   2553 	 addi		$inp,$inp,0x60
   2554 	vxor		$twk5,$tweak,$rndkey0
   2555 	vsrab		$tmp,$tweak,$seven	# next tweak value
   2556 	vaddubm		$tweak,$tweak,$tweak
   2557 	vsldoi		$tmp,$tmp,$tmp,15
   2558 	 le?vperm	$in5,$in5,$in5,$leperm
   2559 	vand		$tmp,$tmp,$eighty7
   2560 	 vxor		$out5,$in5,$twk5
   2561 	vxor		$tweak,$tweak,$tmp
   2562 
   2563 	vxor		v31,v31,$rndkey0
   2564 	mtctr		$rounds
   2565 	b		Loop_xts_enc6x
   2566 
   2567 .align	5
   2568 Loop_xts_enc6x:
   2569 	vcipher		$out0,$out0,v24
   2570 	vcipher		$out1,$out1,v24
   2571 	vcipher		$out2,$out2,v24
   2572 	vcipher		$out3,$out3,v24
   2573 	vcipher		$out4,$out4,v24
   2574 	vcipher		$out5,$out5,v24
   2575 	lvx		v24,$x20,$key_		# round[3]
   2576 	addi		$key_,$key_,0x20
   2577 
   2578 	vcipher		$out0,$out0,v25
   2579 	vcipher		$out1,$out1,v25
   2580 	vcipher		$out2,$out2,v25
   2581 	vcipher		$out3,$out3,v25
   2582 	vcipher		$out4,$out4,v25
   2583 	vcipher		$out5,$out5,v25
   2584 	lvx		v25,$x10,$key_		# round[4]
   2585 	bdnz		Loop_xts_enc6x
   2586 
   2587 	subic		$len,$len,96		# $len-=96
   2588 	 vxor		$in0,$twk0,v31		# xor with last round key
   2589 	vcipher		$out0,$out0,v24
   2590 	vcipher		$out1,$out1,v24
   2591 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2592 	 vxor		$twk0,$tweak,$rndkey0
   2593 	 vaddubm	$tweak,$tweak,$tweak
   2594 	vcipher		$out2,$out2,v24
   2595 	vcipher		$out3,$out3,v24
   2596 	 vsldoi		$tmp,$tmp,$tmp,15
   2597 	vcipher		$out4,$out4,v24
   2598 	vcipher		$out5,$out5,v24
   2599 
   2600 	subfe.		r0,r0,r0		# borrow?-1:0
   2601 	 vand		$tmp,$tmp,$eighty7
   2602 	vcipher		$out0,$out0,v25
   2603 	vcipher		$out1,$out1,v25
   2604 	 vxor		$tweak,$tweak,$tmp
   2605 	vcipher		$out2,$out2,v25
   2606 	vcipher		$out3,$out3,v25
   2607 	 vxor		$in1,$twk1,v31
   2608 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2609 	 vxor		$twk1,$tweak,$rndkey0
   2610 	vcipher		$out4,$out4,v25
   2611 	vcipher		$out5,$out5,v25
   2612 
   2613 	and		r0,r0,$len
   2614 	 vaddubm	$tweak,$tweak,$tweak
   2615 	 vsldoi		$tmp,$tmp,$tmp,15
   2616 	vcipher		$out0,$out0,v26
   2617 	vcipher		$out1,$out1,v26
   2618 	 vand		$tmp,$tmp,$eighty7
   2619 	vcipher		$out2,$out2,v26
   2620 	vcipher		$out3,$out3,v26
   2621 	 vxor		$tweak,$tweak,$tmp
   2622 	vcipher		$out4,$out4,v26
   2623 	vcipher		$out5,$out5,v26
   2624 
	add		$inp,$inp,r0		# $inp is adjusted in such a
						# way that at exit from the
						# loop inX-in5 are loaded
						# with the last "words"
   2629 	 vxor		$in2,$twk2,v31
   2630 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2631 	 vxor		$twk2,$tweak,$rndkey0
   2632 	 vaddubm	$tweak,$tweak,$tweak
   2633 	vcipher		$out0,$out0,v27
   2634 	vcipher		$out1,$out1,v27
   2635 	 vsldoi		$tmp,$tmp,$tmp,15
   2636 	vcipher		$out2,$out2,v27
   2637 	vcipher		$out3,$out3,v27
   2638 	 vand		$tmp,$tmp,$eighty7
   2639 	vcipher		$out4,$out4,v27
   2640 	vcipher		$out5,$out5,v27
   2641 
   2642 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   2643 	 vxor		$tweak,$tweak,$tmp
   2644 	vcipher		$out0,$out0,v28
   2645 	vcipher		$out1,$out1,v28
   2646 	 vxor		$in3,$twk3,v31
   2647 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2648 	 vxor		$twk3,$tweak,$rndkey0
   2649 	vcipher		$out2,$out2,v28
   2650 	vcipher		$out3,$out3,v28
   2651 	 vaddubm	$tweak,$tweak,$tweak
   2652 	 vsldoi		$tmp,$tmp,$tmp,15
   2653 	vcipher		$out4,$out4,v28
   2654 	vcipher		$out5,$out5,v28
   2655 	lvx		v24,$x00,$key_		# re-pre-load round[1]
   2656 	 vand		$tmp,$tmp,$eighty7
   2657 
   2658 	vcipher		$out0,$out0,v29
   2659 	vcipher		$out1,$out1,v29
   2660 	 vxor		$tweak,$tweak,$tmp
   2661 	vcipher		$out2,$out2,v29
   2662 	vcipher		$out3,$out3,v29
   2663 	 vxor		$in4,$twk4,v31
   2664 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2665 	 vxor		$twk4,$tweak,$rndkey0
   2666 	vcipher		$out4,$out4,v29
   2667 	vcipher		$out5,$out5,v29
   2668 	lvx		v25,$x10,$key_		# re-pre-load round[2]
   2669 	 vaddubm	$tweak,$tweak,$tweak
   2670 	 vsldoi		$tmp,$tmp,$tmp,15
   2671 
   2672 	vcipher		$out0,$out0,v30
   2673 	vcipher		$out1,$out1,v30
   2674 	 vand		$tmp,$tmp,$eighty7
   2675 	vcipher		$out2,$out2,v30
   2676 	vcipher		$out3,$out3,v30
   2677 	 vxor		$tweak,$tweak,$tmp
   2678 	vcipher		$out4,$out4,v30
   2679 	vcipher		$out5,$out5,v30
   2680 	 vxor		$in5,$twk5,v31
   2681 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   2682 	 vxor		$twk5,$tweak,$rndkey0
   2683 
   2684 	vcipherlast	$out0,$out0,$in0
   2685 	 lvx_u		$in0,$x00,$inp		# load next input block
   2686 	 vaddubm	$tweak,$tweak,$tweak
   2687 	 vsldoi		$tmp,$tmp,$tmp,15
   2688 	vcipherlast	$out1,$out1,$in1
   2689 	 lvx_u		$in1,$x10,$inp
   2690 	vcipherlast	$out2,$out2,$in2
   2691 	 le?vperm	$in0,$in0,$in0,$leperm
   2692 	 lvx_u		$in2,$x20,$inp
   2693 	 vand		$tmp,$tmp,$eighty7
   2694 	vcipherlast	$out3,$out3,$in3
   2695 	 le?vperm	$in1,$in1,$in1,$leperm
   2696 	 lvx_u		$in3,$x30,$inp
   2697 	vcipherlast	$out4,$out4,$in4
   2698 	 le?vperm	$in2,$in2,$in2,$leperm
   2699 	 lvx_u		$in4,$x40,$inp
   2700 	 vxor		$tweak,$tweak,$tmp
   2701 	vcipherlast	$tmp,$out5,$in5		# last block might be needed
   2702 						# in stealing mode
   2703 	 le?vperm	$in3,$in3,$in3,$leperm
   2704 	 lvx_u		$in5,$x50,$inp
   2705 	 addi		$inp,$inp,0x60
   2706 	 le?vperm	$in4,$in4,$in4,$leperm
   2707 	 le?vperm	$in5,$in5,$in5,$leperm
   2708 
   2709 	le?vperm	$out0,$out0,$out0,$leperm
   2710 	le?vperm	$out1,$out1,$out1,$leperm
   2711 	stvx_u		$out0,$x00,$out		# store output
   2712 	 vxor		$out0,$in0,$twk0
   2713 	le?vperm	$out2,$out2,$out2,$leperm
   2714 	stvx_u		$out1,$x10,$out
   2715 	 vxor		$out1,$in1,$twk1
   2716 	le?vperm	$out3,$out3,$out3,$leperm
   2717 	stvx_u		$out2,$x20,$out
   2718 	 vxor		$out2,$in2,$twk2
   2719 	le?vperm	$out4,$out4,$out4,$leperm
   2720 	stvx_u		$out3,$x30,$out
   2721 	 vxor		$out3,$in3,$twk3
   2722 	le?vperm	$out5,$tmp,$tmp,$leperm
   2723 	stvx_u		$out4,$x40,$out
   2724 	 vxor		$out4,$in4,$twk4
   2725 	le?stvx_u	$out5,$x50,$out
   2726 	be?stvx_u	$tmp, $x50,$out
   2727 	 vxor		$out5,$in5,$twk5
   2728 	addi		$out,$out,0x60
   2729 
   2730 	mtctr		$rounds
   2731 	beq		Loop_xts_enc6x		# did $len-=96 borrow?
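	# No borrow means a full six-block batch remains; otherwise undo
	# the -0x60 bias and dispatch: 0x20 left means two blocks, 0x40
	# four, strictly-less picks the odd counts, and zero may still owe
	# a stolen tail (Lxts_enc6x_zero checks $taillen).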
   2732 
   2733 	addic.		$len,$len,0x60
   2734 	beq		Lxts_enc6x_zero
   2735 	cmpwi		$len,0x20
   2736 	blt		Lxts_enc6x_one
   2737 	nop
   2738 	beq		Lxts_enc6x_two
   2739 	cmpwi		$len,0x40
   2740 	blt		Lxts_enc6x_three
   2741 	nop
   2742 	beq		Lxts_enc6x_four
   2743 
   2744 Lxts_enc6x_five:
   2745 	vxor		$out0,$in1,$twk0
   2746 	vxor		$out1,$in2,$twk1
   2747 	vxor		$out2,$in3,$twk2
   2748 	vxor		$out3,$in4,$twk3
   2749 	vxor		$out4,$in5,$twk4
   2750 
   2751 	bl		_aesp8_xts_enc5x
   2752 
   2753 	le?vperm	$out0,$out0,$out0,$leperm
   2754 	vmr		$twk0,$twk5		# unused tweak
   2755 	le?vperm	$out1,$out1,$out1,$leperm
   2756 	stvx_u		$out0,$x00,$out		# store output
   2757 	le?vperm	$out2,$out2,$out2,$leperm
   2758 	stvx_u		$out1,$x10,$out
   2759 	le?vperm	$out3,$out3,$out3,$leperm
   2760 	stvx_u		$out2,$x20,$out
   2761 	vxor		$tmp,$out4,$twk5	# last block prep for stealing
   2762 	le?vperm	$out4,$out4,$out4,$leperm
   2763 	stvx_u		$out3,$x30,$out
   2764 	stvx_u		$out4,$x40,$out
   2765 	addi		$out,$out,0x50
   2766 	bne		Lxts_enc6x_steal
   2767 	b		Lxts_enc6x_done
   2768 
   2769 .align	4
   2770 Lxts_enc6x_four:
   2771 	vxor		$out0,$in2,$twk0
   2772 	vxor		$out1,$in3,$twk1
   2773 	vxor		$out2,$in4,$twk2
   2774 	vxor		$out3,$in5,$twk3
   2775 	vxor		$out4,$out4,$out4
   2776 
   2777 	bl		_aesp8_xts_enc5x
   2778 
   2779 	le?vperm	$out0,$out0,$out0,$leperm
   2780 	vmr		$twk0,$twk4		# unused tweak
   2781 	le?vperm	$out1,$out1,$out1,$leperm
   2782 	stvx_u		$out0,$x00,$out		# store output
   2783 	le?vperm	$out2,$out2,$out2,$leperm
   2784 	stvx_u		$out1,$x10,$out
   2785 	vxor		$tmp,$out3,$twk4	# last block prep for stealing
   2786 	le?vperm	$out3,$out3,$out3,$leperm
   2787 	stvx_u		$out2,$x20,$out
   2788 	stvx_u		$out3,$x30,$out
   2789 	addi		$out,$out,0x40
   2790 	bne		Lxts_enc6x_steal
   2791 	b		Lxts_enc6x_done
   2792 
   2793 .align	4
   2794 Lxts_enc6x_three:
   2795 	vxor		$out0,$in3,$twk0
   2796 	vxor		$out1,$in4,$twk1
   2797 	vxor		$out2,$in5,$twk2
   2798 	vxor		$out3,$out3,$out3
   2799 	vxor		$out4,$out4,$out4
   2800 
   2801 	bl		_aesp8_xts_enc5x
   2802 
   2803 	le?vperm	$out0,$out0,$out0,$leperm
   2804 	vmr		$twk0,$twk3		# unused tweak
   2805 	le?vperm	$out1,$out1,$out1,$leperm
   2806 	stvx_u		$out0,$x00,$out		# store output
   2807 	vxor		$tmp,$out2,$twk3	# last block prep for stealing
   2808 	le?vperm	$out2,$out2,$out2,$leperm
   2809 	stvx_u		$out1,$x10,$out
   2810 	stvx_u		$out2,$x20,$out
   2811 	addi		$out,$out,0x30
   2812 	bne		Lxts_enc6x_steal
   2813 	b		Lxts_enc6x_done
   2814 
   2815 .align	4
   2816 Lxts_enc6x_two:
   2817 	vxor		$out0,$in4,$twk0
   2818 	vxor		$out1,$in5,$twk1
   2819 	vxor		$out2,$out2,$out2
   2820 	vxor		$out3,$out3,$out3
   2821 	vxor		$out4,$out4,$out4
   2822 
   2823 	bl		_aesp8_xts_enc5x
   2824 
   2825 	le?vperm	$out0,$out0,$out0,$leperm
   2826 	vmr		$twk0,$twk2		# unused tweak
   2827 	vxor		$tmp,$out1,$twk2	# last block prep for stealing
   2828 	le?vperm	$out1,$out1,$out1,$leperm
   2829 	stvx_u		$out0,$x00,$out		# store output
   2830 	stvx_u		$out1,$x10,$out
   2831 	addi		$out,$out,0x20
   2832 	bne		Lxts_enc6x_steal
   2833 	b		Lxts_enc6x_done
   2834 
   2835 .align	4
   2836 Lxts_enc6x_one:
   2837 	vxor		$out0,$in5,$twk0
   2838 	nop
   2839 Loop_xts_enc1x:
   2840 	vcipher		$out0,$out0,v24
   2841 	lvx		v24,$x20,$key_		# round[3]
   2842 	addi		$key_,$key_,0x20
   2843 
   2844 	vcipher		$out0,$out0,v25
   2845 	lvx		v25,$x10,$key_		# round[4]
   2846 	bdnz		Loop_xts_enc1x
   2847 
   2848 	add		$inp,$inp,$taillen
   2849 	cmpwi		$taillen,0
   2850 	vcipher		$out0,$out0,v24
   2851 
   2852 	subi		$inp,$inp,16
   2853 	vcipher		$out0,$out0,v25
   2854 
   2855 	lvsr		$inpperm,0,$taillen
   2856 	vcipher		$out0,$out0,v26
   2857 
   2858 	lvx_u		$in0,0,$inp
   2859 	vcipher		$out0,$out0,v27
   2860 
   2861 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   2862 	vcipher		$out0,$out0,v28
   2863 	lvx		v24,$x00,$key_		# re-pre-load round[1]
   2864 
   2865 	vcipher		$out0,$out0,v29
   2866 	lvx		v25,$x10,$key_		# re-pre-load round[2]
   2867 	 vxor		$twk0,$twk0,v31
   2868 
   2869 	le?vperm	$in0,$in0,$in0,$leperm
   2870 	vcipher		$out0,$out0,v30
   2871 
   2872 	vperm		$in0,$in0,$in0,$inpperm
   2873 	vcipherlast	$out0,$out0,$twk0
   2874 
   2875 	vmr		$twk0,$twk1		# unused tweak
   2876 	vxor		$tmp,$out0,$twk1	# last block prep for stealing
   2877 	le?vperm	$out0,$out0,$out0,$leperm
   2878 	stvx_u		$out0,$x00,$out		# store output
   2879 	addi		$out,$out,0x10
   2880 	bne		Lxts_enc6x_steal
   2881 	b		Lxts_enc6x_done
   2882 
   2883 .align	4
   2884 Lxts_enc6x_zero:
   2885 	cmpwi		$taillen,0
   2886 	beq		Lxts_enc6x_done
   2887 
   2888 	add		$inp,$inp,$taillen
   2889 	subi		$inp,$inp,16
   2890 	lvx_u		$in0,0,$inp
   2891 	lvsr		$inpperm,0,$taillen	# $in5 is no more
   2892 	le?vperm	$in0,$in0,$in0,$leperm
   2893 	vperm		$in0,$in0,$in0,$inpperm
   2894 	vxor		$tmp,$tmp,$twk0
   2895 Lxts_enc6x_steal:
   2896 	vxor		$in0,$in0,$twk0
   2897 	vxor		$out0,$out0,$out0
   2898 	vspltisb	$out1,-1
   2899 	vperm		$out0,$out0,$out1,$inpperm
   2900 	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?
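	# Merge the $taillen plaintext bytes with the stolen tail of the
	# last ciphertext block (saved in $tmp) into one more input block,
	# then re-enter Loop_xts_enc1x to encrypt it into the last
	# full-block slot.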
   2901 
   2902 	subi		r30,$out,17
   2903 	subi		$out,$out,16
   2904 	mtctr		$taillen
   2905 Loop_xts_enc6x_steal:
   2906 	lbzu		r0,1(r30)
   2907 	stb		r0,16(r30)
   2908 	bdnz		Loop_xts_enc6x_steal
   2909 
   2910 	li		$taillen,0
   2911 	mtctr		$rounds
   2912 	b		Loop_xts_enc1x		# one more time...
   2913 
   2914 .align	4
   2915 Lxts_enc6x_done:
   2916 	${UCMP}i	$ivp,0
   2917 	beq		Lxts_enc6x_ret
   2918 
   2919 	vxor		$tweak,$twk0,$rndkey0
   2920 	le?vperm	$tweak,$tweak,$tweak,$leperm
   2921 	stvx_u		$tweak,0,$ivp
   2922 
   2923 Lxts_enc6x_ret:
   2924 	mtlr		r11
   2925 	li		r10,`$FRAME+15`
   2926 	li		r11,`$FRAME+31`
   2927 	stvx		$seven,r10,$sp		# wipe copies of round keys
   2928 	addi		r10,r10,32
   2929 	stvx		$seven,r11,$sp
   2930 	addi		r11,r11,32
   2931 	stvx		$seven,r10,$sp
   2932 	addi		r10,r10,32
   2933 	stvx		$seven,r11,$sp
   2934 	addi		r11,r11,32
   2935 	stvx		$seven,r10,$sp
   2936 	addi		r10,r10,32
   2937 	stvx		$seven,r11,$sp
   2938 	addi		r11,r11,32
   2939 	stvx		$seven,r10,$sp
   2940 	addi		r10,r10,32
   2941 	stvx		$seven,r11,$sp
   2942 	addi		r11,r11,32
   2943 
   2944 	mtspr		256,$vrsave
   2945 	lvx		v20,r10,$sp		# ABI says so
   2946 	addi		r10,r10,32
   2947 	lvx		v21,r11,$sp
   2948 	addi		r11,r11,32
   2949 	lvx		v22,r10,$sp
   2950 	addi		r10,r10,32
   2951 	lvx		v23,r11,$sp
   2952 	addi		r11,r11,32
   2953 	lvx		v24,r10,$sp
   2954 	addi		r10,r10,32
   2955 	lvx		v25,r11,$sp
   2956 	addi		r11,r11,32
   2957 	lvx		v26,r10,$sp
   2958 	addi		r10,r10,32
   2959 	lvx		v27,r11,$sp
   2960 	addi		r11,r11,32
   2961 	lvx		v28,r10,$sp
   2962 	addi		r10,r10,32
   2963 	lvx		v29,r11,$sp
   2964 	addi		r11,r11,32
   2965 	lvx		v30,r10,$sp
   2966 	lvx		v31,r11,$sp
   2967 	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   2968 	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   2969 	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   2970 	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   2971 	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   2972 	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   2973 	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
   2974 	blr
   2975 	.long		0
   2976 	.byte		0,12,0x04,1,0x80,6,6,0
   2977 	.long		0
   2978 
   2979 .align	5
   2980 _aesp8_xts_enc5x:
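	# Runs five blocks through the remaining rounds. The vcipherlast
	# operands are the tweaks pre-folded with the last round key, and
	# the prospective stealing-tail input block is loaded along the
	# way, mirroring the six-block loop epilogue.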
   2981 	vcipher		$out0,$out0,v24
   2982 	vcipher		$out1,$out1,v24
   2983 	vcipher		$out2,$out2,v24
   2984 	vcipher		$out3,$out3,v24
   2985 	vcipher		$out4,$out4,v24
   2986 	lvx		v24,$x20,$key_		# round[3]
   2987 	addi		$key_,$key_,0x20
   2988 
   2989 	vcipher		$out0,$out0,v25
   2990 	vcipher		$out1,$out1,v25
   2991 	vcipher		$out2,$out2,v25
   2992 	vcipher		$out3,$out3,v25
   2993 	vcipher		$out4,$out4,v25
   2994 	lvx		v25,$x10,$key_		# round[4]
   2995 	bdnz		_aesp8_xts_enc5x
   2996 
   2997 	add		$inp,$inp,$taillen
   2998 	cmpwi		$taillen,0
   2999 	vcipher		$out0,$out0,v24
   3000 	vcipher		$out1,$out1,v24
   3001 	vcipher		$out2,$out2,v24
   3002 	vcipher		$out3,$out3,v24
   3003 	vcipher		$out4,$out4,v24
   3004 
   3005 	subi		$inp,$inp,16
   3006 	vcipher		$out0,$out0,v25
   3007 	vcipher		$out1,$out1,v25
   3008 	vcipher		$out2,$out2,v25
   3009 	vcipher		$out3,$out3,v25
   3010 	vcipher		$out4,$out4,v25
   3011 	 vxor		$twk0,$twk0,v31
   3012 
   3013 	vcipher		$out0,$out0,v26
   3014 	lvsr		$inpperm,0,$taillen	# $in5 is no more
   3015 	vcipher		$out1,$out1,v26
   3016 	vcipher		$out2,$out2,v26
   3017 	vcipher		$out3,$out3,v26
   3018 	vcipher		$out4,$out4,v26
   3019 	 vxor		$in1,$twk1,v31
   3020 
   3021 	vcipher		$out0,$out0,v27
   3022 	lvx_u		$in0,0,$inp
   3023 	vcipher		$out1,$out1,v27
   3024 	vcipher		$out2,$out2,v27
   3025 	vcipher		$out3,$out3,v27
   3026 	vcipher		$out4,$out4,v27
   3027 	 vxor		$in2,$twk2,v31
   3028 
   3029 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   3030 	vcipher		$out0,$out0,v28
   3031 	vcipher		$out1,$out1,v28
   3032 	vcipher		$out2,$out2,v28
   3033 	vcipher		$out3,$out3,v28
   3034 	vcipher		$out4,$out4,v28
   3035 	lvx		v24,$x00,$key_		# re-pre-load round[1]
   3036 	 vxor		$in3,$twk3,v31
   3037 
   3038 	vcipher		$out0,$out0,v29
   3039 	le?vperm	$in0,$in0,$in0,$leperm
   3040 	vcipher		$out1,$out1,v29
   3041 	vcipher		$out2,$out2,v29
   3042 	vcipher		$out3,$out3,v29
   3043 	vcipher		$out4,$out4,v29
   3044 	lvx		v25,$x10,$key_		# re-pre-load round[2]
   3045 	 vxor		$in4,$twk4,v31
   3046 
   3047 	vcipher		$out0,$out0,v30
   3048 	vperm		$in0,$in0,$in0,$inpperm
   3049 	vcipher		$out1,$out1,v30
   3050 	vcipher		$out2,$out2,v30
   3051 	vcipher		$out3,$out3,v30
   3052 	vcipher		$out4,$out4,v30
   3053 
   3054 	vcipherlast	$out0,$out0,$twk0
   3055 	vcipherlast	$out1,$out1,$in1
   3056 	vcipherlast	$out2,$out2,$in2
   3057 	vcipherlast	$out3,$out3,$in3
   3058 	vcipherlast	$out4,$out4,$in4
   3059 	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
   3062 
   3063 .align	5
   3064 _aesp8_xts_decrypt6x:
   3065 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
   3066 	mflr		r11
   3067 	li		r7,`$FRAME+8*16+15`
   3068 	li		r3,`$FRAME+8*16+31`
   3069 	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
   3070 	stvx		v20,r7,$sp		# ABI says so
   3071 	addi		r7,r7,32
   3072 	stvx		v21,r3,$sp
   3073 	addi		r3,r3,32
   3074 	stvx		v22,r7,$sp
   3075 	addi		r7,r7,32
   3076 	stvx		v23,r3,$sp
   3077 	addi		r3,r3,32
   3078 	stvx		v24,r7,$sp
   3079 	addi		r7,r7,32
   3080 	stvx		v25,r3,$sp
   3081 	addi		r3,r3,32
   3082 	stvx		v26,r7,$sp
   3083 	addi		r7,r7,32
   3084 	stvx		v27,r3,$sp
   3085 	addi		r3,r3,32
   3086 	stvx		v28,r7,$sp
   3087 	addi		r7,r7,32
   3088 	stvx		v29,r3,$sp
   3089 	addi		r3,r3,32
   3090 	stvx		v30,r7,$sp
   3091 	stvx		v31,r3,$sp
   3092 	li		r0,-1
   3093 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
   3094 	li		$x10,0x10
   3095 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
   3096 	li		$x20,0x20
   3097 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
   3098 	li		$x30,0x30
   3099 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
   3100 	li		$x40,0x40
   3101 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
   3102 	li		$x50,0x50
   3103 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
   3104 	li		$x60,0x60
   3105 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
   3106 	li		$x70,0x70
   3107 	mtspr		256,r0
   3108 
   3109 	subi		$rounds,$rounds,3	# -4 in total
   3110 
   3111 	lvx		$rndkey0,$x00,$key1	# load key schedule
   3112 	lvx		v30,$x10,$key1
   3113 	addi		$key1,$key1,0x20
   3114 	lvx		v31,$x00,$key1
   3115 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
   3116 	addi		$key_,$sp,`$FRAME+15`
   3117 	mtctr		$rounds
   3118 
   3119 Load_xts_dec_key:
   3120 	?vperm		v24,v30,v31,$keyperm
   3121 	lvx		v30,$x10,$key1
   3122 	addi		$key1,$key1,0x20
   3123 	stvx		v24,$x00,$key_		# off-load round[1]
   3124 	?vperm		v25,v31,v30,$keyperm
   3125 	lvx		v31,$x00,$key1
   3126 	stvx		v25,$x10,$key_		# off-load round[2]
   3127 	addi		$key_,$key_,0x20
   3128 	bdnz		Load_xts_dec_key
   3129 
   3130 	lvx		v26,$x10,$key1
   3131 	?vperm		v24,v30,v31,$keyperm
   3132 	lvx		v27,$x20,$key1
   3133 	stvx		v24,$x00,$key_		# off-load round[3]
   3134 	?vperm		v25,v31,v26,$keyperm
   3135 	lvx		v28,$x30,$key1
   3136 	stvx		v25,$x10,$key_		# off-load round[4]
   3137 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   3138 	?vperm		v26,v26,v27,$keyperm
   3139 	lvx		v29,$x40,$key1
   3140 	?vperm		v27,v27,v28,$keyperm
   3141 	lvx		v30,$x50,$key1
   3142 	?vperm		v28,v28,v29,$keyperm
   3143 	lvx		v31,$x60,$key1
   3144 	?vperm		v29,v29,v30,$keyperm
   3145 	lvx		$twk5,$x70,$key1	# borrow $twk5
   3146 	?vperm		v30,v30,v31,$keyperm
   3147 	lvx		v24,$x00,$key_		# pre-load round[1]
   3148 	?vperm		v31,v31,$twk5,$keyperm
   3149 	lvx		v25,$x10,$key_		# pre-load round[2]
   3150 
   3151 	 vperm		$in0,$inout,$inptail,$inpperm
   3152 	 subi		$inp,$inp,31		# undo "caller"
   3153 	vxor		$twk0,$tweak,$rndkey0
   3154 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3155 	vaddubm		$tweak,$tweak,$tweak
   3156 	vsldoi		$tmp,$tmp,$tmp,15
   3157 	vand		$tmp,$tmp,$eighty7
   3158 	 vxor		$out0,$in0,$twk0
   3159 	vxor		$tweak,$tweak,$tmp
   3160 
   3161 	 lvx_u		$in1,$x10,$inp
   3162 	vxor		$twk1,$tweak,$rndkey0
   3163 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3164 	vaddubm		$tweak,$tweak,$tweak
   3165 	vsldoi		$tmp,$tmp,$tmp,15
   3166 	 le?vperm	$in1,$in1,$in1,$leperm
   3167 	vand		$tmp,$tmp,$eighty7
   3168 	 vxor		$out1,$in1,$twk1
   3169 	vxor		$tweak,$tweak,$tmp
   3170 
   3171 	 lvx_u		$in2,$x20,$inp
   3172 	 andi.		$taillen,$len,15
   3173 	vxor		$twk2,$tweak,$rndkey0
   3174 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3175 	vaddubm		$tweak,$tweak,$tweak
   3176 	vsldoi		$tmp,$tmp,$tmp,15
   3177 	 le?vperm	$in2,$in2,$in2,$leperm
   3178 	vand		$tmp,$tmp,$eighty7
   3179 	 vxor		$out2,$in2,$twk2
   3180 	vxor		$tweak,$tweak,$tmp
   3181 
   3182 	 lvx_u		$in3,$x30,$inp
   3183 	 sub		$len,$len,$taillen
   3184 	vxor		$twk3,$tweak,$rndkey0
   3185 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3186 	vaddubm		$tweak,$tweak,$tweak
   3187 	vsldoi		$tmp,$tmp,$tmp,15
   3188 	 le?vperm	$in3,$in3,$in3,$leperm
   3189 	vand		$tmp,$tmp,$eighty7
   3190 	 vxor		$out3,$in3,$twk3
   3191 	vxor		$tweak,$tweak,$tmp
   3192 
   3193 	 lvx_u		$in4,$x40,$inp
   3194 	 subi		$len,$len,0x60
   3195 	vxor		$twk4,$tweak,$rndkey0
   3196 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3197 	vaddubm		$tweak,$tweak,$tweak
   3198 	vsldoi		$tmp,$tmp,$tmp,15
   3199 	 le?vperm	$in4,$in4,$in4,$leperm
   3200 	vand		$tmp,$tmp,$eighty7
   3201 	 vxor		$out4,$in4,$twk4
   3202 	vxor		$tweak,$tweak,$tmp
   3203 
   3204 	 lvx_u		$in5,$x50,$inp
   3205 	 addi		$inp,$inp,0x60
   3206 	vxor		$twk5,$tweak,$rndkey0
   3207 	vsrab		$tmp,$tweak,$seven	# next tweak value
   3208 	vaddubm		$tweak,$tweak,$tweak
   3209 	vsldoi		$tmp,$tmp,$tmp,15
   3210 	 le?vperm	$in5,$in5,$in5,$leperm
   3211 	vand		$tmp,$tmp,$eighty7
   3212 	 vxor		$out5,$in5,$twk5
   3213 	vxor		$tweak,$tweak,$tmp
   3214 
   3215 	vxor		v31,v31,$rndkey0
   3216 	mtctr		$rounds
   3217 	b		Loop_xts_dec6x
   3218 
   3219 .align	5
   3220 Loop_xts_dec6x:
   3221 	vncipher	$out0,$out0,v24
   3222 	vncipher	$out1,$out1,v24
   3223 	vncipher	$out2,$out2,v24
   3224 	vncipher	$out3,$out3,v24
   3225 	vncipher	$out4,$out4,v24
   3226 	vncipher	$out5,$out5,v24
   3227 	lvx		v24,$x20,$key_		# round[3]
   3228 	addi		$key_,$key_,0x20
   3229 
   3230 	vncipher	$out0,$out0,v25
   3231 	vncipher	$out1,$out1,v25
   3232 	vncipher	$out2,$out2,v25
   3233 	vncipher	$out3,$out3,v25
   3234 	vncipher	$out4,$out4,v25
   3235 	vncipher	$out5,$out5,v25
   3236 	lvx		v25,$x10,$key_		# round[4]
   3237 	bdnz		Loop_xts_dec6x
   3238 
   3239 	subic		$len,$len,96		# $len-=96
   3240 	 vxor		$in0,$twk0,v31		# xor with last round key
   3241 	vncipher	$out0,$out0,v24
   3242 	vncipher	$out1,$out1,v24
   3243 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3244 	 vxor		$twk0,$tweak,$rndkey0
   3245 	 vaddubm	$tweak,$tweak,$tweak
   3246 	vncipher	$out2,$out2,v24
   3247 	vncipher	$out3,$out3,v24
   3248 	 vsldoi		$tmp,$tmp,$tmp,15
   3249 	vncipher	$out4,$out4,v24
   3250 	vncipher	$out5,$out5,v24
   3251 
   3252 	subfe.		r0,r0,r0		# borrow?-1:0
   3253 	 vand		$tmp,$tmp,$eighty7
   3254 	vncipher	$out0,$out0,v25
   3255 	vncipher	$out1,$out1,v25
   3256 	 vxor		$tweak,$tweak,$tmp
   3257 	vncipher	$out2,$out2,v25
   3258 	vncipher	$out3,$out3,v25
   3259 	 vxor		$in1,$twk1,v31
   3260 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3261 	 vxor		$twk1,$tweak,$rndkey0
   3262 	vncipher	$out4,$out4,v25
   3263 	vncipher	$out5,$out5,v25
   3264 
   3265 	and		r0,r0,$len
   3266 	 vaddubm	$tweak,$tweak,$tweak
   3267 	 vsldoi		$tmp,$tmp,$tmp,15
   3268 	vncipher	$out0,$out0,v26
   3269 	vncipher	$out1,$out1,v26
   3270 	 vand		$tmp,$tmp,$eighty7
   3271 	vncipher	$out2,$out2,v26
   3272 	vncipher	$out3,$out3,v26
   3273 	 vxor		$tweak,$tweak,$tmp
   3274 	vncipher	$out4,$out4,v26
   3275 	vncipher	$out5,$out5,v26
   3276 
   3277 	add		$inp,$inp,r0		# $inp is adjusted in such
   3278 						# way that at exit from the
   3279 						# loop inX-in5 are loaded
   3280 						# with last "words"
   3281 	 vxor		$in2,$twk2,v31
   3282 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3283 	 vxor		$twk2,$tweak,$rndkey0
   3284 	 vaddubm	$tweak,$tweak,$tweak
   3285 	vncipher	$out0,$out0,v27
   3286 	vncipher	$out1,$out1,v27
   3287 	 vsldoi		$tmp,$tmp,$tmp,15
   3288 	vncipher	$out2,$out2,v27
   3289 	vncipher	$out3,$out3,v27
   3290 	 vand		$tmp,$tmp,$eighty7
   3291 	vncipher	$out4,$out4,v27
   3292 	vncipher	$out5,$out5,v27
   3293 
   3294 	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
   3295 	 vxor		$tweak,$tweak,$tmp
   3296 	vncipher	$out0,$out0,v28
   3297 	vncipher	$out1,$out1,v28
   3298 	 vxor		$in3,$twk3,v31
   3299 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3300 	 vxor		$twk3,$tweak,$rndkey0
   3301 	vncipher	$out2,$out2,v28
   3302 	vncipher	$out3,$out3,v28
   3303 	 vaddubm	$tweak,$tweak,$tweak
   3304 	 vsldoi		$tmp,$tmp,$tmp,15
   3305 	vncipher	$out4,$out4,v28
   3306 	vncipher	$out5,$out5,v28
   3307 	lvx		v24,$x00,$key_		# re-pre-load round[1]
   3308 	 vand		$tmp,$tmp,$eighty7
   3309 
   3310 	vncipher	$out0,$out0,v29
   3311 	vncipher	$out1,$out1,v29
   3312 	 vxor		$tweak,$tweak,$tmp
   3313 	vncipher	$out2,$out2,v29
   3314 	vncipher	$out3,$out3,v29
   3315 	 vxor		$in4,$twk4,v31
   3316 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3317 	 vxor		$twk4,$tweak,$rndkey0
   3318 	vncipher	$out4,$out4,v29
   3319 	vncipher	$out5,$out5,v29
   3320 	lvx		v25,$x10,$key_		# re-pre-load round[2]
   3321 	 vaddubm	$tweak,$tweak,$tweak
   3322 	 vsldoi		$tmp,$tmp,$tmp,15
   3323 
   3324 	vncipher	$out0,$out0,v30
   3325 	vncipher	$out1,$out1,v30
   3326 	 vand		$tmp,$tmp,$eighty7
   3327 	vncipher	$out2,$out2,v30
   3328 	vncipher	$out3,$out3,v30
   3329 	 vxor		$tweak,$tweak,$tmp
   3330 	vncipher	$out4,$out4,v30
   3331 	vncipher	$out5,$out5,v30
   3332 	 vxor		$in5,$twk5,v31
   3333 	 vsrab		$tmp,$tweak,$seven	# next tweak value
   3334 	 vxor		$twk5,$tweak,$rndkey0
   3335 
   3336 	vncipherlast	$out0,$out0,$in0
   3337 	 lvx_u		$in0,$x00,$inp		# load next input block
   3338 	 vaddubm	$tweak,$tweak,$tweak
   3339 	 vsldoi		$tmp,$tmp,$tmp,15
   3340 	vncipherlast	$out1,$out1,$in1
   3341 	 lvx_u		$in1,$x10,$inp
   3342 	vncipherlast	$out2,$out2,$in2
   3343 	 le?vperm	$in0,$in0,$in0,$leperm
   3344 	 lvx_u		$in2,$x20,$inp
   3345 	 vand		$tmp,$tmp,$eighty7
   3346 	vncipherlast	$out3,$out3,$in3
   3347 	 le?vperm	$in1,$in1,$in1,$leperm
   3348 	 lvx_u		$in3,$x30,$inp
   3349 	vncipherlast	$out4,$out4,$in4
   3350 	 le?vperm	$in2,$in2,$in2,$leperm
   3351 	 lvx_u		$in4,$x40,$inp
   3352 	 vxor		$tweak,$tweak,$tmp
   3353 	vncipherlast	$out5,$out5,$in5
   3354 	 le?vperm	$in3,$in3,$in3,$leperm
   3355 	 lvx_u		$in5,$x50,$inp
   3356 	 addi		$inp,$inp,0x60
   3357 	 le?vperm	$in4,$in4,$in4,$leperm
   3358 	 le?vperm	$in5,$in5,$in5,$leperm
   3359 
   3360 	le?vperm	$out0,$out0,$out0,$leperm
   3361 	le?vperm	$out1,$out1,$out1,$leperm
   3362 	stvx_u		$out0,$x00,$out		# store output
   3363 	 vxor		$out0,$in0,$twk0
   3364 	le?vperm	$out2,$out2,$out2,$leperm
   3365 	stvx_u		$out1,$x10,$out
   3366 	 vxor		$out1,$in1,$twk1
   3367 	le?vperm	$out3,$out3,$out3,$leperm
   3368 	stvx_u		$out2,$x20,$out
   3369 	 vxor		$out2,$in2,$twk2
   3370 	le?vperm	$out4,$out4,$out4,$leperm
   3371 	stvx_u		$out3,$x30,$out
   3372 	 vxor		$out3,$in3,$twk3
   3373 	le?vperm	$out5,$out5,$out5,$leperm
   3374 	stvx_u		$out4,$x40,$out
   3375 	 vxor		$out4,$in4,$twk4
   3376 	stvx_u		$out5,$x50,$out
   3377 	 vxor		$out5,$in5,$twk5
   3378 	addi		$out,$out,0x60
   3379 
   3380 	mtctr		$rounds
   3381 	beq		Loop_xts_dec6x		# did $len-=96 borrow?
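	# Tail dispatch mirrors the encrypt path, except the 1..5-block
	# cases below also shuffle $twk0/$twk1 forward, since decrypt-side
	# stealing consumes the tweaks out of order.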
   3382 
   3383 	addic.		$len,$len,0x60
   3384 	beq		Lxts_dec6x_zero
   3385 	cmpwi		$len,0x20
   3386 	blt		Lxts_dec6x_one
   3387 	nop
   3388 	beq		Lxts_dec6x_two
   3389 	cmpwi		$len,0x40
   3390 	blt		Lxts_dec6x_three
   3391 	nop
   3392 	beq		Lxts_dec6x_four
   3393 
   3394 Lxts_dec6x_five:
   3395 	vxor		$out0,$in1,$twk0
   3396 	vxor		$out1,$in2,$twk1
   3397 	vxor		$out2,$in3,$twk2
   3398 	vxor		$out3,$in4,$twk3
   3399 	vxor		$out4,$in5,$twk4
   3400 
   3401 	bl		_aesp8_xts_dec5x
   3402 
   3403 	le?vperm	$out0,$out0,$out0,$leperm
   3404 	vmr		$twk0,$twk5		# unused tweak
   3405 	vxor		$twk1,$tweak,$rndkey0
   3406 	le?vperm	$out1,$out1,$out1,$leperm
   3407 	stvx_u		$out0,$x00,$out		# store output
   3408 	vxor		$out0,$in0,$twk1
   3409 	le?vperm	$out2,$out2,$out2,$leperm
   3410 	stvx_u		$out1,$x10,$out
   3411 	le?vperm	$out3,$out3,$out3,$leperm
   3412 	stvx_u		$out2,$x20,$out
   3413 	le?vperm	$out4,$out4,$out4,$leperm
   3414 	stvx_u		$out3,$x30,$out
   3415 	stvx_u		$out4,$x40,$out
   3416 	addi		$out,$out,0x50
   3417 	bne		Lxts_dec6x_steal
   3418 	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
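	# No whole blocks remain. If $taillen is zero we are done;
	# otherwise one more block is loaded, decrypted with tweak
	# $twk1, and handed to the ciphertext-stealing code below.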
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no longer needed
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0

	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...
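
	# Ciphertext stealing, decrypt side: $tmp holds the last full
	# ciphertext block decrypted with the later tweak ($twk1); its
	# first $taillen bytes are the final partial plaintext, which
	# the byte loop above copies from $out to $out+16. vsel spliced
	# the partial ciphertext block together with the tail of $tmp,
	# and the result, pre-xored with $twk0, goes through
	# Loop_xts_dec1x one last time to produce the second-to-last
	# plaintext block.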

.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_dec5x:
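	# Shared 5-track tail: runs the middle AES rounds on $out0-$out4
	# in parallel, two rounds per CTR iteration with the next pair of
	# round keys loaded in flight, then applies the tweak-xored last
	# round keys. On return CTR is reloaded with $rounds and cr0
	# still reflects "$taillen == 0" for the callers' bne.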
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,`$FRAME+15`	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;
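
	# the substitution above evaluates backquoted expressions left by
	# the code generator, e.g. `$FRAME+15` becomes its numeric value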

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
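
	# illustrative example: on a little-endian flavour the table line
	#   .long 0x01020304,0x05060708  ?rev
	# is flattened to bytes 0x01..0x08 and reversed, emitting
	#   .byte 0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01
	# while ?inv instead xors each byte with 0xf (used for vperm
	# lane-index masks)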
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}
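
	# illustrative example: on a little-endian flavour
	#   ?vperm  v0,v1,v2,v3    becomes  vperm   v0,v2,v1,v3
	#   ?vsldoi v0,v1,v2,8     becomes  vsldoi  v0,v2,v1, 16-8
	# and le?/be? prefixes keep or comment out their instruction
	# depending on the target endianness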

	print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!";