| #! /usr/bin/env perl
 | |
| # Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the OpenSSL license (the "License").  You may not use
 | |
| # this file except in compliance with the License.  You can obtain a copy
 | |
| # in the file LICENSE in the source distribution or at
 | |
| # https://www.openssl.org/source/license.html
 | |
| 
 | |
| #
 | |
| # ====================================================================
 | |
| # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 | |
| # project. The module is, however, dual licensed under OpenSSL and
 | |
| # CRYPTOGAMS licenses depending on where you obtain it. For further
 | |
| # details see http://www.openssl.org/~appro/cryptogams/.
 | |
| # ====================================================================
 | |
| #
 | |
| # This module implements support for AES instructions as per PowerISA
 | |
| # specification version 2.07, first implemented by POWER8 processor.
 | |
| # The module is endian-agnostic in the sense that it supports both big-
 | |
| # and little-endian cases. Data alignment in parallelizable modes is
 | |
| # handled with VSX loads and stores, which implies MSR.VSX flag being
 | |
| # set. It should also be noted that ISA specification doesn't prohibit
 | |
| # alignment exceptions for these instructions on page boundaries.
 | |
| # Initially alignment was handled in pure AltiVec/VMX way [when data
 | |
| # is aligned programmatically, which in turn guarantees exception-
 | |
| # free execution], but it turned out to hamper performance when vcipher
 | |
| # instructions are interleaved. It's reckoned that eventual
 | |
| # misalignment penalties at page boundaries are on average lower
| # than the additional overhead of the pure AltiVec approach.
 | |
| #
 | |
| # May 2016
 | |
| #
 | |
| # Added XTS subroutine; 9x improvement on little-endian and 12x on
| # big-endian systems was measured.
 | |
| #
 | |
| ######################################################################
 | |
| # Current large-block performance in cycles per byte processed with
 | |
| # 128-bit key (less is better).
 | |
| #
 | |
| #		CBC en-/decrypt	CTR	XTS
 | |
| # POWER8[le]	3.96/0.72	0.74	1.1
 | |
| # POWER8[be]	3.75/0.65	0.66	1.0
 | |
| # POWER9[le]	4.02/0.86	0.84	1.05
 | |
| # POWER9[be]	3.99/0.78	0.79	0.97
 | |
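| #
| # For reference, the entry points generated below follow the usual
| # AES_* calling conventions; a caller (e.g. OpenSSL's ppccap.c) would
| # declare them roughly as follows -- a sketch, assuming the AES_KEY
| # layout of crypto/aes and the "aes_p8" prefix set further down:
| #
| #	int  aes_p8_set_encrypt_key(const unsigned char *userKey,
| #				const int bits, AES_KEY *key);
| #	int  aes_p8_set_decrypt_key(const unsigned char *userKey,
| #				const int bits, AES_KEY *key);
| #	void aes_p8_encrypt(const unsigned char *in, unsigned char *out,
| #				const AES_KEY *key);
| #	void aes_p8_decrypt(const unsigned char *in, unsigned char *out,
| #				const AES_KEY *key);
| #	void aes_p8_cbc_encrypt(const unsigned char *in, unsigned char *out,
| #				size_t length, const AES_KEY *key,
| #				unsigned char *ivec, const int enc);
| #	void aes_p8_ctr32_encrypt_blocks(const unsigned char *in,
| #				unsigned char *out, size_t blocks,
| #				const void *key, const unsigned char ivec[16]);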
| 
 | |
| $flavour = shift;
 | |
| 
 | |
| if ($flavour =~ /64/) {
 | |
| 	$SIZE_T	=8;
 | |
| 	$LRSAVE	=2*$SIZE_T;
 | |
| 	$STU	="stdu";
 | |
| 	$POP	="ld";
 | |
| 	$PUSH	="std";
 | |
| 	$UCMP	="cmpld";
 | |
| 	$SHL	="sldi";
 | |
| } elsif ($flavour =~ /32/) {
 | |
| 	$SIZE_T	=4;
 | |
| 	$LRSAVE	=$SIZE_T;
 | |
| 	$STU	="stwu";
 | |
| 	$POP	="lwz";
 | |
| 	$PUSH	="stw";
 | |
| 	$UCMP	="cmplw";
 | |
| 	$SHL	="slwi";
 | |
| } else { die "nonsense $flavour"; }
 | |
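| # The first argument selects the perlasm "flavour" (e.g. linux32,
| # linux64, linux64le, aix64, osx64); the second names the output file,
| # which is piped through ppc-xlate.pl below.  A typical invocation
| # (a sketch of standard perlasm usage):
| #
| #	perl aesp8-ppc.pl linux64le aesp8-ppc.s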
| 
 | |
| $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
 | |
| 
 | |
| $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
 | |
| ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
 | |
| ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
 | |
| die "can't locate ppc-xlate.pl";
 | |
| 
 | |
| open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
 | |
| 
 | |
| $FRAME=8*$SIZE_T;
 | |
| $prefix="aes_p8";
 | |
| 
 | |
| $sp="r1";
 | |
| $vrsave="r12";
 | |
| 
 | |
| #########################################################################
 | |
| {{{	# Key setup procedures						#
 | |
| my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
 | |
| my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
 | |
| my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
 | |
| 
 | |
| $code.=<<___;
 | |
| .machine	"any"
 | |
| 
 | |
| .text
 | |
| 
 | |
| .align	7
 | |
| rcon:
 | |
| .long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
 | |
| .long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
 | |
| .long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
 | |
| .long	0,0,0,0						?asis
 | |
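| # The ?rev/?asis tags and the ?-prefixed mnemonics throughout this
| # file are cues for ppc-xlate.pl rather than real assembler syntax:
| # on little-endian flavours ?rev data is emitted byte-reversed and
| # ?-prefixed permute-type instructions are adjusted for the reversed
| # lane order, while on big-endian everything passes through unchanged.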
| Lconsts:
 | |
| 	mflr	r0
 | |
| 	bcl	20,31,\$+4
 | |
| 	mflr	$ptr	 # the 0x48 below is the distance between . and rcon
 | |
| 	addi	$ptr,$ptr,-0x48
 | |
| 	mtlr	r0
 | |
| 	blr
 | |
| 	.long	0
 | |
| 	.byte	0,12,0x14,0,0,0,0,0
 | |
| .asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
 | |
| 
 | |
| .globl	.${prefix}_set_encrypt_key
 | |
| .align	5
 | |
| .${prefix}_set_encrypt_key:
 | |
| Lset_encrypt_key:
 | |
| 	mflr		r11
 | |
| 	$PUSH		r11,$LRSAVE($sp)
 | |
| 
 | |
| 	li		$ptr,-1
 | |
| 	${UCMP}i	$inp,0
 | |
| 	beq-		Lenc_key_abort		# if ($inp==0) return -1;
 | |
| 	${UCMP}i	$out,0
 | |
| 	beq-		Lenc_key_abort		# if ($out==0) return -1;
 | |
| 	li		$ptr,-2
 | |
| 	cmpwi		$bits,128
 | |
| 	blt-		Lenc_key_abort
 | |
| 	cmpwi		$bits,256
 | |
| 	bgt-		Lenc_key_abort
 | |
| 	andi.		r0,$bits,0x3f
 | |
| 	bne-		Lenc_key_abort
 | |
| 
 | |
| 	lis		r0,0xfff0
 | |
| 	mfspr		$vrsave,256
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	bl		Lconsts
 | |
| 	mtlr		r11
 | |
| 
 | |
| 	neg		r9,$inp
 | |
| 	lvx		$in0,0,$inp
 | |
| 	addi		$inp,$inp,15		# 15 is not typo
 | |
| 	lvsr		$key,0,r9		# borrow $key
 | |
| 	li		r8,0x20
 | |
| 	cmpwi		$bits,192
 | |
| 	lvx		$in1,0,$inp
 | |
| 	le?vspltisb	$mask,0x0f		# borrow $mask
 | |
| 	lvx		$rcon,0,$ptr
 | |
| 	le?vxor		$key,$key,$mask		# adjust for byte swap
 | |
| 	lvx		$mask,r8,$ptr
 | |
| 	addi		$ptr,$ptr,0x10
 | |
| 	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
 | |
| 	li		$cnt,8
 | |
| 	vxor		$zero,$zero,$zero
 | |
| 	mtctr		$cnt
 | |
| 
 | |
| 	?lvsr		$outperm,0,$out
 | |
| 	vspltisb	$outmask,-1
 | |
| 	lvx		$outhead,0,$out
 | |
| 	?vperm		$outmask,$zero,$outmask,$outperm
 | |
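| # unaligned-store idiom used throughout: outperm rotates each result
| # into store alignment, outmask marks which bytes of the 16-byte
| # window belong to the new value, and vsel merges them with the bytes
| # carried in outhead from the previous store, so stvx never clobbers
| # bytes outside the destination (a sketch of the classic AltiVec
| # read-merge-write store)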
| 
 | |
| 	blt		Loop128
 | |
| 	addi		$inp,$inp,8
 | |
| 	beq		L192
 | |
| 	addi		$inp,$inp,8
 | |
| 	b		L256
 | |
| 
 | |
| .align	4
 | |
| Loop128:
 | |
| 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	 vadduwm	$rcon,$rcon,$rcon
 | |
| 	vxor		$in0,$in0,$key
 | |
| 	bdnz		Loop128
 | |
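| # What the loop above computes, per FIPS-197 (a sketch; w[] are the
| # 32-bit key-schedule words, i the round index):
| #
| #	t       = SubWord(RotWord(w[4i+3])) xor Rcon[i]
| #	w[4i+4] = w[4i+0] xor t
| #	w[4i+5] = w[4i+1] xor w[4i+4]
| #	w[4i+6] = w[4i+2] xor w[4i+5]
| #	w[4i+7] = w[4i+3] xor w[4i+6]
| #
| # The rotate-n-splat vperm replicates RotWord(w[4i+3]) into every
| # lane, so the ShiftRows step inside vcipherlast becomes a no-op and
| # vcipherlast against rcon reduces to SubWord + Rcon; the vsldoi/vxor
| # ladder then realizes the cascaded xors on the whole vector at once.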
| 
 | |
| 	lvx		$rcon,0,$ptr		# last two round keys
 | |
| 
 | |
| 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	 vadduwm	$rcon,$rcon,$rcon
 | |
| 	vxor		$in0,$in0,$key
 | |
| 
 | |
| 	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vxor		$in0,$in0,$key
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	 stvx		$stage,0,$out
 | |
| 
 | |
| 	addi		$inp,$out,15		# 15 is not typo
 | |
| 	addi		$out,$out,0x50
 | |
| 
 | |
| 	li		$rounds,10
 | |
| 	b		Ldone
 | |
| 
 | |
| .align	4
 | |
| L192:
 | |
| 	lvx		$tmp,0,$inp
 | |
| 	li		$cnt,4
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
 | |
| 	vspltisb	$key,8			# borrow $key
 | |
| 	mtctr		$cnt
 | |
| 	vsububm		$mask,$mask,$key	# adjust the mask
 | |
| 
 | |
| Loop192:
 | |
| 	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 
 | |
| 	 vsldoi		$stage,$zero,$in1,8
 | |
| 	vspltw		$tmp,$in0,3
 | |
| 	vxor		$tmp,$tmp,$in1
 | |
| 	vsldoi		$in1,$zero,$in1,12	# >>32
 | |
| 	 vadduwm	$rcon,$rcon,$rcon
 | |
| 	vxor		$in1,$in1,$tmp
 | |
| 	vxor		$in0,$in0,$key
 | |
| 	vxor		$in1,$in1,$key
 | |
| 	 vsldoi		$stage,$stage,$in0,8
 | |
| 
 | |
| 	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	 vperm		$outtail,$stage,$stage,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	 vsldoi		$stage,$in0,$in1,8
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	 vperm		$outtail,$stage,$stage,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	vspltw		$tmp,$in0,3
 | |
| 	vxor		$tmp,$tmp,$in1
 | |
| 	vsldoi		$in1,$zero,$in1,12	# >>32
 | |
| 	 vadduwm	$rcon,$rcon,$rcon
 | |
| 	vxor		$in1,$in1,$tmp
 | |
| 	vxor		$in0,$in0,$key
 | |
| 	vxor		$in1,$in1,$key
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$inp,$out,15		# 15 is not typo
 | |
| 	 addi		$out,$out,16
 | |
| 	bdnz		Loop192
 | |
| 
 | |
| 	li		$rounds,12
 | |
| 	addi		$out,$out,0x20
 | |
| 	b		Ldone
 | |
| 
 | |
| .align	4
 | |
| L256:
 | |
| 	lvx		$tmp,0,$inp
 | |
| 	li		$cnt,7
 | |
| 	li		$rounds,14
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
 | |
| 	mtctr		$cnt
 | |
| 
 | |
| Loop256:
 | |
| 	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
 | |
| 	vsldoi		$tmp,$zero,$in0,12	# >>32
 | |
| 	 vperm		$outtail,$in1,$in1,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	vcipherlast	$key,$key,$rcon
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$out,$out,16
 | |
| 
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in0,$in0,$tmp
 | |
| 	 vadduwm	$rcon,$rcon,$rcon
 | |
| 	vxor		$in0,$in0,$key
 | |
| 	 vperm		$outtail,$in0,$in0,$outperm	# rotate
 | |
| 	 vsel		$stage,$outhead,$outtail,$outmask
 | |
| 	 vmr		$outhead,$outtail
 | |
| 	 stvx		$stage,0,$out
 | |
| 	 addi		$inp,$out,15		# 15 is not typo
 | |
| 	 addi		$out,$out,16
 | |
| 	bdz		Ldone
 | |
| 
 | |
| 	vspltw		$key,$in0,3		# just splat
 | |
| 	vsldoi		$tmp,$zero,$in1,12	# >>32
 | |
| 	vsbox		$key,$key
 | |
| 
 | |
| 	vxor		$in1,$in1,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in1,$in1,$tmp
 | |
| 	vsldoi		$tmp,$zero,$tmp,12	# >>32
 | |
| 	vxor		$in1,$in1,$tmp
 | |
| 
 | |
| 	vxor		$in1,$in1,$key
 | |
| 	b		Loop256
 | |
| 
 | |
| .align	4
 | |
| Ldone:
 | |
| 	lvx		$in1,0,$inp		# redundant in aligned case
 | |
| 	vsel		$in1,$outhead,$in1,$outmask
 | |
| 	stvx		$in1,0,$inp
 | |
| 	li		$ptr,0
 | |
| 	mtspr		256,$vrsave
 | |
| 	stw		$rounds,0($out)
 | |
| 
 | |
| Lenc_key_abort:
 | |
| 	mr		r3,$ptr
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,0x14,1,0,0,3,0
 | |
| 	.long		0
 | |
| .size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
 | |
| 
 | |
| .globl	.${prefix}_set_decrypt_key
 | |
| .align	5
 | |
| .${prefix}_set_decrypt_key:
 | |
| 	$STU		$sp,-$FRAME($sp)
 | |
| 	mflr		r10
 | |
| 	$PUSH		r10,$FRAME+$LRSAVE($sp)
 | |
| 	bl		Lset_encrypt_key
 | |
| 	mtlr		r10
 | |
| 
 | |
| 	cmpwi		r3,0
 | |
| 	bne-		Ldec_key_abort
 | |
| 
 | |
| 	slwi		$cnt,$rounds,4
 | |
| 	subi		$inp,$out,240		# first round key
 | |
| 	srwi		$rounds,$rounds,1
 | |
| 	add		$out,$inp,$cnt		# last round key
 | |
| 	mtctr		$rounds
 | |
| 
 | |
| Ldeckey:
 | |
| 	lwz		r0, 0($inp)
 | |
| 	lwz		r6, 4($inp)
 | |
| 	lwz		r7, 8($inp)
 | |
| 	lwz		r8, 12($inp)
 | |
| 	addi		$inp,$inp,16
 | |
| 	lwz		r9, 0($out)
 | |
| 	lwz		r10,4($out)
 | |
| 	lwz		r11,8($out)
 | |
| 	lwz		r12,12($out)
 | |
| 	stw		r0, 0($out)
 | |
| 	stw		r6, 4($out)
 | |
| 	stw		r7, 8($out)
 | |
| 	stw		r8, 12($out)
 | |
| 	subi		$out,$out,16
 | |
| 	stw		r9, -16($inp)
 | |
| 	stw		r10,-12($inp)
 | |
| 	stw		r11,-8($inp)
 | |
| 	stw		r12,-4($inp)
 | |
| 	bdnz		Ldeckey
 | |
| 
 | |
| 	xor		r3,r3,r3		# return value
 | |
| Ldec_key_abort:
 | |
| 	addi		$sp,$sp,$FRAME
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,4,1,0x80,0,3,0
 | |
| 	.long		0
 | |
| .size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
 | |
| ___
 | |
| }}}
 | |
| #########################################################################
 | |
| {{{	# Single block en- and decrypt procedures			#
 | |
| sub gen_block () {
 | |
| my $dir = shift;
 | |
| my $n   = $dir eq "de" ? "n" : "";
 | |
| my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
 | |
| 
 | |
| $code.=<<___;
 | |
| .globl	.${prefix}_${dir}crypt
 | |
| .align	5
 | |
| .${prefix}_${dir}crypt:
 | |
| 	lwz		$rounds,240($key)
 | |
| 	lis		r0,0xfc00
 | |
| 	mfspr		$vrsave,256
 | |
| 	li		$idx,15			# 15 is not typo
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	lvx		v0,0,$inp
 | |
| 	neg		r11,$out
 | |
| 	lvx		v1,$idx,$inp
 | |
| 	lvsl		v2,0,$inp		# inpperm
 | |
| 	le?vspltisb	v4,0x0f
 | |
| 	?lvsl		v3,0,r11		# outperm
 | |
| 	le?vxor		v2,v2,v4
 | |
| 	li		$idx,16
 | |
| 	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
 | |
| 	lvx		v1,0,$key
 | |
| 	?lvsl		v5,0,$key		# keyperm
 | |
| 	srwi		$rounds,$rounds,1
 | |
| 	lvx		v2,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	subi		$rounds,$rounds,1
 | |
| 	?vperm		v1,v1,v2,v5		# align round key
 | |
| 
 | |
| 	vxor		v0,v0,v1
 | |
| 	lvx		v1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	mtctr		$rounds
 | |
| 
 | |
| Loop_${dir}c:
 | |
| 	?vperm		v2,v2,v1,v5
 | |
| 	v${n}cipher	v0,v0,v2
 | |
| 	lvx		v2,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		v1,v1,v2,v5
 | |
| 	v${n}cipher	v0,v0,v1
 | |
| 	lvx		v1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	bdnz		Loop_${dir}c
 | |
| 
 | |
| 	?vperm		v2,v2,v1,v5
 | |
| 	v${n}cipher	v0,v0,v2
 | |
| 	lvx		v2,$idx,$key
 | |
| 	?vperm		v1,v1,v2,v5
 | |
| 	v${n}cipherlast	v0,v0,v1
 | |
| 
 | |
| 	vspltisb	v2,-1
 | |
| 	vxor		v1,v1,v1
 | |
| 	li		$idx,15			# 15 is not typo
 | |
| 	?vperm		v2,v1,v2,v3		# outmask
 | |
| 	le?vxor		v3,v3,v4
 | |
| 	lvx		v1,0,$out		# outhead
 | |
| 	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
 | |
| 	vsel		v1,v1,v0,v2
 | |
| 	lvx		v4,$idx,$out
 | |
| 	stvx		v1,0,$out
 | |
| 	vsel		v0,v0,v4,v2
 | |
| 	stvx		v0,$idx,$out
 | |
| 
 | |
| 	mtspr		256,$vrsave
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,0x14,0,0,0,3,0
 | |
| 	.long		0
 | |
| .size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
 | |
| ___
 | |
| }
 | |
| &gen_block("en");
 | |
| &gen_block("de");
 | |
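| # The two calls above stamp out aes_p8_encrypt and aes_p8_decrypt,
| # which differ only in the opcode pair used (vcipher/vcipherlast vs.
| # vncipher/vncipherlast); the loop runs rounds/2-1 times because each
| # iteration applies two round keys.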
| }}}
 | |
| #########################################################################
 | |
| {{{	# CBC en- and decrypt procedures				#
 | |
| my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
 | |
| my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
 | |
| my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
 | |
| 						map("v$_",(4..10));
 | |
| $code.=<<___;
 | |
| .globl	.${prefix}_cbc_encrypt
 | |
| .align	5
 | |
| .${prefix}_cbc_encrypt:
 | |
| 	${UCMP}i	$len,16
 | |
| 	bltlr-
 | |
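| # nothing to do for short input; as with AES_cbc_encrypt only whole
| # 16-byte blocks are processed, so a length below 16 returns at once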
| 
 | |
| 	cmpwi		$enc,0			# test direction
 | |
| 	lis		r0,0xffe0
 | |
| 	mfspr		$vrsave,256
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	li		$idx,15
 | |
| 	vxor		$rndkey0,$rndkey0,$rndkey0
 | |
| 	le?vspltisb	$tmp,0x0f
 | |
| 
 | |
| 	lvx		$ivec,0,$ivp		# load [unaligned] iv
 | |
| 	lvsl		$inpperm,0,$ivp
 | |
| 	lvx		$inptail,$idx,$ivp
 | |
| 	le?vxor		$inpperm,$inpperm,$tmp
 | |
| 	vperm		$ivec,$ivec,$inptail,$inpperm
 | |
| 
 | |
| 	neg		r11,$inp
 | |
| 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
 | |
| 	lwz		$rounds,240($key)
 | |
| 
 | |
| 	lvsr		$inpperm,0,r11		# prepare for unaligned load
 | |
| 	lvx		$inptail,0,$inp
 | |
| 	addi		$inp,$inp,15		# 15 is not typo
 | |
| 	le?vxor		$inpperm,$inpperm,$tmp
 | |
| 
 | |
| 	?lvsr		$outperm,0,$out		# prepare for unaligned store
 | |
| 	vspltisb	$outmask,-1
 | |
| 	lvx		$outhead,0,$out
 | |
| 	?vperm		$outmask,$rndkey0,$outmask,$outperm
 | |
| 	le?vxor		$outperm,$outperm,$tmp
 | |
| 
 | |
| 	srwi		$rounds,$rounds,1
 | |
| 	li		$idx,16
 | |
| 	subi		$rounds,$rounds,1
 | |
| 	beq		Lcbc_dec
 | |
| 
 | |
| Lcbc_enc:
 | |
| 	vmr		$inout,$inptail
 | |
| 	lvx		$inptail,0,$inp
 | |
| 	addi		$inp,$inp,16
 | |
| 	mtctr		$rounds
 | |
| 	subi		$len,$len,16		# len-=16
 | |
| 
 | |
| 	lvx		$rndkey0,0,$key
 | |
| 	 vperm		$inout,$inout,$inptail,$inpperm
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vxor		$inout,$inout,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	vxor		$inout,$inout,$ivec
 | |
| 
 | |
| Loop_cbc_enc:
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	bdnz		Loop_cbc_enc
 | |
| 
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	li		$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vcipherlast	$ivec,$inout,$rndkey0
 | |
| 	${UCMP}i	$len,16
 | |
| 
 | |
| 	vperm		$tmp,$ivec,$ivec,$outperm
 | |
| 	vsel		$inout,$outhead,$tmp,$outmask
 | |
| 	vmr		$outhead,$tmp
 | |
| 	stvx		$inout,0,$out
 | |
| 	addi		$out,$out,16
 | |
| 	bge		Lcbc_enc
 | |
| 
 | |
| 	b		Lcbc_done
 | |
| 
 | |
| .align	4
 | |
| Lcbc_dec:
 | |
| 	${UCMP}i	$len,128
 | |
| 	bge		_aesp8_cbc_decrypt8x
 | |
| 	vmr		$tmp,$inptail
 | |
| 	lvx		$inptail,0,$inp
 | |
| 	addi		$inp,$inp,16
 | |
| 	mtctr		$rounds
 | |
| 	subi		$len,$len,16		# len-=16
 | |
| 
 | |
| 	lvx		$rndkey0,0,$key
 | |
| 	 vperm		$tmp,$tmp,$inptail,$inpperm
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vxor		$inout,$tmp,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 
 | |
| Loop_cbc_dec:
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vncipher	$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vncipher	$inout,$inout,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	bdnz		Loop_cbc_dec
 | |
| 
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vncipher	$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	li		$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vncipherlast	$inout,$inout,$rndkey0
 | |
| 	${UCMP}i	$len,16
 | |
| 
 | |
| 	vxor		$inout,$inout,$ivec
 | |
| 	vmr		$ivec,$tmp
 | |
| 	vperm		$tmp,$inout,$inout,$outperm
 | |
| 	vsel		$inout,$outhead,$tmp,$outmask
 | |
| 	vmr		$outhead,$tmp
 | |
| 	stvx		$inout,0,$out
 | |
| 	addi		$out,$out,16
 | |
| 	bge		Lcbc_dec
 | |
| 
 | |
| Lcbc_done:
 | |
| 	addi		$out,$out,-1
 | |
| 	lvx		$inout,0,$out		# redundant in aligned case
 | |
| 	vsel		$inout,$outhead,$inout,$outmask
 | |
| 	stvx		$inout,0,$out
 | |
| 
 | |
| 	neg		$enc,$ivp		# write [unaligned] iv
 | |
| 	li		$idx,15			# 15 is not typo
 | |
| 	vxor		$rndkey0,$rndkey0,$rndkey0
 | |
| 	vspltisb	$outmask,-1
 | |
| 	le?vspltisb	$tmp,0x0f
 | |
| 	?lvsl		$outperm,0,$enc
 | |
| 	?vperm		$outmask,$rndkey0,$outmask,$outperm
 | |
| 	le?vxor		$outperm,$outperm,$tmp
 | |
| 	lvx		$outhead,0,$ivp
 | |
| 	vperm		$ivec,$ivec,$ivec,$outperm
 | |
| 	vsel		$inout,$outhead,$ivec,$outmask
 | |
| 	lvx		$inptail,$idx,$ivp
 | |
| 	stvx		$inout,0,$ivp
 | |
| 	vsel		$inout,$ivec,$inptail,$outmask
 | |
| 	stvx		$inout,$idx,$ivp
 | |
| 
 | |
| 	mtspr		256,$vrsave
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,0x14,0,0,0,6,0
 | |
| 	.long		0
 | |
| ___
 | |
| #########################################################################
 | |
| {{	# Optimized CBC decrypt procedure				#
 | |
| my $key_="r11";
 | |
| my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
 | |
|     $x00=0 if ($flavour =~ /osx/);
 | |
| my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
 | |
| my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
 | |
| my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
 | |
| 			# v26-v31 last 6 round keys
 | |
| my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
 | |
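| # Strategy of the 8x path: the whole key schedule is permuted once
| # into an aligned scratch area on the stack, v26-v31 keep the last
| # six round keys resident, and v24/v25 serve as a two-entry rotating
| # window over the earlier rounds, so all eight parallel lanes consume
| # a single key stream with no per-block permutes.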
| 
 | |
| $code.=<<___;
 | |
| .align	5
 | |
| _aesp8_cbc_decrypt8x:
 | |
| 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
 | |
| 	li		r10,`$FRAME+8*16+15`
 | |
| 	li		r11,`$FRAME+8*16+31`
 | |
| 	stvx		v20,r10,$sp		# ABI says so
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v21,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v22,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v23,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v24,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v25,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v26,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v27,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v28,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v29,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v30,r10,$sp
 | |
| 	stvx		v31,r11,$sp
 | |
| 	li		r0,-1
 | |
| 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
 | |
| 	li		$x10,0x10
 | |
| 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
 | |
| 	li		$x20,0x20
 | |
| 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 | |
| 	li		$x30,0x30
 | |
| 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 | |
| 	li		$x40,0x40
 | |
| 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 | |
| 	li		$x50,0x50
 | |
| 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 | |
| 	li		$x60,0x60
 | |
| 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 | |
| 	li		$x70,0x70
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	subi		$rounds,$rounds,3	# -4 in total
 | |
| 	subi		$len,$len,128		# bias
 | |
| 
 | |
| 	lvx		$rndkey0,$x00,$key	# load key schedule
 | |
| 	lvx		v30,$x10,$key
 | |
| 	addi		$key,$key,0x20
 | |
| 	lvx		v31,$x00,$key
 | |
| 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
 | |
| 	addi		$key_,$sp,$FRAME+15
 | |
| 	mtctr		$rounds
 | |
| 
 | |
| Load_cbc_dec_key:
 | |
| 	?vperm		v24,v30,v31,$keyperm
 | |
| 	lvx		v30,$x10,$key
 | |
| 	addi		$key,$key,0x20
 | |
| 	stvx		v24,$x00,$key_		# off-load round[1]
 | |
| 	?vperm		v25,v31,v30,$keyperm
 | |
| 	lvx		v31,$x00,$key
 | |
| 	stvx		v25,$x10,$key_		# off-load round[2]
 | |
| 	addi		$key_,$key_,0x20
 | |
| 	bdnz		Load_cbc_dec_key
 | |
| 
 | |
| 	lvx		v26,$x10,$key
 | |
| 	?vperm		v24,v30,v31,$keyperm
 | |
| 	lvx		v27,$x20,$key
 | |
| 	stvx		v24,$x00,$key_		# off-load round[3]
 | |
| 	?vperm		v25,v31,v26,$keyperm
 | |
| 	lvx		v28,$x30,$key
 | |
| 	stvx		v25,$x10,$key_		# off-load round[4]
 | |
| 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 | |
| 	?vperm		v26,v26,v27,$keyperm
 | |
| 	lvx		v29,$x40,$key
 | |
| 	?vperm		v27,v27,v28,$keyperm
 | |
| 	lvx		v30,$x50,$key
 | |
| 	?vperm		v28,v28,v29,$keyperm
 | |
| 	lvx		v31,$x60,$key
 | |
| 	?vperm		v29,v29,v30,$keyperm
 | |
| 	lvx		$out0,$x70,$key		# borrow $out0
 | |
| 	?vperm		v30,v30,v31,$keyperm
 | |
| 	lvx		v24,$x00,$key_		# pre-load round[1]
 | |
| 	?vperm		v31,v31,$out0,$keyperm
 | |
| 	lvx		v25,$x10,$key_		# pre-load round[2]
 | |
| 
 | |
| 	#lvx		$inptail,0,$inp		# "caller" already did this
 | |
| 	#addi		$inp,$inp,15		# 15 is not typo
 | |
| 	subi		$inp,$inp,15		# undo "caller"
 | |
| 
 | |
| 	 le?li		$idx,8
 | |
| 	lvx_u		$in0,$x00,$inp		# load first 8 "words"
 | |
| 	 le?lvsl	$inpperm,0,$idx
 | |
| 	 le?vspltisb	$tmp,0x0f
 | |
| 	lvx_u		$in1,$x10,$inp
 | |
| 	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
 | |
| 	lvx_u		$in2,$x20,$inp
 | |
| 	 le?vperm	$in0,$in0,$in0,$inpperm
 | |
| 	lvx_u		$in3,$x30,$inp
 | |
| 	 le?vperm	$in1,$in1,$in1,$inpperm
 | |
| 	lvx_u		$in4,$x40,$inp
 | |
| 	 le?vperm	$in2,$in2,$in2,$inpperm
 | |
| 	vxor		$out0,$in0,$rndkey0
 | |
| 	lvx_u		$in5,$x50,$inp
 | |
| 	 le?vperm	$in3,$in3,$in3,$inpperm
 | |
| 	vxor		$out1,$in1,$rndkey0
 | |
| 	lvx_u		$in6,$x60,$inp
 | |
| 	 le?vperm	$in4,$in4,$in4,$inpperm
 | |
| 	vxor		$out2,$in2,$rndkey0
 | |
| 	lvx_u		$in7,$x70,$inp
 | |
| 	addi		$inp,$inp,0x80
 | |
| 	 le?vperm	$in5,$in5,$in5,$inpperm
 | |
| 	vxor		$out3,$in3,$rndkey0
 | |
| 	 le?vperm	$in6,$in6,$in6,$inpperm
 | |
| 	vxor		$out4,$in4,$rndkey0
 | |
| 	 le?vperm	$in7,$in7,$in7,$inpperm
 | |
| 	vxor		$out5,$in5,$rndkey0
 | |
| 	vxor		$out6,$in6,$rndkey0
 | |
| 	vxor		$out7,$in7,$rndkey0
 | |
| 
 | |
| 	mtctr		$rounds
 | |
| 	b		Loop_cbc_dec8x
 | |
| .align	5
 | |
| Loop_cbc_dec8x:
 | |
| 	vncipher	$out0,$out0,v24
 | |
| 	vncipher	$out1,$out1,v24
 | |
| 	vncipher	$out2,$out2,v24
 | |
| 	vncipher	$out3,$out3,v24
 | |
| 	vncipher	$out4,$out4,v24
 | |
| 	vncipher	$out5,$out5,v24
 | |
| 	vncipher	$out6,$out6,v24
 | |
| 	vncipher	$out7,$out7,v24
 | |
| 	lvx		v24,$x20,$key_		# round[3]
 | |
| 	addi		$key_,$key_,0x20
 | |
| 
 | |
| 	vncipher	$out0,$out0,v25
 | |
| 	vncipher	$out1,$out1,v25
 | |
| 	vncipher	$out2,$out2,v25
 | |
| 	vncipher	$out3,$out3,v25
 | |
| 	vncipher	$out4,$out4,v25
 | |
| 	vncipher	$out5,$out5,v25
 | |
| 	vncipher	$out6,$out6,v25
 | |
| 	vncipher	$out7,$out7,v25
 | |
| 	lvx		v25,$x10,$key_		# round[4]
 | |
| 	bdnz		Loop_cbc_dec8x
 | |
| 
 | |
| 	subic		$len,$len,128		# $len-=128
 | |
| 	vncipher	$out0,$out0,v24
 | |
| 	vncipher	$out1,$out1,v24
 | |
| 	vncipher	$out2,$out2,v24
 | |
| 	vncipher	$out3,$out3,v24
 | |
| 	vncipher	$out4,$out4,v24
 | |
| 	vncipher	$out5,$out5,v24
 | |
| 	vncipher	$out6,$out6,v24
 | |
| 	vncipher	$out7,$out7,v24
 | |
| 
 | |
| 	subfe.		r0,r0,r0		# borrow?-1:0
 | |
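| # carry trick: subic set CA=1 iff len-128 did not borrow, and
| # subfe. r0,r0,r0 evaluates to (not r0)+r0+CA = CA-1, i.e. r0 is 0
| # while at least 128 bytes remain and all-ones on the final batch;
| # "and"ing it with the (by then negative) remaining length gives the
| # offset that rewinds inp so the tail loads fetch the last blocks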
| 	vncipher	$out0,$out0,v25
 | |
| 	vncipher	$out1,$out1,v25
 | |
| 	vncipher	$out2,$out2,v25
 | |
| 	vncipher	$out3,$out3,v25
 | |
| 	vncipher	$out4,$out4,v25
 | |
| 	vncipher	$out5,$out5,v25
 | |
| 	vncipher	$out6,$out6,v25
 | |
| 	vncipher	$out7,$out7,v25
 | |
| 
 | |
| 	and		r0,r0,$len
 | |
| 	vncipher	$out0,$out0,v26
 | |
| 	vncipher	$out1,$out1,v26
 | |
| 	vncipher	$out2,$out2,v26
 | |
| 	vncipher	$out3,$out3,v26
 | |
| 	vncipher	$out4,$out4,v26
 | |
| 	vncipher	$out5,$out5,v26
 | |
| 	vncipher	$out6,$out6,v26
 | |
| 	vncipher	$out7,$out7,v26
 | |
| 
 | |
| 	add		$inp,$inp,r0		# $inp is adjusted in such
 | |
| 						# way that at exit from the
 | |
| 						# loop inX-in7 are loaded
 | |
| 						# with last "words"
 | |
| 	vncipher	$out0,$out0,v27
 | |
| 	vncipher	$out1,$out1,v27
 | |
| 	vncipher	$out2,$out2,v27
 | |
| 	vncipher	$out3,$out3,v27
 | |
| 	vncipher	$out4,$out4,v27
 | |
| 	vncipher	$out5,$out5,v27
 | |
| 	vncipher	$out6,$out6,v27
 | |
| 	vncipher	$out7,$out7,v27
 | |
| 
 | |
| 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 | |
| 	vncipher	$out0,$out0,v28
 | |
| 	vncipher	$out1,$out1,v28
 | |
| 	vncipher	$out2,$out2,v28
 | |
| 	vncipher	$out3,$out3,v28
 | |
| 	vncipher	$out4,$out4,v28
 | |
| 	vncipher	$out5,$out5,v28
 | |
| 	vncipher	$out6,$out6,v28
 | |
| 	vncipher	$out7,$out7,v28
 | |
| 	lvx		v24,$x00,$key_		# re-pre-load round[1]
 | |
| 
 | |
| 	vncipher	$out0,$out0,v29
 | |
| 	vncipher	$out1,$out1,v29
 | |
| 	vncipher	$out2,$out2,v29
 | |
| 	vncipher	$out3,$out3,v29
 | |
| 	vncipher	$out4,$out4,v29
 | |
| 	vncipher	$out5,$out5,v29
 | |
| 	vncipher	$out6,$out6,v29
 | |
| 	vncipher	$out7,$out7,v29
 | |
| 	lvx		v25,$x10,$key_		# re-pre-load round[2]
 | |
| 
 | |
| 	vncipher	$out0,$out0,v30
 | |
| 	 vxor		$ivec,$ivec,v31		# xor with last round key
 | |
| 	vncipher	$out1,$out1,v30
 | |
| 	 vxor		$in0,$in0,v31
 | |
| 	vncipher	$out2,$out2,v30
 | |
| 	 vxor		$in1,$in1,v31
 | |
| 	vncipher	$out3,$out3,v30
 | |
| 	 vxor		$in2,$in2,v31
 | |
| 	vncipher	$out4,$out4,v30
 | |
| 	 vxor		$in3,$in3,v31
 | |
| 	vncipher	$out5,$out5,v30
 | |
| 	 vxor		$in4,$in4,v31
 | |
| 	vncipher	$out6,$out6,v30
 | |
| 	 vxor		$in5,$in5,v31
 | |
| 	vncipher	$out7,$out7,v30
 | |
| 	 vxor		$in6,$in6,v31
 | |
| 
 | |
| 	vncipherlast	$out0,$out0,$ivec
 | |
| 	vncipherlast	$out1,$out1,$in0
 | |
| 	 lvx_u		$in0,$x00,$inp		# load next input block
 | |
| 	vncipherlast	$out2,$out2,$in1
 | |
| 	 lvx_u		$in1,$x10,$inp
 | |
| 	vncipherlast	$out3,$out3,$in2
 | |
| 	 le?vperm	$in0,$in0,$in0,$inpperm
 | |
| 	 lvx_u		$in2,$x20,$inp
 | |
| 	vncipherlast	$out4,$out4,$in3
 | |
| 	 le?vperm	$in1,$in1,$in1,$inpperm
 | |
| 	 lvx_u		$in3,$x30,$inp
 | |
| 	vncipherlast	$out5,$out5,$in4
 | |
| 	 le?vperm	$in2,$in2,$in2,$inpperm
 | |
| 	 lvx_u		$in4,$x40,$inp
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	 le?vperm	$in3,$in3,$in3,$inpperm
 | |
| 	 lvx_u		$in5,$x50,$inp
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	 le?vperm	$in4,$in4,$in4,$inpperm
 | |
| 	 lvx_u		$in6,$x60,$inp
 | |
| 	vmr		$ivec,$in7
 | |
| 	 le?vperm	$in5,$in5,$in5,$inpperm
 | |
| 	 lvx_u		$in7,$x70,$inp
 | |
| 	 addi		$inp,$inp,0x80
 | |
| 
 | |
| 	le?vperm	$out0,$out0,$out0,$inpperm
 | |
| 	le?vperm	$out1,$out1,$out1,$inpperm
 | |
| 	stvx_u		$out0,$x00,$out
 | |
| 	 le?vperm	$in6,$in6,$in6,$inpperm
 | |
| 	 vxor		$out0,$in0,$rndkey0
 | |
| 	le?vperm	$out2,$out2,$out2,$inpperm
 | |
| 	stvx_u		$out1,$x10,$out
 | |
| 	 le?vperm	$in7,$in7,$in7,$inpperm
 | |
| 	 vxor		$out1,$in1,$rndkey0
 | |
| 	le?vperm	$out3,$out3,$out3,$inpperm
 | |
| 	stvx_u		$out2,$x20,$out
 | |
| 	 vxor		$out2,$in2,$rndkey0
 | |
| 	le?vperm	$out4,$out4,$out4,$inpperm
 | |
| 	stvx_u		$out3,$x30,$out
 | |
| 	 vxor		$out3,$in3,$rndkey0
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	stvx_u		$out4,$x40,$out
 | |
| 	 vxor		$out4,$in4,$rndkey0
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x50,$out
 | |
| 	 vxor		$out5,$in5,$rndkey0
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x60,$out
 | |
| 	 vxor		$out6,$in6,$rndkey0
 | |
| 	stvx_u		$out7,$x70,$out
 | |
| 	addi		$out,$out,0x80
 | |
| 	 vxor		$out7,$in7,$rndkey0
 | |
| 
 | |
| 	mtctr		$rounds
 | |
| 	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
 | |
| 
 | |
| 	addic.		$len,$len,128
 | |
| 	beq		Lcbc_dec8x_done
 | |
| 	nop
 | |
| 	nop
 | |
| 
 | |
| Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
 | |
| 	vncipher	$out1,$out1,v24
 | |
| 	vncipher	$out2,$out2,v24
 | |
| 	vncipher	$out3,$out3,v24
 | |
| 	vncipher	$out4,$out4,v24
 | |
| 	vncipher	$out5,$out5,v24
 | |
| 	vncipher	$out6,$out6,v24
 | |
| 	vncipher	$out7,$out7,v24
 | |
| 	lvx		v24,$x20,$key_		# round[3]
 | |
| 	addi		$key_,$key_,0x20
 | |
| 
 | |
| 	vncipher	$out1,$out1,v25
 | |
| 	vncipher	$out2,$out2,v25
 | |
| 	vncipher	$out3,$out3,v25
 | |
| 	vncipher	$out4,$out4,v25
 | |
| 	vncipher	$out5,$out5,v25
 | |
| 	vncipher	$out6,$out6,v25
 | |
| 	vncipher	$out7,$out7,v25
 | |
| 	lvx		v25,$x10,$key_		# round[4]
 | |
| 	bdnz		Loop_cbc_dec8x_tail
 | |
| 
 | |
| 	vncipher	$out1,$out1,v24
 | |
| 	vncipher	$out2,$out2,v24
 | |
| 	vncipher	$out3,$out3,v24
 | |
| 	vncipher	$out4,$out4,v24
 | |
| 	vncipher	$out5,$out5,v24
 | |
| 	vncipher	$out6,$out6,v24
 | |
| 	vncipher	$out7,$out7,v24
 | |
| 
 | |
| 	vncipher	$out1,$out1,v25
 | |
| 	vncipher	$out2,$out2,v25
 | |
| 	vncipher	$out3,$out3,v25
 | |
| 	vncipher	$out4,$out4,v25
 | |
| 	vncipher	$out5,$out5,v25
 | |
| 	vncipher	$out6,$out6,v25
 | |
| 	vncipher	$out7,$out7,v25
 | |
| 
 | |
| 	vncipher	$out1,$out1,v26
 | |
| 	vncipher	$out2,$out2,v26
 | |
| 	vncipher	$out3,$out3,v26
 | |
| 	vncipher	$out4,$out4,v26
 | |
| 	vncipher	$out5,$out5,v26
 | |
| 	vncipher	$out6,$out6,v26
 | |
| 	vncipher	$out7,$out7,v26
 | |
| 
 | |
| 	vncipher	$out1,$out1,v27
 | |
| 	vncipher	$out2,$out2,v27
 | |
| 	vncipher	$out3,$out3,v27
 | |
| 	vncipher	$out4,$out4,v27
 | |
| 	vncipher	$out5,$out5,v27
 | |
| 	vncipher	$out6,$out6,v27
 | |
| 	vncipher	$out7,$out7,v27
 | |
| 
 | |
| 	vncipher	$out1,$out1,v28
 | |
| 	vncipher	$out2,$out2,v28
 | |
| 	vncipher	$out3,$out3,v28
 | |
| 	vncipher	$out4,$out4,v28
 | |
| 	vncipher	$out5,$out5,v28
 | |
| 	vncipher	$out6,$out6,v28
 | |
| 	vncipher	$out7,$out7,v28
 | |
| 
 | |
| 	vncipher	$out1,$out1,v29
 | |
| 	vncipher	$out2,$out2,v29
 | |
| 	vncipher	$out3,$out3,v29
 | |
| 	vncipher	$out4,$out4,v29
 | |
| 	vncipher	$out5,$out5,v29
 | |
| 	vncipher	$out6,$out6,v29
 | |
| 	vncipher	$out7,$out7,v29
 | |
| 
 | |
| 	vncipher	$out1,$out1,v30
 | |
| 	 vxor		$ivec,$ivec,v31		# last round key
 | |
| 	vncipher	$out2,$out2,v30
 | |
| 	 vxor		$in1,$in1,v31
 | |
| 	vncipher	$out3,$out3,v30
 | |
| 	 vxor		$in2,$in2,v31
 | |
| 	vncipher	$out4,$out4,v30
 | |
| 	 vxor		$in3,$in3,v31
 | |
| 	vncipher	$out5,$out5,v30
 | |
| 	 vxor		$in4,$in4,v31
 | |
| 	vncipher	$out6,$out6,v30
 | |
| 	 vxor		$in5,$in5,v31
 | |
| 	vncipher	$out7,$out7,v30
 | |
| 	 vxor		$in6,$in6,v31
 | |
| 
 | |
| 	cmplwi		$len,32			# switch($len)
 | |
| 	blt		Lcbc_dec8x_one
 | |
| 	nop
 | |
| 	beq		Lcbc_dec8x_two
 | |
| 	cmplwi		$len,64
 | |
| 	blt		Lcbc_dec8x_three
 | |
| 	nop
 | |
| 	beq		Lcbc_dec8x_four
 | |
| 	cmplwi		$len,96
 | |
| 	blt		Lcbc_dec8x_five
 | |
| 	nop
 | |
| 	beq		Lcbc_dec8x_six
 | |
| 
 | |
| Lcbc_dec8x_seven:
 | |
| 	vncipherlast	$out1,$out1,$ivec
 | |
| 	vncipherlast	$out2,$out2,$in1
 | |
| 	vncipherlast	$out3,$out3,$in2
 | |
| 	vncipherlast	$out4,$out4,$in3
 | |
| 	vncipherlast	$out5,$out5,$in4
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out1,$out1,$out1,$inpperm
 | |
| 	le?vperm	$out2,$out2,$out2,$inpperm
 | |
| 	stvx_u		$out1,$x00,$out
 | |
| 	le?vperm	$out3,$out3,$out3,$inpperm
 | |
| 	stvx_u		$out2,$x10,$out
 | |
| 	le?vperm	$out4,$out4,$out4,$inpperm
 | |
| 	stvx_u		$out3,$x20,$out
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	stvx_u		$out4,$x30,$out
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x40,$out
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x50,$out
 | |
| 	stvx_u		$out7,$x60,$out
 | |
| 	addi		$out,$out,0x70
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_six:
 | |
| 	vncipherlast	$out2,$out2,$ivec
 | |
| 	vncipherlast	$out3,$out3,$in2
 | |
| 	vncipherlast	$out4,$out4,$in3
 | |
| 	vncipherlast	$out5,$out5,$in4
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out2,$out2,$out2,$inpperm
 | |
| 	le?vperm	$out3,$out3,$out3,$inpperm
 | |
| 	stvx_u		$out2,$x00,$out
 | |
| 	le?vperm	$out4,$out4,$out4,$inpperm
 | |
| 	stvx_u		$out3,$x10,$out
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	stvx_u		$out4,$x20,$out
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x30,$out
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x40,$out
 | |
| 	stvx_u		$out7,$x50,$out
 | |
| 	addi		$out,$out,0x60
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_five:
 | |
| 	vncipherlast	$out3,$out3,$ivec
 | |
| 	vncipherlast	$out4,$out4,$in3
 | |
| 	vncipherlast	$out5,$out5,$in4
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out3,$out3,$out3,$inpperm
 | |
| 	le?vperm	$out4,$out4,$out4,$inpperm
 | |
| 	stvx_u		$out3,$x00,$out
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	stvx_u		$out4,$x10,$out
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x20,$out
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x30,$out
 | |
| 	stvx_u		$out7,$x40,$out
 | |
| 	addi		$out,$out,0x50
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_four:
 | |
| 	vncipherlast	$out4,$out4,$ivec
 | |
| 	vncipherlast	$out5,$out5,$in4
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out4,$out4,$out4,$inpperm
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	stvx_u		$out4,$x00,$out
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x10,$out
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x20,$out
 | |
| 	stvx_u		$out7,$x30,$out
 | |
| 	addi		$out,$out,0x40
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_three:
 | |
| 	vncipherlast	$out5,$out5,$ivec
 | |
| 	vncipherlast	$out6,$out6,$in5
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out5,$out5,$out5,$inpperm
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	stvx_u		$out5,$x00,$out
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x10,$out
 | |
| 	stvx_u		$out7,$x20,$out
 | |
| 	addi		$out,$out,0x30
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_two:
 | |
| 	vncipherlast	$out6,$out6,$ivec
 | |
| 	vncipherlast	$out7,$out7,$in6
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out6,$out6,$out6,$inpperm
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out6,$x00,$out
 | |
| 	stvx_u		$out7,$x10,$out
 | |
| 	addi		$out,$out,0x20
 | |
| 	b		Lcbc_dec8x_done
 | |
| 
 | |
| .align	5
 | |
| Lcbc_dec8x_one:
 | |
| 	vncipherlast	$out7,$out7,$ivec
 | |
| 	vmr		$ivec,$in7
 | |
| 
 | |
| 	le?vperm	$out7,$out7,$out7,$inpperm
 | |
| 	stvx_u		$out7,0,$out
 | |
| 	addi		$out,$out,0x10
 | |
| 
 | |
| Lcbc_dec8x_done:
 | |
| 	le?vperm	$ivec,$ivec,$ivec,$inpperm
 | |
| 	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
 | |
| 
 | |
| 	li		r10,`$FRAME+15`
 | |
| 	li		r11,`$FRAME+31`
 | |
| 	stvx		$inpperm,r10,$sp	# wipe copies of round keys
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		$inpperm,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		$inpperm,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		$inpperm,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		$inpperm,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		$inpperm,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		$inpperm,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		$inpperm,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 
 | |
| 	mtspr		256,$vrsave
 | |
| 	lvx		v20,r10,$sp		# ABI says so
 | |
| 	addi		r10,r10,32
 | |
| 	lvx		v21,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	lvx		v22,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	lvx		v23,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	lvx		v24,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	lvx		v25,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	lvx		v26,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	lvx		v27,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	lvx		v28,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	lvx		v29,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	lvx		v30,r10,$sp
 | |
| 	lvx		v31,r11,$sp
 | |
| 	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
 | |
| 	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 | |
| 	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 | |
| 	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 | |
| 	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 | |
| 	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 | |
| 	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,0x04,0,0x80,6,6,0
 | |
| 	.long		0
 | |
| .size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
 | |
| ___
 | |
| }}	}}}
 | |
| 
 | |
| #########################################################################
 | |
| {{{	# CTR procedure[s]						#
 | |
| my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
 | |
| my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
 | |
| my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
 | |
| 						map("v$_",(4..11));
 | |
| my $dat=$tmp;
 | |
| 
 | |
| $code.=<<___;
 | |
| .globl	.${prefix}_ctr32_encrypt_blocks
 | |
| .align	5
 | |
| .${prefix}_ctr32_encrypt_blocks:
 | |
| 	${UCMP}i	$len,1
 | |
| 	bltlr-
 | |
| 
 | |
| 	lis		r0,0xfff0
 | |
| 	mfspr		$vrsave,256
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	li		$idx,15
 | |
| 	vxor		$rndkey0,$rndkey0,$rndkey0
 | |
| 	le?vspltisb	$tmp,0x0f
 | |
| 
 | |
| 	lvx		$ivec,0,$ivp		# load [unaligned] iv
 | |
| 	lvsl		$inpperm,0,$ivp
 | |
| 	lvx		$inptail,$idx,$ivp
 | |
| 	 vspltisb	$one,1
 | |
| 	le?vxor		$inpperm,$inpperm,$tmp
 | |
| 	vperm		$ivec,$ivec,$inptail,$inpperm
 | |
| 	 vsldoi		$one,$rndkey0,$one,1
 | |
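| # counter increment constant: vspltisb filled "one" with 0x01 bytes
| # and the vsldoi against the zeroed register keeps only the last
| # byte, leaving the 128-bit value 0...01; vadduwm with it later bumps
| # the low 32-bit word of the counter block, which is the ctr32
| # convention (last four IV bytes as a big-endian counter mod 2^32)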
| 
 | |
| 	neg		r11,$inp
 | |
| 	?lvsl		$keyperm,0,$key		# prepare for unaligned key
 | |
| 	lwz		$rounds,240($key)
 | |
| 
 | |
| 	lvsr		$inpperm,0,r11		# prepare for unaligned load
 | |
| 	lvx		$inptail,0,$inp
 | |
| 	addi		$inp,$inp,15		# 15 is not typo
 | |
| 	le?vxor		$inpperm,$inpperm,$tmp
 | |
| 
 | |
| 	srwi		$rounds,$rounds,1
 | |
| 	li		$idx,16
 | |
| 	subi		$rounds,$rounds,1
 | |
| 
 | |
| 	${UCMP}i	$len,8
 | |
| 	bge		_aesp8_ctr32_encrypt8x
 | |
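| # unlike the CBC entry point, len here counts 16-byte blocks, not
| # bytes; eight or more blocks take the interleaved 8x path below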
| 
 | |
| 	?lvsr		$outperm,0,$out		# prepare for unaligned store
 | |
| 	vspltisb	$outmask,-1
 | |
| 	lvx		$outhead,0,$out
 | |
| 	?vperm		$outmask,$rndkey0,$outmask,$outperm
 | |
| 	le?vxor		$outperm,$outperm,$tmp
 | |
| 
 | |
| 	lvx		$rndkey0,0,$key
 | |
| 	mtctr		$rounds
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vxor		$inout,$ivec,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	b		Loop_ctr32_enc
 | |
| 
 | |
| .align	5
 | |
| Loop_ctr32_enc:
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey0
 | |
| 	lvx		$rndkey0,$idx,$key
 | |
| 	addi		$idx,$idx,16
 | |
| 	bdnz		Loop_ctr32_enc
 | |
| 
 | |
| 	vadduwm		$ivec,$ivec,$one
 | |
| 	 vmr		$dat,$inptail
 | |
| 	 lvx		$inptail,0,$inp
 | |
| 	 addi		$inp,$inp,16
 | |
| 	 subic.		$len,$len,1		# blocks--
 | |
| 
 | |
| 	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
 | |
| 	vcipher		$inout,$inout,$rndkey1
 | |
| 	lvx		$rndkey1,$idx,$key
 | |
| 	 vperm		$dat,$dat,$inptail,$inpperm
 | |
| 	 li		$idx,16
 | |
| 	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
 | |
| 	 lvx		$rndkey0,0,$key
 | |
| 	vxor		$dat,$dat,$rndkey1	# last round key
 | |
| 	vcipherlast	$inout,$inout,$dat
 | |
| 
 | |
| 	 lvx		$rndkey1,$idx,$key
 | |
| 	 addi		$idx,$idx,16
 | |
| 	vperm		$inout,$inout,$inout,$outperm
 | |
| 	vsel		$dat,$outhead,$inout,$outmask
 | |
| 	 mtctr		$rounds
 | |
| 	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
 | |
| 	vmr		$outhead,$inout
 | |
| 	 vxor		$inout,$ivec,$rndkey0
 | |
| 	 lvx		$rndkey0,$idx,$key
 | |
| 	 addi		$idx,$idx,16
 | |
| 	stvx		$dat,0,$out
 | |
| 	addi		$out,$out,16
 | |
| 	bne		Loop_ctr32_enc
 | |
| 
 | |
| 	addi		$out,$out,-1
 | |
| 	lvx		$inout,0,$out		# redundant in aligned case
 | |
| 	vsel		$inout,$outhead,$inout,$outmask
 | |
| 	stvx		$inout,0,$out
 | |
| 
 | |
| 	mtspr		256,$vrsave
 | |
| 	blr
 | |
| 	.long		0
 | |
| 	.byte		0,12,0x14,0,0,0,6,0
 | |
| 	.long		0
 | |
| ___
 | |
| #########################################################################
 | |
| {{	# Optimized CTR procedure					#
 | |
| my $key_="r11";
 | |
| my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
 | |
|     $x00=0 if ($flavour =~ /osx/);
 | |
| my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
 | |
| my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
 | |
| my $rndkey0="v23";	# v24-v25 rotating buffer for first found keys
 | |
| 			# v26-v31 last 6 round keys
 | |
| my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
 | |
| my ($two,$three,$four)=($outhead,$outperm,$outmask);
 | |
| 
 | |
| $code.=<<___;
 | |
| .align	5
 | |
| _aesp8_ctr32_encrypt8x:
 | |
| 	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
 | |
| 	li		r10,`$FRAME+8*16+15`
 | |
| 	li		r11,`$FRAME+8*16+31`
 | |
| 	stvx		v20,r10,$sp		# ABI says so
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v21,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v22,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v23,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v24,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v25,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v26,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v27,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v28,r10,$sp
 | |
| 	addi		r10,r10,32
 | |
| 	stvx		v29,r11,$sp
 | |
| 	addi		r11,r11,32
 | |
| 	stvx		v30,r10,$sp
 | |
| 	stvx		v31,r11,$sp
 | |
| 	li		r0,-1
 | |
| 	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
 | |
| 	li		$x10,0x10
 | |
| 	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
 | |
| 	li		$x20,0x20
 | |
| 	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
 | |
| 	li		$x30,0x30
 | |
| 	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
 | |
| 	li		$x40,0x40
 | |
| 	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
 | |
| 	li		$x50,0x50
 | |
| 	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
 | |
| 	li		$x60,0x60
 | |
| 	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
 | |
| 	li		$x70,0x70
 | |
| 	mtspr		256,r0
 | |
| 
 | |
| 	subi		$rounds,$rounds,3	# -4 in total
 | |
| 
 | |
| 	lvx		$rndkey0,$x00,$key	# load key schedule
 | |
| 	lvx		v30,$x10,$key
 | |
| 	addi		$key,$key,0x20
 | |
| 	lvx		v31,$x00,$key
 | |
| 	?vperm		$rndkey0,$rndkey0,v30,$keyperm
 | |
| 	addi		$key_,$sp,$FRAME+15
 | |
| 	mtctr		$rounds
 | |
| 
 | |
| Load_ctr32_enc_key:
 | |
| 	?vperm		v24,v30,v31,$keyperm
 | |
| 	lvx		v30,$x10,$key
 | |
| 	addi		$key,$key,0x20
 | |
| 	stvx		v24,$x00,$key_		# off-load round[1]
 | |
| 	?vperm		v25,v31,v30,$keyperm
 | |
| 	lvx		v31,$x00,$key
 | |
| 	stvx		v25,$x10,$key_		# off-load round[2]
 | |
| 	addi		$key_,$key_,0x20
 | |
| 	bdnz		Load_ctr32_enc_key
 | |
| 
 | |
| 	lvx		v26,$x10,$key
 | |
| 	?vperm		v24,v30,v31,$keyperm
 | |
| 	lvx		v27,$x20,$key
 | |
| 	stvx		v24,$x00,$key_		# off-load round[3]
 | |
| 	?vperm		v25,v31,v26,$keyperm
 | |
| 	lvx		v28,$x30,$key
 | |
| 	stvx		v25,$x10,$key_		# off-load round[4]
 | |
| 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 | |
| 	?vperm		v26,v26,v27,$keyperm
 | |
| 	lvx		v29,$x40,$key
 | |
| 	?vperm		v27,v27,v28,$keyperm
 | |
| 	lvx		v30,$x50,$key
 | |
| 	?vperm		v28,v28,v29,$keyperm
 | |
| 	lvx		v31,$x60,$key
 | |
| 	?vperm		v29,v29,v30,$keyperm
 | |
| 	lvx		$out0,$x70,$key		# borrow $out0
 | |
| 	?vperm		v30,v30,v31,$keyperm
 | |
| 	lvx		v24,$x00,$key_		# pre-load round[1]
 | |
| 	?vperm		v31,v31,$out0,$keyperm
 | |
| 	lvx		v25,$x10,$key_		# pre-load round[2]
 | |
| 
 | |
| 	vadduwm		$two,$one,$one
 | |
| 	subi		$inp,$inp,15		# undo "caller"
 | |
| 	$SHL		$len,$len,4
 | |
| 
 | |
| 	vadduwm		$out1,$ivec,$one	# counter values ...
 | |
| 	vadduwm		$out2,$ivec,$two
 | |
| 	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
 | |
| 	 le?li		$idx,8
 | |
| 	vadduwm		$out3,$out1,$two
 | |
| 	vxor		$out1,$out1,$rndkey0
 | |
| 	 le?lvsl	$inpperm,0,$idx
 | |
| 	vadduwm		$out4,$out2,$two
 | |
| 	vxor		$out2,$out2,$rndkey0
 | |
| 	 le?vspltisb	$tmp,0x0f
 | |
| 	vadduwm		$out5,$out3,$two
 | |
| 	vxor		$out3,$out3,$rndkey0
 | |
| 	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
 | |
| 	vadduwm		$out6,$out4,$two
 | |
| 	vxor		$out4,$out4,$rndkey0
 | |
| 	vadduwm		$out7,$out5,$two
 | |
| 	vxor		$out5,$out5,$rndkey0
 | |
| 	vadduwm		$ivec,$out6,$two	# next counter value
 | |
| 	vxor		$out6,$out6,$rndkey0
 | |
| 	vxor		$out7,$out7,$rndkey0
 | |
| 
 | |
| 	mtctr		$rounds
 | |
| 	b		Loop_ctr32_enc8x
 | |
| .align	5
 | |
| Loop_ctr32_enc8x:
 | |
| 	vcipher 	$out0,$out0,v24
 | |
| 	vcipher 	$out1,$out1,v24
 | |
| 	vcipher 	$out2,$out2,v24
 | |
| 	vcipher 	$out3,$out3,v24
 | |
| 	vcipher 	$out4,$out4,v24
 | |
| 	vcipher 	$out5,$out5,v24
 | |
| 	vcipher 	$out6,$out6,v24
 | |
| 	vcipher 	$out7,$out7,v24
 | |
| Loop_ctr32_enc8x_middle:
 | |
| 	lvx		v24,$x20,$key_		# round[3]
 | |
| 	addi		$key_,$key_,0x20
 | |
| 
 | |
| 	vcipher 	$out0,$out0,v25
 | |
| 	vcipher 	$out1,$out1,v25
 | |
| 	vcipher 	$out2,$out2,v25
 | |
| 	vcipher 	$out3,$out3,v25
 | |
| 	vcipher 	$out4,$out4,v25
 | |
| 	vcipher 	$out5,$out5,v25
 | |
| 	vcipher 	$out6,$out6,v25
 | |
| 	vcipher 	$out7,$out7,v25
 | |
| 	lvx		v25,$x10,$key_		# round[4]
 | |
| 	bdnz		Loop_ctr32_enc8x
 | |
| 
 | |
| 	subic		r11,$len,256		# $len-256, borrow $key_
 | |
| 	vcipher 	$out0,$out0,v24
 | |
| 	vcipher 	$out1,$out1,v24
 | |
| 	vcipher 	$out2,$out2,v24
 | |
| 	vcipher 	$out3,$out3,v24
 | |
| 	vcipher 	$out4,$out4,v24
 | |
| 	vcipher 	$out5,$out5,v24
 | |
| 	vcipher 	$out6,$out6,v24
 | |
| 	vcipher 	$out7,$out7,v24
 | |
| 
 | |
| 	subfe		r0,r0,r0		# borrow?-1:0
 | |
| 	vcipher 	$out0,$out0,v25
 | |
| 	vcipher 	$out1,$out1,v25
 | |
| 	vcipher 	$out2,$out2,v25
 | |
| 	vcipher 	$out3,$out3,v25
 | |
| 	vcipher 	$out4,$out4,v25
 | |
| 	vcipher		$out5,$out5,v25
 | |
| 	vcipher		$out6,$out6,v25
 | |
| 	vcipher		$out7,$out7,v25
 | |
| 
 | |
| 	and		r0,r0,r11
 | |
| 	addi		$key_,$sp,$FRAME+15	# rewind $key_
 | |
| 	vcipher		$out0,$out0,v26
 | |
| 	vcipher		$out1,$out1,v26
 | |
| 	vcipher		$out2,$out2,v26
 | |
| 	vcipher		$out3,$out3,v26
 | |
| 	vcipher		$out4,$out4,v26
 | |
| 	vcipher		$out5,$out5,v26
 | |
| 	vcipher		$out6,$out6,v26
 | |
| 	vcipher		$out7,$out7,v26
 | |
| 	lvx		v24,$x00,$key_		# re-pre-load round[1]
 | |
| 
 | |
| 	subic		$len,$len,129		# $len-=129
 | |
| 	vcipher		$out0,$out0,v27
 | |
| 	addi		$len,$len,1		# $len-=128 really
 | |
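| # len is reduced by 129 and then corrected by +1 (net -128) so that
| # the borrow recorded in CA flags the moment at most 128 bytes
| # remain, i.e. the branch below leaves the loop on the final batch
| # rather than one batch too late; r11 = len-256 plays the same role
| # for the input-pointer rewind as in the CBC path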
| 	vcipher		$out1,$out1,v27
 | |
| 	vcipher		$out2,$out2,v27
 | |
| 	vcipher		$out3,$out3,v27
 | |
| 	vcipher		$out4,$out4,v27
 | |
| 	vcipher		$out5,$out5,v27
 | |
| 	vcipher		$out6,$out6,v27
 | |
| 	vcipher		$out7,$out7,v27
 | |
| 	lvx		v25,$x10,$key_		# re-pre-load round[2]
 | |
| 
 | |
| 	vcipher		$out0,$out0,v28
 | |
| 	 lvx_u		$in0,$x00,$inp		# load input
 | |
| 	vcipher		$out1,$out1,v28
 | |
| 	 lvx_u		$in1,$x10,$inp
 | |
| 	vcipher		$out2,$out2,v28
 | |
| 	 lvx_u		$in2,$x20,$inp
 | |
| 	vcipher		$out3,$out3,v28
 | |
| 	 lvx_u		$in3,$x30,$inp
 | |
| 	vcipher		$out4,$out4,v28
 | |
| 	 lvx_u		$in4,$x40,$inp
 | |
| 	vcipher		$out5,$out5,v28
 | |
| 	 lvx_u		$in5,$x50,$inp
 | |
| 	vcipher		$out6,$out6,v28
 | |
| 	 lvx_u		$in6,$x60,$inp
 | |
| 	vcipher		$out7,$out7,v28
 | |
| 	 lvx_u		$in7,$x70,$inp
 | |
| 	 addi		$inp,$inp,0x80
 | |
| 
 | |
| 	vcipher		$out0,$out0,v29
 | |
| 	 le?vperm	$in0,$in0,$in0,$inpperm
 | |
| 	vcipher		$out1,$out1,v29
 | |
| 	 le?vperm	$in1,$in1,$in1,$inpperm
 | |
| 	vcipher		$out2,$out2,v29
 | |
| 	 le?vperm	$in2,$in2,$in2,$inpperm
 | |
| 	vcipher		$out3,$out3,v29
 | |
| 	 le?vperm	$in3,$in3,$in3,$inpperm
 | |
| 	vcipher		$out4,$out4,v29
 | |
| 	 le?vperm	$in4,$in4,$in4,$inpperm
 | |
| 	vcipher		$out5,$out5,v29
 | |
| 	 le?vperm	$in5,$in5,$in5,$inpperm
 | |
| 	vcipher		$out6,$out6,v29
 | |
| 	 le?vperm	$in6,$in6,$in6,$inpperm
 | |
| 	vcipher		$out7,$out7,v29
 | |
| 	 le?vperm	$in7,$in7,$in7,$inpperm
 | |
| 
 | |
| 	add		$inp,$inp,r0		# $inp is adjusted in such
 | |
| 						# way that at exit from the
 | |
| 						# loop inX-in7 are loaded
 | |
| 						# with last "words"
 | |
| 	subfe.		r0,r0,r0		# borrow?-1:0
 | |
| 	vcipher		$out0,$out0,v30
 | |
| 	 vxor		$in0,$in0,v31		# xor with last round key
 | |
| 	vcipher		$out1,$out1,v30
 | |
| 	 vxor		$in1,$in1,v31
 | |
| 	vcipher		$out2,$out2,v30
 | |
| 	 vxor		$in2,$in2,v31
 | |
| 	vcipher		$out3,$out3,v30
 | |
| 	 vxor		$in3,$in3,v31
 | |
| 	vcipher		$out4,$out4,v30
 | |
| 	 vxor		$in4,$in4,v31
 | |
| 	vcipher		$out5,$out5,v30
 | |
| 	 vxor		$in5,$in5,v31
 | |
| 	vcipher		$out6,$out6,v30
 | |
| 	 vxor		$in6,$in6,v31
 | |
| 	vcipher		$out7,$out7,v30
 | |
| 	 vxor		$in7,$in7,v31
 | |
| 
 | |
| 	bne		Lctr32_enc8x_break	# did $len-129 borrow?
 | |
| 
 | |
| 	vcipherlast	$in0,$out0,$in0
 | |
| 	vcipherlast	$in1,$out1,$in1
 | |
| 	 vadduwm	$out1,$ivec,$one	# counter values ...
 | |
| 	vcipherlast	$in2,$out2,$in2
 | |
| 	 vadduwm	$out2,$ivec,$two
 | |
| 	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
 | |
| 	vcipherlast	$in3,$out3,$in3
 | |
| 	 vadduwm	$out3,$out1,$two
 | |
| 	 vxor		$out1,$out1,$rndkey0
 | |
| 	vcipherlast	$in4,$out4,$in4
 | |
| 	 vadduwm	$out4,$out2,$two
 | |
| 	 vxor		$out2,$out2,$rndkey0
 | |
| 	vcipherlast	$in5,$out5,$in5
 | |
| 	 vadduwm	$out5,$out3,$two
 | |
| 	 vxor		$out3,$out3,$rndkey0
 | |
| 	vcipherlast	$in6,$out6,$in6
 | |
| 	 vadduwm	$out6,$out4,$two
 | |
| 	 vxor		$out4,$out4,$rndkey0
 | |
| 	vcipherlast	$in7,$out7,$in7
 | |
| 	 vadduwm	$out7,$out5,$two
 | |
| 	 vxor		$out5,$out5,$rndkey0
 | |
| 	le?vperm	$in0,$in0,$in0,$inpperm
 | |
| 	 vadduwm	$ivec,$out6,$two	# next counter value
 | |
| 	 vxor		$out6,$out6,$rndkey0
 | |
| 	le?vperm	$in1,$in1,$in1,$inpperm
 | |
| 	 vxor		$out7,$out7,$rndkey0
 | |
| 	mtctr		$rounds
 | |
| 
 | |
| 	 vcipher	$out0,$out0,v24
 | |
| 	stvx_u		$in0,$x00,$out
 | |
| 	le?vperm	$in2,$in2,$in2,$inpperm
 | |
| 	 vcipher	$out1,$out1,v24
 | |
| 	stvx_u		$in1,$x10,$out
 | |
| 	le?vperm	$in3,$in3,$in3,$inpperm
 | |
| 	 vcipher	$out2,$out2,v24
 | |
| 	stvx_u		$in2,$x20,$out
 | |
| 	le?vperm	$in4,$in4,$in4,$inpperm
 | |
| 	 vcipher	$out3,$out3,v24
 | |
| 	stvx_u		$in3,$x30,$out
 | |
| 	le?vperm	$in5,$in5,$in5,$inpperm
 | |
| 	 vcipher	$out4,$out4,v24
 | |
| 	stvx_u		$in4,$x40,$out
 | |
| 	le?vperm	$in6,$in6,$in6,$inpperm
 | |
| 	 vcipher	$out5,$out5,v24
 | |
| 	stvx_u		$in5,$x50,$out
 | |
| 	le?vperm	$in7,$in7,$in7,$inpperm
 | |
| 	 vcipher	$out6,$out6,v24
 | |
| 	stvx_u		$in6,$x60,$out
 | |
| 	 vcipher	$out7,$out7,v24
 | |
| 	stvx_u		$in7,$x70,$out
 | |
| 	addi		$out,$out,0x80
 | |
| 
 | |
| 	b		Loop_ctr32_enc8x_middle
 | |
| 
 | |
| .align	5
 | |
| Lctr32_enc8x_break:
 | |
| 	cmpwi		$len,-0x60
 | |
| 	blt		Lctr32_enc8x_one
 | |
| 	nop
 | |
| 	beq		Lctr32_enc8x_two
 | |
| 	cmpwi		$len,-0x40
 | |
| 	blt		Lctr32_enc8x_three
 | |
| 	nop
 | |
| 	beq		Lctr32_enc8x_four
 | |
| 	cmpwi		$len,-0x20
 | |
| 	blt		Lctr32_enc8x_five
 | |
| 	nop
 | |
| 	beq		Lctr32_enc8x_six
 | |
| 	cmpwi		$len,0x00
 | |
| 	blt		Lctr32_enc8x_seven
 | |
| 
 | |
| Lctr32_enc8x_eight:
 | |
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u		$out6,$x60,$out
	stvx_u		$out7,$x70,$out
	addi		$out,$out,0x80
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u		$out5,$x50,$out
	stvx_u		$out6,$x60,$out
	addi		$out,$out,0x70
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u		$out4,$x40,$out
	stvx_u		$out5,$x50,$out
	addi		$out,$out,0x60
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u		$out0,$x00,$out
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	b		Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u		$out0,0,$out
	addi		$out,$out,0x10

Lctr32_enc8x_done:
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$inpperm,r10,$sp	# wipe copies of round keys
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32
	stvx		$inpperm,r10,$sp
	addi		r10,r10,32
	stvx		$inpperm,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, a "tweak chaining" mode is engaged, in which the	#
# input tweak value is assumed to be encrypted already, and the last	#
# tweak value, suitable for a consecutive call on the same chunk of	#
# data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#
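
# A minimal caller sketch (hypothetical C; the prototype above is
# authoritative).  ek1/ek2 stand for AES_KEY schedules the caller has
# already expanded for the data key and the tweak key:
#
#	unsigned char iv[16] = {0};	/* tweak of the first data unit */
#	aes_p8_xts_encrypt(in, out, len, &ek1, &ek2, iv);
#
# Passing key2==NULL instead engages the "tweak chaining" mode described
# above: iv[] must then already hold an encrypted tweak, and the last
# tweak value is written back to iv[] for a consecutive call on the same
# chunk of data.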

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

   ($inp,$idx) = ($idx,$inp);				# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
.align	5
.${prefix}_xts_encrypt:
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff0
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_enc_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_enc

Lxts_enc_no_key2:
	li		$idx,-16
	and		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_enc:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01
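
	# $eighty7 drives the tweak update: every "next tweak value"
	# sequence below (vsrab/vaddubm/vsldoi/vand/vxor) multiplies the
	# tweak by x in GF(2^128) modulo x^128+x^7+x^2+x+1.  vaddubm
	# doubles each byte, vsrab+vsldoi route every byte's carry to its
	# neighbour, and the 0x870101..01 mask turns the carry out of the
	# top byte into the 0x87 reduction.  A scalar sketch of one update
	# (pseudo-C, little-endian byte order of the 128-bit tweak):
	#
	#	static void xts_double(unsigned char t[16])
	#	{
	#		unsigned carry = t[15] >> 7;	/* bit shifted out */
	#		for (int i = 15; i > 0; i--)
	#			t[i] = (unsigned char)((t[i] << 1) | (t[i-1] >> 7));
	#		t[0] = (unsigned char)((t[0] << 1) ^ (carry ? 0x87 : 0));
	#	}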

	${UCMP}i	$len,96
	bge		_aesp8_xts_encrypt6x

	andi.		$taillen,$len,15
	subic		r0,$len,32
	subi		$taillen,$taillen,16
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0
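
	# The subic/subfe/and sequence above is a branchless select:
	# subic sets CA only when $len>=32, subfe then yields r0 = 0
	# (len>=32) or -1 (len<32), so $inp is rewound by 16-($len&15)
	# bytes exactly when fewer than two full blocks remain, lining
	# the final load up with the end of the input for stealing.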

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds
	b		Loop_xts_enc

.align	5
Loop_xts_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vcipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_enc_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	subic		r0,$len,32
	subfe		r0,r0,r0
	and		r0,r0,$taillen
	add		$inp,$inp,r0

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$output,$output,$rndkey0	# just in case $len<16
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_enc

	vxor		$output,$output,$tweak
	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	subi		r11,$out,17
	subi		$out,$out,16
	mtctr		$len
	li		$len,16
Loop_xts_enc_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_enc_steal

	mtctr		$rounds
	b		Loop_xts_enc			# one more time...

Lxts_enc_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl	.${prefix}_xts_decrypt
.align	5
.${prefix}_xts_decrypt:
	mr		$inp,r3				# reassign
	li		r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis		r0,0xfff8
	mfspr		r12,256				# save vrsave
	li		r11,0
	mtspr		256,r0

	andi.		r0,$len,15
	neg		r0,r0
	andi.		r0,r0,16
	sub		$len,$len,r0
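
	# The four instructions above hold back one full block whenever
	# the length is not a multiple of 16: ciphertext stealing on the
	# decrypt side must process the last complete ciphertext block
	# together with the partial tail.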

	vspltisb	$seven,0x07			# 0x070707..07
	le?lvsl		$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor		$leperm,$leperm,$seven

	li		$idx,15
	lvx		$tweak,0,$ivp			# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$tweak,$tweak,$inptail,$inpperm

	neg		r11,$inp
	lvsr		$inpperm,0,r11			# prepare for unaligned load
	lvx		$inout,0,$inp
	addi		$inp,$inp,15			# 15 is not a typo
	le?vxor		$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0				# key2==NULL?
	beq		Lxts_dec_no_key2

	?lvsl		$keyperm,0,$key2		# prepare for unaligned key
	lwz		$rounds,240($key2)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	lvx		$rndkey0,0,$key2
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	mtctr		$rounds

Ltweak_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$tweak,$tweak,$rndkey0
	lvx		$rndkey0,$idx,$key2
	addi		$idx,$idx,16
	bdnz		Ltweak_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$tweak,$tweak,$rndkey1
	lvx		$rndkey1,$idx,$key2
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li		$ivp,0				# don't chain the tweak
	b		Lxts_dec

Lxts_dec_no_key2:
	neg		$idx,$len
	andi.		$idx,$idx,15
	add		$len,$len,$idx			# in "tweak chaining"
							# mode only complete
							# blocks are processed
Lxts_dec:
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16

	?lvsl		$keyperm,0,$key1		# prepare for unaligned key
	lwz		$rounds,240($key1)
	srwi		$rounds,$rounds,1
	subi		$rounds,$rounds,1
	li		$idx,16

	vslb		$eighty7,$seven,$seven		# 0x808080..80
	vor		$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1				# 0x010101..01
	vsldoi		$eighty7,$eighty7,$tmp,15	# 0x870101..01

	${UCMP}i	$len,96
	bge		_aesp8_xts_decrypt6x

	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	mtctr		$rounds

	${UCMP}i	$len,16
	blt		Ltail_xts_dec
	be?b		Loop_xts_dec

.align	5
Loop_xts_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out
	addi		$out,$out,16

	subic.		$len,$len,16
	beq		Lxts_dec_done

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	mtctr		$rounds
	${UCMP}i	$len,16
	bge		Loop_xts_dec

Ltail_xts_dec:
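	# Ciphertext stealing swaps the tweak order on the decrypt side:
	# the last full ciphertext block is decrypted with the *next*
	# tweak ($tweak1, computed below), while the partial tail block
	# still uses the current $tweak.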
	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak1,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak1,$tweak1,$tmp

	subi		$inp,$inp,16
	add		$inp,$inp,$len

	vxor		$inout,$inout,$tweak		# :-(
	vxor		$inout,$inout,$tweak1		# :-)

Loop_xts_dec_short:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16
	bdnz		Loop_xts_dec_short

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key1
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$rndkey0,$rndkey0,$tweak1
	vncipherlast	$output,$inout,$rndkey0

	le?vperm	$tmp,$output,$output,$leperm
	be?nop
	le?stvx_u	$tmp,0,$out
	be?stvx_u	$output,0,$out

	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	#addi		$inp,$inp,16
	lvx		$rndkey0,0,$key1
	lvx		$rndkey1,$idx,$key1
	addi		$idx,$idx,16
	vperm		$inout,$inout,$inptail,$inpperm
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm

	lvsr		$inpperm,0,$len			# $inpperm is no longer needed
	vxor		$inptail,$inptail,$inptail	# $inptail is no longer needed
	vspltisb	$tmp,-1
	vperm		$inptail,$inptail,$tmp,$inpperm
	vsel		$inout,$inout,$output,$inptail

	vxor		$rndkey0,$rndkey0,$tweak
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key1
	addi		$idx,$idx,16

	subi		r11,$out,1
	mtctr		$len
	li		$len,16
Loop_xts_dec_steal:
	lbzu		r0,1(r11)
	stb		r0,16(r11)
	bdnz		Loop_xts_dec_steal

	mtctr		$rounds
	b		Loop_xts_dec			# one more time...

Lxts_dec_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec_ret

	vsrab		$tmp,$tweak,$seven		# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	vxor		$tweak,$tweak,$tmp

	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec_ret:
	mtspr		256,r12				# restore vrsave
	li		r3,0
	blr
	.long		0
	.byte		0,12,0x04,0,0x80,6,6,0
	.long		0
.size	.${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
___
#########################################################################
{{	# Optimized XTS procedures					#
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for the first round keys
			# v26-v31 last 6 round keys
my ($keyperm)=($out0);	# aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align	5
_aesp8_xts_encrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_enc_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_enc_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]
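
	# Key schedule layout from here on: the early round keys sit
	# aligned in a stack buffer at $FRAME+15 and stream through the
	# v24/v25 pair, while v26-v31 keep the last six round keys
	# resident for the unrolled rounds.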

	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_enc6x

.align	5
Loop_xts_enc6x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc6x

	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v24
	vcipher		$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vcipher		$out4,$out4,v25
	vcipher		$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out0,$out0,v26
	vcipher		$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v26
	vcipher		$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	 vxor		$in2,$twk2,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk2,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vcipher		$out0,$out0,v27
	vcipher		$out1,$out1,v27
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out4,$out4,v27
	vcipher		$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	 vxor		$in3,$twk3,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk3,$tweak,$rndkey0
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipher		$out4,$out4,v28
	vcipher		$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vand		$tmp,$tmp,$eighty7

	vcipher		$out0,$out0,v29
	vcipher		$out1,$out1,v29
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	 vxor		$in4,$twk4,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk4,$tweak,$rndkey0
	vcipher		$out4,$out4,v29
	vcipher		$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15

	vcipher		$out0,$out0,v30
	vcipher		$out1,$out1,v30
	 vand		$tmp,$tmp,$eighty7
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	 vxor		$tweak,$tweak,$tmp
	vcipher		$out4,$out4,v30
	vcipher		$out5,$out5,v30
	 vxor		$in5,$twk5,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk5,$tweak,$rndkey0

	vcipherlast	$out0,$out0,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vcipherlast	$out1,$out1,$in1
	 lvx_u		$in1,$x10,$inp
	vcipherlast	$out2,$out2,$in2
	 le?vperm	$in0,$in0,$in0,$leperm
	 lvx_u		$in2,$x20,$inp
	 vand		$tmp,$tmp,$eighty7
	vcipherlast	$out3,$out3,$in3
	 le?vperm	$in1,$in1,$in1,$leperm
	 lvx_u		$in3,$x30,$inp
	vcipherlast	$out4,$out4,$in4
	 le?vperm	$in2,$in2,$in2,$leperm
	 lvx_u		$in4,$x40,$inp
	 vxor		$tweak,$tweak,$tmp
	vcipherlast	$tmp,$out5,$in5		# last block might be needed
						# in stealing mode
	 le?vperm	$in3,$in3,$in3,$leperm
	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	 le?vperm	$in4,$in4,$in4,$leperm
	 le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	 vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	 vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$twk3
	le?vperm	$out5,$tmp,$tmp,$leperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$twk4
	le?stvx_u	$out5,$x50,$out
	be?stvx_u	$tmp, $x50,$out
	 vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_enc6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_enc6x_zero
	cmpwi		$len,0x20
	blt		Lxts_enc6x_one
	nop
	beq		Lxts_enc6x_two
	cmpwi		$len,0x40
	blt		Lxts_enc6x_three
	nop
	beq		Lxts_enc6x_four

Lxts_enc6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	vxor		$tmp,$out4,$twk5	# last block prep for stealing
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	vxor		$tmp,$out3,$twk4	# last block prep for stealing
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$tmp,$out2,$twk3	# last block prep for stealing
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_enc5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vxor		$tmp,$out1,$twk2	# last block prep for stealing
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_enc1x:
	vcipher		$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_enc1x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25

	lvsr		$inpperm,0,$taillen
	vcipher		$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vcipher		$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vcipher		$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vcipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vxor		$tmp,$out0,$twk1	# last block prep for stealing
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	bne		Lxts_enc6x_steal
	b		Lxts_enc6x_done

.align	4
Lxts_enc6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_enc6x_done

	add		$inp,$inp,$taillen
	subi		$inp,$inp,16
	lvx_u		$in0,0,$inp
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	le?vperm	$in0,$in0,$in0,$leperm
	vperm		$in0,$in0,$in0,$inpperm
	vxor		$tmp,$tmp,$twk0
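	# Ciphertext stealing: $tmp holds the last full ciphertext block.
	# The vsel below splices the remaining $taillen plaintext bytes
	# over its head, and the byte loop copies the stolen tail into
	# place before one more pass through Loop_xts_enc1x.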
Lxts_enc6x_steal:
	vxor		$in0,$in0,$twk0
	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0	# $tmp is last block, remember?

	subi		r30,$out,17
	subi		$out,$out,16
	mtctr		$taillen
Loop_xts_enc6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_enc6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_enc1x		# one more time...

.align	4
Lxts_enc6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_enc6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_enc6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

.align	5
_aesp8_xts_enc5x:
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_enc5x

	add		$inp,$inp,$taillen
	cmpwi		$taillen,0
	vcipher		$out0,$out0,v24
	vcipher		$out1,$out1,v24
	vcipher		$out2,$out2,v24
	vcipher		$out3,$out3,v24
	vcipher		$out4,$out4,v24

	subi		$inp,$inp,16
	vcipher		$out0,$out0,v25
	vcipher		$out1,$out1,v25
	vcipher		$out2,$out2,v25
	vcipher		$out3,$out3,v25
	vcipher		$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	vcipher		$out0,$out0,v26
	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vcipher		$out1,$out1,v26
	vcipher		$out2,$out2,v26
	vcipher		$out3,$out3,v26
	vcipher		$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vcipher		$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vcipher		$out1,$out1,v27
	vcipher		$out2,$out2,v27
	vcipher		$out3,$out3,v27
	vcipher		$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vcipher		$out0,$out0,v28
	vcipher		$out1,$out1,v28
	vcipher		$out2,$out2,v28
	vcipher		$out3,$out3,v28
	vcipher		$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vcipher		$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vcipher		$out1,$out1,v29
	vcipher		$out2,$out2,v29
	vcipher		$out3,$out3,v29
	vcipher		$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vcipher		$out0,$out0,v30
	vperm		$in0,$in0,$in0,$inpperm
	vcipher		$out1,$out1,v30
	vcipher		$out2,$out2,v30
	vcipher		$out3,$out3,v30
	vcipher		$out4,$out4,v30

	vcipherlast	$out0,$out0,$twk0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0

.align	5
_aesp8_xts_decrypt6x:
	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	mflr		r11
	li		r7,`$FRAME+8*16+15`
	li		r3,`$FRAME+8*16+31`
	$PUSH		r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
	stvx		v20,r7,$sp		# ABI says so
	addi		r7,r7,32
	stvx		v21,r3,$sp
	addi		r3,r3,32
	stvx		v22,r7,$sp
	addi		r7,r7,32
	stvx		v23,r3,$sp
	addi		r3,r3,32
	stvx		v24,r7,$sp
	addi		r7,r7,32
	stvx		v25,r3,$sp
	addi		r3,r3,32
	stvx		v26,r7,$sp
	addi		r7,r7,32
	stvx		v27,r3,$sp
	addi		r3,r3,32
	stvx		v28,r7,$sp
	addi		r7,r7,32
	stvx		v29,r3,$sp
	addi		r3,r3,32
	stvx		v30,r7,$sp
	stvx		v31,r3,$sp
	li		r0,-1
	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li		$x10,0x10
	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li		$x20,0x20
	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li		$x30,0x30
	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li		$x40,0x40
	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li		$x50,0x50
	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li		$x60,0x60
	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li		$x70,0x70
	mtspr		256,r0

	subi		$rounds,$rounds,3	# -4 in total

	lvx		$rndkey0,$x00,$key1	# load key schedule
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	lvx		v31,$x00,$key1
	?vperm		$rndkey0,$rndkey0,v30,$keyperm
	addi		$key_,$sp,$FRAME+15
	mtctr		$rounds

Load_xts_dec_key:
	?vperm		v24,v30,v31,$keyperm
	lvx		v30,$x10,$key1
	addi		$key1,$key1,0x20
	stvx		v24,$x00,$key_		# off-load round[1]
	?vperm		v25,v31,v30,$keyperm
	lvx		v31,$x00,$key1
	stvx		v25,$x10,$key_		# off-load round[2]
	addi		$key_,$key_,0x20
	bdnz		Load_xts_dec_key

	lvx		v26,$x10,$key1
	?vperm		v24,v30,v31,$keyperm
	lvx		v27,$x20,$key1
	stvx		v24,$x00,$key_		# off-load round[3]
	?vperm		v25,v31,v26,$keyperm
	lvx		v28,$x30,$key1
	stvx		v25,$x10,$key_		# off-load round[4]
	addi		$key_,$sp,$FRAME+15	# rewind $key_
	?vperm		v26,v26,v27,$keyperm
	lvx		v29,$x40,$key1
	?vperm		v27,v27,v28,$keyperm
	lvx		v30,$x50,$key1
	?vperm		v28,v28,v29,$keyperm
	lvx		v31,$x60,$key1
	?vperm		v29,v29,v30,$keyperm
	lvx		$twk5,$x70,$key1	# borrow $twk5
	?vperm		v30,v30,v31,$keyperm
	lvx		v24,$x00,$key_		# pre-load round[1]
	?vperm		v31,v31,$twk5,$keyperm
	lvx		v25,$x10,$key_		# pre-load round[2]

	 vperm		$in0,$inout,$inptail,$inpperm
	 subi		$inp,$inp,31		# undo "caller"
	vxor		$twk0,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	vand		$tmp,$tmp,$eighty7
	 vxor		$out0,$in0,$twk0
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in1,$x10,$inp
	vxor		$twk1,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in1,$in1,$in1,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out1,$in1,$twk1
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in2,$x20,$inp
	 andi.		$taillen,$len,15
	vxor		$twk2,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in2,$in2,$in2,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out2,$in2,$twk2
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in3,$x30,$inp
	 sub		$len,$len,$taillen
	vxor		$twk3,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in3,$in3,$in3,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out3,$in3,$twk3
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in4,$x40,$inp
	 subi		$len,$len,0x60
	vxor		$twk4,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in4,$in4,$in4,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out4,$in4,$twk4
	vxor		$tweak,$tweak,$tmp

	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	vxor		$twk5,$tweak,$rndkey0
	vsrab		$tmp,$tweak,$seven	# next tweak value
	vaddubm		$tweak,$tweak,$tweak
	vsldoi		$tmp,$tmp,$tmp,15
	 le?vperm	$in5,$in5,$in5,$leperm
	vand		$tmp,$tmp,$eighty7
	 vxor		$out5,$in5,$twk5
	vxor		$tweak,$tweak,$tmp

	vxor		v31,v31,$rndkey0
	mtctr		$rounds
	b		Loop_xts_dec6x

.align	5
Loop_xts_dec6x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec6x

	subic		$len,$len,96		# $len-=96
	 vxor		$in0,$twk0,v31		# xor with last round key
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk0,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24

	subfe.		r0,r0,r0		# borrow?-1:0
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	 vxor		$in1,$twk1,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk1,$tweak,$rndkey0
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25

	and		r0,r0,$len
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26

	add		$inp,$inp,r0		# $inp is adjusted in such
						# way that at exit from the
						# loop inX-in5 are loaded
						# with last "words"
	 vxor		$in2,$twk2,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk2,$tweak,$rndkey0
	 vaddubm	$tweak,$tweak,$tweak
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	 vxor		$in3,$twk3,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk3,$tweak,$rndkey0
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vand		$tmp,$tmp,$eighty7

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	 vxor		$in4,$twk4,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk4,$tweak,$rndkey0
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	 vand		$tmp,$tmp,$eighty7
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	 vxor		$tweak,$tweak,$tmp
	vncipher	$out4,$out4,v30
	vncipher	$out5,$out5,v30
	 vxor		$in5,$twk5,v31
	 vsrab		$tmp,$tweak,$seven	# next tweak value
	 vxor		$twk5,$tweak,$rndkey0

	vncipherlast	$out0,$out0,$in0
	 lvx_u		$in0,$x00,$inp		# load next input block
	 vaddubm	$tweak,$tweak,$tweak
	 vsldoi		$tmp,$tmp,$tmp,15
	vncipherlast	$out1,$out1,$in1
	 lvx_u		$in1,$x10,$inp
	vncipherlast	$out2,$out2,$in2
	 le?vperm	$in0,$in0,$in0,$leperm
	 lvx_u		$in2,$x20,$inp
	 vand		$tmp,$tmp,$eighty7
	vncipherlast	$out3,$out3,$in3
	 le?vperm	$in1,$in1,$in1,$leperm
	 lvx_u		$in3,$x30,$inp
	vncipherlast	$out4,$out4,$in4
	 le?vperm	$in2,$in2,$in2,$leperm
	 lvx_u		$in4,$x40,$inp
	 vxor		$tweak,$tweak,$tmp
	vncipherlast	$out5,$out5,$in5
	 le?vperm	$in3,$in3,$in3,$leperm
	 lvx_u		$in5,$x50,$inp
	 addi		$inp,$inp,0x60
	 le?vperm	$in4,$in4,$in4,$leperm
	 le?vperm	$in5,$in5,$in5,$leperm

	le?vperm	$out0,$out0,$out0,$leperm
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	 vxor		$out0,$in0,$twk0
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	 vxor		$out1,$in1,$twk1
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	 vxor		$out2,$in2,$twk2
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	 vxor		$out3,$in3,$twk3
	le?vperm	$out5,$out5,$out5,$leperm
	stvx_u		$out4,$x40,$out
	 vxor		$out4,$in4,$twk4
	stvx_u		$out5,$x50,$out
	 vxor		$out5,$in5,$twk5
	addi		$out,$out,0x60

	mtctr		$rounds
	beq		Loop_xts_dec6x		# did $len-=96 borrow?

	addic.		$len,$len,0x60
	beq		Lxts_dec6x_zero
	cmpwi		$len,0x20
	blt		Lxts_dec6x_one
	nop
	beq		Lxts_dec6x_two
	cmpwi		$len,0x40
	blt		Lxts_dec6x_three
	nop
	beq		Lxts_dec6x_four

Lxts_dec6x_five:
	vxor		$out0,$in1,$twk0
	vxor		$out1,$in2,$twk1
	vxor		$out2,$in3,$twk2
	vxor		$out3,$in4,$twk3
	vxor		$out4,$in5,$twk4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk5		# unused tweak
	vxor		$twk1,$tweak,$rndkey0
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk1
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$leperm
	stvx_u		$out3,$x30,$out
	stvx_u		$out4,$x40,$out
	addi		$out,$out,0x50
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_four:
	vxor		$out0,$in2,$twk0
	vxor		$out1,$in3,$twk1
	vxor		$out2,$in4,$twk2
	vxor		$out3,$in5,$twk3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk4		# unused tweak
	vmr		$twk1,$twk5
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk5
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$leperm
	stvx_u		$out2,$x20,$out
	stvx_u		$out3,$x30,$out
	addi		$out,$out,0x40
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_three:
	vxor		$out0,$in3,$twk0
	vxor		$out1,$in4,$twk1
	vxor		$out2,$in5,$twk2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk3		# unused tweak
	vmr		$twk1,$twk4
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk4
	le?vperm	$out2,$out2,$out2,$leperm
	stvx_u		$out1,$x10,$out
	stvx_u		$out2,$x20,$out
	addi		$out,$out,0x30
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_two:
	vxor		$out0,$in4,$twk0
	vxor		$out1,$in5,$twk1
	vxor		$out2,$out2,$out2
	vxor		$out3,$out3,$out3
	vxor		$out4,$out4,$out4

	bl		_aesp8_xts_dec5x

	le?vperm	$out0,$out0,$out0,$leperm
	vmr		$twk0,$twk2		# unused tweak
	vmr		$twk1,$twk3
	le?vperm	$out1,$out1,$out1,$leperm
	stvx_u		$out0,$x00,$out		# store output
	vxor		$out0,$in0,$twk3
	stvx_u		$out1,$x10,$out
	addi		$out,$out,0x20
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_one:
	vxor		$out0,$in5,$twk0
	nop
Loop_xts_dec1x:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Loop_xts_dec1x

	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk0,$twk0,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	mtctr		$rounds
	vncipherlast	$out0,$out0,$twk0

	vmr		$twk0,$twk1		# unused tweak
	vmr		$twk1,$twk2
	le?vperm	$out0,$out0,$out0,$leperm
	stvx_u		$out0,$x00,$out		# store output
	addi		$out,$out,0x10
	vxor		$out0,$in0,$twk2
	bne		Lxts_dec6x_steal
	b		Lxts_dec6x_done

.align	4
Lxts_dec6x_zero:
	cmpwi		$taillen,0
	beq		Lxts_dec6x_done

	lvx_u		$in0,0,$inp
	le?vperm	$in0,$in0,$in0,$leperm
	vxor		$out0,$in0,$twk1
Lxts_dec6x_steal:
	vncipher	$out0,$out0,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		Lxts_dec6x_steal

	add		$inp,$inp,$taillen
	vncipher	$out0,$out0,v24

	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25

	lvx_u		$in0,0,$inp
	vncipher	$out0,$out0,v26

	lvsr		$inpperm,0,$taillen	# $in5 is no more
	vncipher	$out0,$out0,v27

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$twk1,$twk1,v31

	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out0,$out0,v30

	vperm		$in0,$in0,$in0,$inpperm
	vncipherlast	$tmp,$out0,$twk1

	le?vperm	$out0,$tmp,$tmp,$leperm
	le?stvx_u	$out0,0,$out
	be?stvx_u	$tmp,0,$out

	vxor		$out0,$out0,$out0
	vspltisb	$out1,-1
	vperm		$out0,$out0,$out1,$inpperm
	vsel		$out0,$in0,$tmp,$out0
	vxor		$out0,$out0,$twk0
| 
 | |
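# Copy the leading $taillen bytes of the block just stored forward
# by 16 bytes to form the short final output block.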
	subi		r30,$out,1
	mtctr		$taillen
Loop_xts_dec6x_steal:
	lbzu		r0,1(r30)
	stb		r0,16(r30)
	bdnz		Loop_xts_dec6x_steal

	li		$taillen,0
	mtctr		$rounds
	b		Loop_xts_dec1x		# one more time...

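# If the caller passed a non-NULL $ivp, strip the round-key material
# folded into the running tweak and store it back so the operation
# can be continued.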
.align	4
Lxts_dec6x_done:
	${UCMP}i	$ivp,0
	beq		Lxts_dec6x_ret

	vxor		$tweak,$twk0,$rndkey0
	le?vperm	$tweak,$tweak,$tweak,$leperm
	stvx_u		$tweak,0,$ivp

Lxts_dec6x_ret:
	mtlr		r11
	li		r10,`$FRAME+15`
	li		r11,`$FRAME+31`
	stvx		$seven,r10,$sp		# wipe copies of round keys
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32
	stvx		$seven,r10,$sp
	addi		r10,r10,32
	stvx		$seven,r11,$sp
	addi		r11,r11,32

	mtspr		256,$vrsave
	lvx		v20,r10,$sp		# ABI says so
	addi		r10,r10,32
	lvx		v21,r11,$sp
	addi		r11,r11,32
	lvx		v22,r10,$sp
	addi		r10,r10,32
	lvx		v23,r11,$sp
	addi		r11,r11,32
	lvx		v24,r10,$sp
	addi		r10,r10,32
	lvx		v25,r11,$sp
	addi		r11,r11,32
	lvx		v26,r10,$sp
	addi		r10,r10,32
	lvx		v27,r11,$sp
	addi		r11,r11,32
	lvx		v28,r10,$sp
	addi		r10,r10,32
	lvx		v29,r11,$sp
	addi		r11,r11,32
	lvx		v30,r10,$sp
	lvx		v31,r11,$sp
	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long		0
	.byte		0,12,0x04,1,0x80,6,6,0
	.long		0

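# Subroutine shared by the tail paths above: runs the remaining AES
# rounds on up to five blocks in parallel, folds each tweak (xored
# with the last round key in v31) into vncipherlast, and re-preloads
# round[1]/round[2] plus CTR for the caller.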
.align	5
_aesp8_xts_dec5x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	lvx		v24,$x20,$key_		# round[3]
	addi		$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	lvx		v25,$x10,$key_		# round[4]
	bdnz		_aesp8_xts_dec5x

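# Same tail probe as in the one-block path: r0 = 16 when $taillen==0
# so the tail load cannot run past the input buffer.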
	subi		r0,$taillen,1
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24

	andi.		r0,r0,16
	cmpwi		$taillen,0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	 vxor		$twk0,$twk0,v31

	sub		$inp,$inp,r0
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	 vxor		$in1,$twk1,v31

	vncipher	$out0,$out0,v27
	lvx_u		$in0,0,$inp
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	 vxor		$in2,$twk2,v31

	addi		$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	lvx		v24,$x00,$key_		# re-pre-load round[1]
	 vxor		$in3,$twk3,v31

	vncipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$leperm
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	lvx		v25,$x10,$key_		# re-pre-load round[2]
	 vxor		$in4,$twk4,v31

	vncipher	$out0,$out0,v30
	vncipher	$out1,$out1,v30
	vncipher	$out2,$out2,v30
	vncipher	$out3,$out3,v30
	vncipher	$out4,$out4,v30

	vncipherlast	$out0,$out0,$twk0
	vncipherlast	$out1,$out1,$in1
	vncipherlast	$out2,$out2,$in2
	vncipherlast	$out3,$out3,$in3
	vncipherlast	$out4,$out4,$in4
	mtctr		$rounds
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,0,0
___
}}	}}}

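# Post-process the generated code: evaluate backquoted expressions,
# rewrite the constants table into endianness-correct .byte form, and
# resolve the le?/be?/? endian-specific instruction prefixes.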
my $consts=1;
foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# constants table endian-specific conversion
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
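		# '?inv' flips vperm lane indices (x ^ 0xf),
		# '?rev' reverses the byte order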
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
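	    # on LE, vperm/vsldoi source operands are swapped and the
	    # vsldoi shift becomes 16-n; vspltw lanes become 3-n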
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

close STDOUT;