Home | History | Annotate | Download | only in asm
      1 #!/usr/bin/env perl
      2 
      3 # ====================================================================
      4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
      5 # project. The module is, however, dual licensed under OpenSSL and
      6 # CRYPTOGAMS licenses depending on where you obtain it. For further
      7 # details see http://www.openssl.org/~appro/cryptogams/.
      8 # ====================================================================
      9 
     10 # SHA1 block procedure for PA-RISC.
     11 
     12 # June 2009.
     13 #
     14 # On PA-7100LC performance is >30% better than gcc 3.2 generated code
     15 # for aligned input and >50% better for unaligned. Compared to vendor
     16 # compiler on PA-8600 it's almost 60% faster in 64-bit build and just
     17 # few percent faster in 32-bit one (this for aligned input, data for
     18 # unaligned input is not available).
     19 #
     20 # Special thanks to polarhome.com for providing HP-UX account.
     21 
     22 $flavour = shift;
     23 $output = shift;
     24 open STDOUT,">$output";
     25 
     26 if ($flavour =~ /64/) {
     27 	$LEVEL		="2.0W";
     28 	$SIZE_T		=8;
     29 	$FRAME_MARKER	=80;
     30 	$SAVED_RP	=16;
     31 	$PUSH		="std";
     32 	$PUSHMA		="std,ma";
     33 	$POP		="ldd";
     34 	$POPMB		="ldd,mb";
     35 } else {
     36 	$LEVEL		="1.0";
     37 	$SIZE_T		=4;
     38 	$FRAME_MARKER	=48;
     39 	$SAVED_RP	=20;
     40 	$PUSH		="stw";
     41 	$PUSHMA		="stwm";
     42 	$POP		="ldw";
     43 	$POPMB		="ldwm";
     44 }
     45 
     46 $FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
     47 				#                 [+ argument transfer]
     48 $ctx="%r26";		# arg0
     49 $inp="%r25";		# arg1
     50 $num="%r24";		# arg2
     51 
     52 $t0="%r28";
     53 $t1="%r29";
     54 $K="%r31";
     55 
     56 @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
     57     "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);
     58 
     59 @V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");
     60 
     61 sub BODY_00_19 {
     62 my ($i,$a,$b,$c,$d,$e)=@_;
     63 my $j=$i+1;
     64 $code.=<<___ if ($i<15);
     65 	addl	$K,$e,$e	; $i
     66 	shd	$a,$a,27,$t1
     67 	addl	@X[$i],$e,$e
     68 	and	$c,$b,$t0
     69 	addl	$t1,$e,$e
     70 	andcm	$d,$b,$t1
     71 	shd	$b,$b,2,$b
     72 	or	$t1,$t0,$t0
     73 	addl	$t0,$e,$e
     74 ___
     75 $code.=<<___ if ($i>=15);	# with forward Xupdate
     76 	addl	$K,$e,$e	; $i
     77 	shd	$a,$a,27,$t1
     78 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
     79 	addl	@X[$i%16],$e,$e
     80 	and	$c,$b,$t0
     81 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
     82 	addl	$t1,$e,$e
     83 	andcm	$d,$b,$t1
     84 	shd	$b,$b,2,$b
     85 	or	$t1,$t0,$t0
     86 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
     87 	add	$t0,$e,$e
     88 	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
     89 ___
     90 }
     91 
     92 sub BODY_20_39 {
     93 my ($i,$a,$b,$c,$d,$e)=@_;
     94 my $j=$i+1;
     95 $code.=<<___ if ($i<79);
     96 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
     97 	addl	$K,$e,$e
     98 	shd	$a,$a,27,$t1
     99 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
    100 	addl	@X[$i%16],$e,$e
    101 	xor	$b,$c,$t0
    102 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
    103 	addl	$t1,$e,$e
    104 	shd	$b,$b,2,$b
    105 	xor	$d,$t0,$t0
    106 	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
    107 	addl	$t0,$e,$e
    108 ___
    109 $code.=<<___ if ($i==79);	# with context load
    110 	ldw	0($ctx),@X[0]	; $i
    111 	addl	$K,$e,$e
    112 	shd	$a,$a,27,$t1
    113 	ldw	4($ctx),@X[1]
    114 	addl	@X[$i%16],$e,$e
    115 	xor	$b,$c,$t0
    116 	ldw	8($ctx),@X[2]
    117 	addl	$t1,$e,$e
    118 	shd	$b,$b,2,$b
    119 	xor	$d,$t0,$t0
    120 	ldw	12($ctx),@X[3]
    121 	addl	$t0,$e,$e
    122 	ldw	16($ctx),@X[4]
    123 ___
    124 }
    125 
    126 sub BODY_40_59 {
    127 my ($i,$a,$b,$c,$d,$e)=@_;
    128 my $j=$i+1;
    129 $code.=<<___;
    130 	shd	$a,$a,27,$t1	; $i
    131 	addl	$K,$e,$e
    132 	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
    133 	xor	$d,$c,$t0
    134 	addl	@X[$i%16],$e,$e
    135 	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
    136 	and	$b,$t0,$t0
    137 	addl	$t1,$e,$e
    138 	shd	$b,$b,2,$b
    139 	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
    140 	addl	$t0,$e,$e
    141 	and	$d,$c,$t1
    142 	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
    143 	addl	$t1,$e,$e
    144 ___
    145 }
    146 
    147 $code=<<___;
    148 	.LEVEL	$LEVEL
    149 	.SPACE	\$TEXT\$
    150 	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
    151 
    152 	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
    153 sha1_block_data_order
    154 	.PROC
    155 	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
    156 	.ENTRY
    157 	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
    158 	$PUSHMA	%r3,$FRAME(%sp)
    159 	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
    160 	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
    161 	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
    162 	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
    163 	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
    164 	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
    165 	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
    166 	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
    167 	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
    168 	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
    169 	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
    170 	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
    171 	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)
    172 
    173 	ldw	0($ctx),$A
    174 	ldw	4($ctx),$B
    175 	ldw	8($ctx),$C
    176 	ldw	12($ctx),$D
    177 	ldw	16($ctx),$E
    178 
    179 	extru	$inp,31,2,$t0		; t0=inp&3;
    180 	sh3addl	$t0,%r0,$t0		; t0*=8;
    181 	subi	32,$t0,$t0		; t0=32-t0;
    182 	mtctl	$t0,%cr11		; %sar=t0;
    183 
    184 L\$oop
    185 	ldi	3,$t0
    186 	andcm	$inp,$t0,$t0		; 64-bit neutral
    187 ___
    188 	for ($i=0;$i<15;$i++) {		# load input block
    189 	$code.="\tldw	`4*$i`($t0),@X[$i]\n";		}
    190 $code.=<<___;
    191 	cmpb,*=	$inp,$t0,L\$aligned
    192 	ldw	60($t0),@X[15]
    193 	ldw	64($t0),@X[16]
    194 ___
    195 	for ($i=0;$i<16;$i++) {		# align input
    196 	$code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";	}
    197 $code.=<<___;
    198 L\$aligned
    199 	ldil	L'0x5a827000,$K		; K_00_19
    200 	ldo	0x999($K),$K
    201 ___
    202 for ($i=0;$i<20;$i++)   { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
    203 $code.=<<___;
    204 	ldil	L'0x6ed9e000,$K		; K_20_39
    205 	ldo	0xba1($K),$K
    206 ___
    207 
    208 for (;$i<40;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    209 $code.=<<___;
    210 	ldil	L'0x8f1bb000,$K		; K_40_59
    211 	ldo	0xcdc($K),$K
    212 ___
    213 
    214 for (;$i<60;$i++)       { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
    215 $code.=<<___;
    216 	ldil	L'0xca62c000,$K		; K_60_79
    217 	ldo	0x1d6($K),$K
    218 ___
    219 for (;$i<80;$i++)       { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
    220 
    221 $code.=<<___;
    222 	addl	@X[0],$A,$A
    223 	addl	@X[1],$B,$B
    224 	addl	@X[2],$C,$C
    225 	addl	@X[3],$D,$D
    226 	addl	@X[4],$E,$E
    227 	stw	$A,0($ctx)
    228 	stw	$B,4($ctx)
    229 	stw	$C,8($ctx)
    230 	stw	$D,12($ctx)
    231 	stw	$E,16($ctx)
    232 	addib,*<> -1,$num,L\$oop
    233 	ldo	64($inp),$inp
    234 
    235 	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
    236 	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
    237 	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
    238 	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
    239 	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
    240 	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
    241 	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
    242 	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
    243 	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
    244 	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
    245 	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
    246 	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
    247 	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
    248 	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
    249 	bv	(%r2)
    250 	.EXIT
    251 	$POPMB	-$FRAME(%sp),%r3
    252 	.PROCEND
    253 	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
    254 ___
    255 
    256 $code =~ s/\`([^\`]*)\`/eval $1/gem;
    257 $code =~ s/,\*/,/gm if ($SIZE_T==4);
    258 print $code;
    259 close STDOUT;
    260