Home | History | Annotate | Download | only in sha1
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 //
// ARM version of sha1block.go
      6 
      7 #include "textflag.h"
      8 
      9 // SHA1 block routine. See sha1block.go for Go equivalent.
     10 //
     11 // There are 80 rounds of 4 types:
     12 //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
     13 //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
     14 //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
     15 //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
     16 //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
     17 //
     18 // Each round loads or shuffles the data, then computes a per-round
     19 // function of b, c, d, and then mixes the result into and rotates the
     20 // five registers a, b, c, d, e holding the intermediate results.
     21 //
     22 // The register rotation is implemented by rotating the arguments to
     23 // the round macros instead of by explicit move instructions.
     24 
     25 // Register definitions
     26 #define Rdata	R0	// Pointer to incoming data
     27 #define Rconst	R1	// Current constant for SHA round
     28 #define Ra	R2		// SHA1 accumulator
     29 #define Rb	R3		// SHA1 accumulator
     30 #define Rc	R4		// SHA1 accumulator
     31 #define Rd	R5		// SHA1 accumulator
     32 #define Re	R6		// SHA1 accumulator
     33 #define Rt0	R7		// Temporary
     34 #define Rt1	R8		// Temporary
     35 // r9, r10 are forbidden
     36 // r11 is OK provided you check the assembler that no synthetic instructions use it
     37 #define Rt2	R11		// Temporary
     38 #define Rctr	R12	// loop counter
     39 #define Rw	R14		// point to w buffer
     40 
     41 // func block(dig *digest, p []byte)
     42 // 0(FP) is *digest
     43 // 4(FP) is p.array (struct Slice)
     44 // 8(FP) is p.len
     45 //12(FP) is p.cap
     46 //
     47 // Stack frame
     48 #define p_end	end-4(SP)		// pointer to the end of data
     49 #define p_data	data-8(SP)	// current data pointer (unused?)
     50 #define w_buf	buf-(8+4*80)(SP)	//80 words temporary buffer w uint32[80]
     51 #define saved	abcde-(8+4*80+4*5)(SP)	// saved sha1 registers a,b,c,d,e - these must be last (unused?)
     52 // Total size +4 for saved LR is 352
     53 
     54 	// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
     55 	// e += w[i]
     56 #define LOAD(Re) \
     57 	MOVBU	2(Rdata), Rt0 ; \
     58 	MOVBU	3(Rdata), Rt1 ; \
     59 	MOVBU	1(Rdata), Rt2 ; \
     60 	ORR	Rt0<<8, Rt1, Rt0	    ; \
     61 	MOVBU.P	4(Rdata), Rt1 ; \
     62 	ORR	Rt2<<16, Rt0, Rt0	    ; \
     63 	ORR	Rt1<<24, Rt0, Rt0	    ; \
     64 	MOVW.P	Rt0, 4(Rw)		    ; \
     65 	ADD	Rt0, Re, Re
     66 
     67 	// tmp := w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i)&0xf]
     68 	// w[i&0xf] = tmp<<1 | tmp>>(32-1)
     69 	// e += w[i&0xf]
     70 #define SHUFFLE(Re) \
     71 	MOVW	(-16*4)(Rw), Rt0 ; \
     72 	MOVW	(-14*4)(Rw), Rt1 ; \
     73 	MOVW	(-8*4)(Rw), Rt2  ; \
     74 	EOR	Rt0, Rt1, Rt0  ; \
     75 	MOVW	(-3*4)(Rw), Rt1  ; \
     76 	EOR	Rt2, Rt0, Rt0  ; \
     77 	EOR	Rt0, Rt1, Rt0  ; \
     78 	MOVW	Rt0@>(32-1), Rt0  ; \
     79 	MOVW.P	Rt0, 4(Rw)	  ; \
     80 	ADD	Rt0, Re, Re
     81 
     82 	// t1 = (b & c) | ((~b) & d)
     83 #define FUNC1(Ra, Rb, Rc, Rd, Re) \
     84 	MVN	Rb, Rt1	   ; \
     85 	AND	Rb, Rc, Rt0  ; \
     86 	AND	Rd, Rt1, Rt1 ; \
     87 	ORR	Rt0, Rt1, Rt1
     88 
     89 	// t1 = b ^ c ^ d
     90 #define FUNC2(Ra, Rb, Rc, Rd, Re) \
     91 	EOR	Rb, Rc, Rt1 ; \
     92 	EOR	Rd, Rt1, Rt1
     93 
     94 	// t1 = (b & c) | (b & d) | (c & d) =
     95 	// t1 = (b & c) | ((b | c) & d)
     96 #define FUNC3(Ra, Rb, Rc, Rd, Re) \
     97 	ORR	Rb, Rc, Rt0  ; \
     98 	AND	Rb, Rc, Rt1  ; \
     99 	AND	Rd, Rt0, Rt0 ; \
    100 	ORR	Rt0, Rt1, Rt1
    101 
    102 #define FUNC4 FUNC2
    103 
    104 	// a5 := a<<5 | a>>(32-5)
    105 	// b = b<<30 | b>>(32-30)
    106 	// e = a5 + t1 + e + const
    107 #define MIX(Ra, Rb, Rc, Rd, Re) \
    108 	ADD	Rt1, Re, Re	 ; \
    109 	MOVW	Rb@>(32-30), Rb	 ; \
    110 	ADD	Ra@>(32-5), Re, Re ; \
    111 	ADD	Rconst, Re, Re
    112 
    113 #define ROUND1(Ra, Rb, Rc, Rd, Re) \
    114 	LOAD(Re)		; \
    115 	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
    116 	MIX(Ra, Rb, Rc, Rd, Re)
    117 
    118 #define ROUND1x(Ra, Rb, Rc, Rd, Re) \
    119 	SHUFFLE(Re)	; \
    120 	FUNC1(Ra, Rb, Rc, Rd, Re)	; \
    121 	MIX(Ra, Rb, Rc, Rd, Re)
    122 
    123 #define ROUND2(Ra, Rb, Rc, Rd, Re) \
    124 	SHUFFLE(Re)	; \
    125 	FUNC2(Ra, Rb, Rc, Rd, Re)	; \
    126 	MIX(Ra, Rb, Rc, Rd, Re)
    127 
    128 #define ROUND3(Ra, Rb, Rc, Rd, Re) \
    129 	SHUFFLE(Re)	; \
    130 	FUNC3(Ra, Rb, Rc, Rd, Re)	; \
    131 	MIX(Ra, Rb, Rc, Rd, Re)
    132 
    133 #define ROUND4(Ra, Rb, Rc, Rd, Re) \
    134 	SHUFFLE(Re)	; \
    135 	FUNC4(Ra, Rb, Rc, Rd, Re)	; \
    136 	MIX(Ra, Rb, Rc, Rd, Re)
    137 
    138 
    139 // func block(dig *digest, p []byte)
    140 TEXT	block(SB), 0, $352-16
    141 	MOVW	p+4(FP), Rdata	// pointer to the data
    142 	MOVW	p_len+8(FP), Rt0	// number of bytes
    143 	ADD	Rdata, Rt0
    144 	MOVW	Rt0, p_end	// pointer to end of data
    145 
    146 	// Load up initial SHA1 accumulator
    147 	MOVW	dig+0(FP), Rt0
    148 	MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
    149 
    150 loop:
    151 	// Save registers at SP+4 onwards
    152 	MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
    153 
    154 	MOVW	$w_buf, Rw
    155 	MOVW	$0x5A827999, Rconst
    156 	MOVW	$3, Rctr
    157 loop1:	ROUND1(Ra, Rb, Rc, Rd, Re)
    158 	ROUND1(Re, Ra, Rb, Rc, Rd)
    159 	ROUND1(Rd, Re, Ra, Rb, Rc)
    160 	ROUND1(Rc, Rd, Re, Ra, Rb)
    161 	ROUND1(Rb, Rc, Rd, Re, Ra)
    162 	SUB.S	$1, Rctr
    163 	BNE	loop1
    164 
    165 	ROUND1(Ra, Rb, Rc, Rd, Re)
    166 	ROUND1x(Re, Ra, Rb, Rc, Rd)
    167 	ROUND1x(Rd, Re, Ra, Rb, Rc)
    168 	ROUND1x(Rc, Rd, Re, Ra, Rb)
    169 	ROUND1x(Rb, Rc, Rd, Re, Ra)
    170 
    171 	MOVW	$0x6ED9EBA1, Rconst
    172 	MOVW	$4, Rctr
    173 loop2:	ROUND2(Ra, Rb, Rc, Rd, Re)
    174 	ROUND2(Re, Ra, Rb, Rc, Rd)
    175 	ROUND2(Rd, Re, Ra, Rb, Rc)
    176 	ROUND2(Rc, Rd, Re, Ra, Rb)
    177 	ROUND2(Rb, Rc, Rd, Re, Ra)
    178 	SUB.S	$1, Rctr
    179 	BNE	loop2
    180 
    181 	MOVW	$0x8F1BBCDC, Rconst
    182 	MOVW	$4, Rctr
    183 loop3:	ROUND3(Ra, Rb, Rc, Rd, Re)
    184 	ROUND3(Re, Ra, Rb, Rc, Rd)
    185 	ROUND3(Rd, Re, Ra, Rb, Rc)
    186 	ROUND3(Rc, Rd, Re, Ra, Rb)
    187 	ROUND3(Rb, Rc, Rd, Re, Ra)
    188 	SUB.S	$1, Rctr
    189 	BNE	loop3
    190 
    191 	MOVW	$0xCA62C1D6, Rconst
    192 	MOVW	$4, Rctr
    193 loop4:	ROUND4(Ra, Rb, Rc, Rd, Re)
    194 	ROUND4(Re, Ra, Rb, Rc, Rd)
    195 	ROUND4(Rd, Re, Ra, Rb, Rc)
    196 	ROUND4(Rc, Rd, Re, Ra, Rb)
    197 	ROUND4(Rb, Rc, Rd, Re, Ra)
    198 	SUB.S	$1, Rctr
    199 	BNE	loop4
    200 
    201 	// Accumulate - restoring registers from SP+4
    202 	MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
    203 	ADD	Rt0, Ra
    204 	ADD	Rt1, Rb
    205 	ADD	Rt2, Rc
    206 	ADD	Rctr, Rd
    207 	ADD	Rw, Re
    208 
    209 	MOVW	p_end, Rt0
    210 	CMP	Rt0, Rdata
    211 	BLO	loop
    212 
    213 	// Save final SHA1 accumulator
    214 	MOVW	dig+0(FP), Rt0
    215 	MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
    216 
    217 	RET
    218