// (Extraction artifact: code-browser navigation header, preserved as a comment.)
// Home | History | Annotate | Download | only in sha1
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// SHA-1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
//
// amd64p32 version.
// To ensure safety for Native Client, avoids use of BP and R15
// as well as two-register addressing modes.
     27 #define LOAD(index) \
     28 	MOVL	(index*4)(SI), R10; \
     29 	BSWAPL	R10; \
     30 	MOVL	R10, (index*4)(SP)
     31 
     32 #define SHUFFLE(index) \
     33 	MOVL	(((index)&0xf)*4)(SP), R10; \
     34 	XORL	(((index-3)&0xf)*4)(SP), R10; \
     35 	XORL	(((index-8)&0xf)*4)(SP), R10; \
     36 	XORL	(((index-14)&0xf)*4)(SP), R10; \
     37 	ROLL	$1, R10; \
     38 	MOVL	R10, (((index)&0xf)*4)(SP)
     39 
     40 #define FUNC1(a, b, c, d, e) \
     41 	MOVL	d, R9; \
     42 	XORL	c, R9; \
     43 	ANDL	b, R9; \
     44 	XORL	d, R9
     45 
     46 #define FUNC2(a, b, c, d, e) \
     47 	MOVL	b, R9; \
     48 	XORL	c, R9; \
     49 	XORL	d, R9
     50 
     51 #define FUNC3(a, b, c, d, e) \
     52 	MOVL	b, R8; \
     53 	ORL	c, R8; \
     54 	ANDL	d, R8; \
     55 	MOVL	b, R9; \
     56 	ANDL	c, R9; \
     57 	ORL	R8, R9
     58 
     59 #define FUNC4 FUNC2
     60 
     61 #define MIX(a, b, c, d, e, const) \
     62 	ROLL	$30, b; \
     63 	ADDL	R9, e; \
     64 	MOVL	a, R8; \
     65 	ROLL	$5, R8; \
     66 	LEAL	const(e)(R10*1), e; \
     67 	ADDL	R8, e
     68 
     69 #define ROUND1(a, b, c, d, e, index) \
     70 	LOAD(index); \
     71 	FUNC1(a, b, c, d, e); \
     72 	MIX(a, b, c, d, e, 0x5A827999)
     73 
     74 #define ROUND1x(a, b, c, d, e, index) \
     75 	SHUFFLE(index); \
     76 	FUNC1(a, b, c, d, e); \
     77 	MIX(a, b, c, d, e, 0x5A827999)
     78 
     79 #define ROUND2(a, b, c, d, e, index) \
     80 	SHUFFLE(index); \
     81 	FUNC2(a, b, c, d, e); \
     82 	MIX(a, b, c, d, e, 0x6ED9EBA1)
     83 
     84 #define ROUND3(a, b, c, d, e, index) \
     85 	SHUFFLE(index); \
     86 	FUNC3(a, b, c, d, e); \
     87 	MIX(a, b, c, d, e, 0x8F1BBCDC)
     88 
     89 #define ROUND4(a, b, c, d, e, index) \
     90 	SHUFFLE(index); \
     91 	FUNC4(a, b, c, d, e); \
     92 	MIX(a, b, c, d, e, 0xCA62C1D6)
     93 
     94 TEXT block(SB),NOSPLIT,$64-16
     95 	MOVL	dig+0(FP),	R14
     96 	MOVL	p_base+4(FP),	SI
     97 	MOVL	p_len+8(FP),	DX
     98 	SHRQ	$6,		DX
     99 	SHLQ	$6,		DX
    100 
    101 	LEAQ	(SI)(DX*1),	DI
    102 	MOVL	(0*4)(R14),	AX
    103 	MOVL	(1*4)(R14),	BX
    104 	MOVL	(2*4)(R14),	CX
    105 	MOVL	(3*4)(R14),	DX
    106 	MOVL	(4*4)(R14),	R13
    107 
    108 	CMPQ	SI,		DI
    109 	JEQ	end
    110 
    111 loop:
    112 #define BP R13 /* keep diff from sha1block_amd64.s small */
    113 	ROUND1(AX, BX, CX, DX, BP, 0)
    114 	ROUND1(BP, AX, BX, CX, DX, 1)
    115 	ROUND1(DX, BP, AX, BX, CX, 2)
    116 	ROUND1(CX, DX, BP, AX, BX, 3)
    117 	ROUND1(BX, CX, DX, BP, AX, 4)
    118 	ROUND1(AX, BX, CX, DX, BP, 5)
    119 	ROUND1(BP, AX, BX, CX, DX, 6)
    120 	ROUND1(DX, BP, AX, BX, CX, 7)
    121 	ROUND1(CX, DX, BP, AX, BX, 8)
    122 	ROUND1(BX, CX, DX, BP, AX, 9)
    123 	ROUND1(AX, BX, CX, DX, BP, 10)
    124 	ROUND1(BP, AX, BX, CX, DX, 11)
    125 	ROUND1(DX, BP, AX, BX, CX, 12)
    126 	ROUND1(CX, DX, BP, AX, BX, 13)
    127 	ROUND1(BX, CX, DX, BP, AX, 14)
    128 	ROUND1(AX, BX, CX, DX, BP, 15)
    129 
    130 	ROUND1x(BP, AX, BX, CX, DX, 16)
    131 	ROUND1x(DX, BP, AX, BX, CX, 17)
    132 	ROUND1x(CX, DX, BP, AX, BX, 18)
    133 	ROUND1x(BX, CX, DX, BP, AX, 19)
    134 
    135 	ROUND2(AX, BX, CX, DX, BP, 20)
    136 	ROUND2(BP, AX, BX, CX, DX, 21)
    137 	ROUND2(DX, BP, AX, BX, CX, 22)
    138 	ROUND2(CX, DX, BP, AX, BX, 23)
    139 	ROUND2(BX, CX, DX, BP, AX, 24)
    140 	ROUND2(AX, BX, CX, DX, BP, 25)
    141 	ROUND2(BP, AX, BX, CX, DX, 26)
    142 	ROUND2(DX, BP, AX, BX, CX, 27)
    143 	ROUND2(CX, DX, BP, AX, BX, 28)
    144 	ROUND2(BX, CX, DX, BP, AX, 29)
    145 	ROUND2(AX, BX, CX, DX, BP, 30)
    146 	ROUND2(BP, AX, BX, CX, DX, 31)
    147 	ROUND2(DX, BP, AX, BX, CX, 32)
    148 	ROUND2(CX, DX, BP, AX, BX, 33)
    149 	ROUND2(BX, CX, DX, BP, AX, 34)
    150 	ROUND2(AX, BX, CX, DX, BP, 35)
    151 	ROUND2(BP, AX, BX, CX, DX, 36)
    152 	ROUND2(DX, BP, AX, BX, CX, 37)
    153 	ROUND2(CX, DX, BP, AX, BX, 38)
    154 	ROUND2(BX, CX, DX, BP, AX, 39)
    155 
    156 	ROUND3(AX, BX, CX, DX, BP, 40)
    157 	ROUND3(BP, AX, BX, CX, DX, 41)
    158 	ROUND3(DX, BP, AX, BX, CX, 42)
    159 	ROUND3(CX, DX, BP, AX, BX, 43)
    160 	ROUND3(BX, CX, DX, BP, AX, 44)
    161 	ROUND3(AX, BX, CX, DX, BP, 45)
    162 	ROUND3(BP, AX, BX, CX, DX, 46)
    163 	ROUND3(DX, BP, AX, BX, CX, 47)
    164 	ROUND3(CX, DX, BP, AX, BX, 48)
    165 	ROUND3(BX, CX, DX, BP, AX, 49)
    166 	ROUND3(AX, BX, CX, DX, BP, 50)
    167 	ROUND3(BP, AX, BX, CX, DX, 51)
    168 	ROUND3(DX, BP, AX, BX, CX, 52)
    169 	ROUND3(CX, DX, BP, AX, BX, 53)
    170 	ROUND3(BX, CX, DX, BP, AX, 54)
    171 	ROUND3(AX, BX, CX, DX, BP, 55)
    172 	ROUND3(BP, AX, BX, CX, DX, 56)
    173 	ROUND3(DX, BP, AX, BX, CX, 57)
    174 	ROUND3(CX, DX, BP, AX, BX, 58)
    175 	ROUND3(BX, CX, DX, BP, AX, 59)
    176 
    177 	ROUND4(AX, BX, CX, DX, BP, 60)
    178 	ROUND4(BP, AX, BX, CX, DX, 61)
    179 	ROUND4(DX, BP, AX, BX, CX, 62)
    180 	ROUND4(CX, DX, BP, AX, BX, 63)
    181 	ROUND4(BX, CX, DX, BP, AX, 64)
    182 	ROUND4(AX, BX, CX, DX, BP, 65)
    183 	ROUND4(BP, AX, BX, CX, DX, 66)
    184 	ROUND4(DX, BP, AX, BX, CX, 67)
    185 	ROUND4(CX, DX, BP, AX, BX, 68)
    186 	ROUND4(BX, CX, DX, BP, AX, 69)
    187 	ROUND4(AX, BX, CX, DX, BP, 70)
    188 	ROUND4(BP, AX, BX, CX, DX, 71)
    189 	ROUND4(DX, BP, AX, BX, CX, 72)
    190 	ROUND4(CX, DX, BP, AX, BX, 73)
    191 	ROUND4(BX, CX, DX, BP, AX, 74)
    192 	ROUND4(AX, BX, CX, DX, BP, 75)
    193 	ROUND4(BP, AX, BX, CX, DX, 76)
    194 	ROUND4(DX, BP, AX, BX, CX, 77)
    195 	ROUND4(CX, DX, BP, AX, BX, 78)
    196 	ROUND4(BX, CX, DX, BP, AX, 79)
    197 #undef BP
    198 
    199 	ADDL	(0*4)(R14), AX
    200 	ADDL	(1*4)(R14), BX
    201 	ADDL	(2*4)(R14), CX
    202 	ADDL	(3*4)(R14), DX
    203 	ADDL	(4*4)(R14), R13
    204 
    205 	MOVL	AX, (0*4)(R14)
    206 	MOVL	BX, (1*4)(R14)
    207 	MOVL	CX, (2*4)(R14)
    208 	MOVL	DX, (3*4)(R14)
    209 	MOVL	R13, (4*4)(R14)
    210 
    211 	ADDQ	$64, SI
    212 	CMPQ	SI, DI
    213 	JB	loop
    214 
    215 end:
    216 	RET
    217