Home | History | Annotate | Download | only in sha256
      1 // Copyright 2013 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "textflag.h"
      6 
      7 // SHA256 block routine. See sha256block.go for Go equivalent.
      8 //
      9 // The algorithm is detailed in FIPS 180-4:
     10 //
     11 //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
     12 //
     13 // Wt = Mt; for 0 <= t <= 15
     14 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
     15 //
     16 // a = H0
     17 // b = H1
     18 // c = H2
     19 // d = H3
     20 // e = H4
     21 // f = H5
     22 // g = H6
     23 // h = H7
     24 //
     25 // for t = 0 to 63 {
     26 //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
     27 //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
     28 //    h = g
     29 //    g = f
     30 //    f = e
     31 //    e = d + T1
     32 //    d = c
     33 //    c = b
     34 //    b = a
     35 //    a = T1 + T2
     36 // }
     37 //
     38 // H0 = a + H0
     39 // H1 = b + H1
     40 // H2 = c + H2
     41 // H3 = d + H3
     42 // H4 = e + H4
     43 // H5 = f + H5
     44 // H6 = g + H6
     45 // H7 = h + H7
     46 
     47 // Wt = Mt; for 0 <= t <= 15
        //
        // MSGSCHEDULE0 loads the 32-bit message word at (index*4)(SI), reverses
        // its byte order with BSWAPL, and stores the result as Wt at
        // (index*4)(BP). Wt is also left in AX, which is where SHA256T1 expects
        // to receive it.
        //
        // In:      SI = base of the current message block, BP = base of W
        // Out:     AX = Wt, and Wt written to (index*4)(BP)
        // Scratch: none besides AX
     48 #define MSGSCHEDULE0(index) \
     49 	MOVL	(index*4)(SI), AX; \
     50 	BSWAPL	AX; \
     51 	MOVL	AX, (index*4)(BP)
     52 
     53 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
     54 //   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
     55 //   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
        //
        // MSGSCHEDULE1 reads the four earlier schedule words relative to BP,
        // combines them as above, and stores the new Wt at (index*4)(BP).
        // Wt is also left in AX for SHA256T1.
        //
        // Register roles inside the macro:
        //   AX accumulates SIGMA1(Wt-2) and then Wt-7 (and finally holds Wt),
        //   BX accumulates SIGMA0(Wt-15) and then Wt-16,
        //   CX and DX hold the extra rotated/shifted copies of each input.
        // The two halves are interleaved instruction by instruction; the
        // statement order is deliberate and should not be regrouped casually.
        //
        // In:      BP = base of W, with entries index-16 .. index-1 already filled
        // Out:     AX = Wt, and Wt written to (index*4)(BP)
        // Scratch: BX, CX, DX
     56 #define MSGSCHEDULE1(index) \
     57 	MOVL	((index-2)*4)(BP), AX; \
     58 	MOVL	AX, CX; \
     59 	RORL	$17, AX; \
     60 	MOVL	CX, DX; \
     61 	RORL	$19, CX; \
     62 	SHRL	$10, DX; \
     63 	MOVL	((index-15)*4)(BP), BX; \
     64 	XORL	CX, AX; \
     65 	MOVL	BX, CX; \
     66 	XORL	DX, AX; \
     67 	RORL	$7, BX; \
     68 	MOVL	CX, DX; \
     69 	SHRL	$3, DX; \
     70 	RORL	$18, CX; \
     71 	ADDL	((index-7)*4)(BP), AX; \
     72 	XORL	CX, BX; \
     73 	XORL	DX, BX; \
     74 	ADDL	((index-16)*4)(BP), BX; \
     75 	ADDL	BX, AX; \
     76 	MOVL	AX, ((index)*4)(BP)
     77 
     78 // Calculate T1 in AX - uses AX, CX and DX registers.
     79 // h is also used as an accumulator. Wt is passed in AX.
     80 //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
     81 //     BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
     82 //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
        //
        // Order of accumulation:
        //   h += Wt (from AX), h += const (Kt), h += BIGSIGMA1(e),
        //   then AX = Ch(e, f, g) + h.
        // On exit AX holds T1 and h holds T1 - Ch(e, f, g); h no longer contains
        // the working variable and is expected to be overwritten by the caller
        // (SHA256ROUND does so). e, f and g are only read.
        //
        // In:      AX = Wt; e, f, g, h = registers holding the working variables;
        //          const = round constant Kt, used as an immediate
        // Out:     AX = T1
        // Scratch: CX, DX, h
     83 #define SHA256T1(const, e, f, g, h) \
     84 	ADDL	AX, h; \
     85 	MOVL	e, AX; \
     86 	ADDL	$const, h; \
     87 	MOVL	e, CX; \
     88 	RORL	$6, AX; \
     89 	MOVL	e, DX; \
     90 	RORL	$11, CX; \
     91 	XORL	CX, AX; \
     92 	MOVL	e, CX; \
     93 	RORL	$25, DX; \
     94 	ANDL	f, CX; \
     95 	XORL	AX, DX; \
     96 	MOVL	e, AX; \
     97 	NOTL	AX; \
     98 	ADDL	DX, h; \
     99 	ANDL	g, AX; \
    100 	XORL	CX, AX; \
    101 	ADDL	h, AX
    102 
    103 // Calculate T2 in BX - uses BX, CX, DX and DI registers.
    104 //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
    105 //     BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
    106 //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
        //
        // Register roles inside the macro:
        //   DI accumulates BIGSIGMA0(a),
        //   BX accumulates Maj(a, b, c) and finally receives DI,
        //   CX and DX hold the intermediate AND terms and rotated copies of a.
        // a, b and c are only read. AX is not touched, so a T1 value left in AX
        // by SHA256T1 survives this macro.
        //
        // In:      a, b, c = registers holding the working variables
        // Out:     BX = T2
        // Scratch: CX, DX, DI
    107 #define SHA256T2(a, b, c) \
    108 	MOVL	a, DI; \
    109 	MOVL	c, BX; \
    110 	RORL	$2, DI; \
    111 	MOVL	a, DX; \
    112 	ANDL	b, BX; \
    113 	RORL	$13, DX; \
    114 	MOVL	a, CX; \
    115 	ANDL	c, CX; \
    116 	XORL	DX, DI; \
    117 	XORL	CX, BX; \
    118 	MOVL	a, DX; \
    119 	MOVL	b, CX; \
    120 	RORL	$22, DX; \
    121 	ANDL	a, CX; \
    122 	XORL	CX, BX; \
    123 	XORL	DX, DI; \
    124 	ADDL	DI, BX
    125 
    126 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    127 // The values for e and a are stored in d and h, ready for rotation.
        //
        // Wt must already be in AX on entry (SHA256T1 consumes it). After
        // SHA256T1 leaves T1 in AX and SHA256T2 leaves T2 in BX:
        //   h = T2, d += T1 (new e), h += T1 (new a).
        // No register-to-register shuffle of a..h is performed; the caller is
        // expected to pass the registers in rotated order on the next round.
        // The index argument is not referenced by this macro itself; it is
        // only used by the SHA256ROUND0/SHA256ROUND1 wrappers.
        // Scratch: AX, BX, CX, DX, DI.
    128 #define SHA256ROUND(index, const, a, b, c, d, e, f, g, h) \
    129 	SHA256T1(const, e, f, g, h); \
    130 	SHA256T2(a, b, c); \
    131 	MOVL	BX, h; \
    132 	ADDL	AX, d; \
    133 	ADDL	AX, h
    134 
        // SHA256ROUND0 runs MSGSCHEDULE0(index), which loads Wt from the
        // message block at SI and leaves it in AX, and then performs one round.
    135 #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
    136 	MSGSCHEDULE0(index); \
    137 	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
    138 
        // SHA256ROUND1 runs MSGSCHEDULE1(index), which derives Wt from earlier
        // schedule entries at BP and leaves it in AX, and then performs one round.
    139 #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
    140 	MSGSCHEDULE1(index); \
    141 	SHA256ROUND(index, const, a, b, c, d, e, f, g, h)
    142 
    143 TEXT ·block(SB),0,$264-32
        	// Hashes every complete 64-byte block of p into the eight 32-bit
        	// state words at dig. Presumably the Go declaration in sha256block.go
        	// is func block(dig *digest, p []byte) -- confirm against that file.
        	//
        	// The symbol name carries the leading middle dot that Go assembly
        	// uses for a function belonging to the current package; without it
        	// the body is not associated with the Go declaration at link time.
        	//
        	// Frame ($264): 0..255(SP) hold the 64-entry message schedule W and
        	// 256(SP) holds the end-of-input pointer.
        	// Registers: SI = current block, BP = dig while loading/storing the
        	// state and base of W during the rounds, R8..R15 = working variables
        	// a..h, AX/BX/CX/DX/DI = scratch used by the round macros.
    144 	MOVQ	p_base+8(FP), SI
    145 	MOVQ	p_len+16(FP), DX
    146 	SHRQ	$6, DX			// round the length down ...
    147 	SHLQ	$6, DX			// ... to a multiple of 64 bytes
    148 
    149 	LEAQ	(SI)(DX*1), DI		// DI = end of the last complete block
    150 	MOVQ	DI, 256(SP)		// keep it on the stack: DI is scratch in SHA256T2
    151 	CMPQ	SI, DI
    152 	JEQ	end			// no complete block, nothing to do
    153 
    154 	MOVQ	dig+0(FP), BP
    155 	MOVL	(0*4)(BP), R8		// a = H0
    156 	MOVL	(1*4)(BP), R9		// b = H1
    157 	MOVL	(2*4)(BP), R10		// c = H2
    158 	MOVL	(3*4)(BP), R11		// d = H3
    159 	MOVL	(4*4)(BP), R12		// e = H4
    160 	MOVL	(5*4)(BP), R13		// f = H5
    161 	MOVL	(6*4)(BP), R14		// g = H6
    162 	MOVL	(7*4)(BP), R15		// h = H7
    163 
    164 loop:
    165 	MOVQ	SP, BP			// message schedule
    166 
        	// Rounds 0-15 take Wt straight from the message block. The register
        	// arguments rotate by one position per round instead of moving
        	// values, so after every 8 rounds a..h are back in R8..R15.
    167 	SHA256ROUND0(0, 0x428a2f98, R8, R9, R10, R11, R12, R13, R14, R15)
    168 	SHA256ROUND0(1, 0x71374491, R15, R8, R9, R10, R11, R12, R13, R14)
    169 	SHA256ROUND0(2, 0xb5c0fbcf, R14, R15, R8, R9, R10, R11, R12, R13)
    170 	SHA256ROUND0(3, 0xe9b5dba5, R13, R14, R15, R8, R9, R10, R11, R12)
    171 	SHA256ROUND0(4, 0x3956c25b, R12, R13, R14, R15, R8, R9, R10, R11)
    172 	SHA256ROUND0(5, 0x59f111f1, R11, R12, R13, R14, R15, R8, R9, R10)
    173 	SHA256ROUND0(6, 0x923f82a4, R10, R11, R12, R13, R14, R15, R8, R9)
    174 	SHA256ROUND0(7, 0xab1c5ed5, R9, R10, R11, R12, R13, R14, R15, R8)
    175 	SHA256ROUND0(8, 0xd807aa98, R8, R9, R10, R11, R12, R13, R14, R15)
    176 	SHA256ROUND0(9, 0x12835b01, R15, R8, R9, R10, R11, R12, R13, R14)
    177 	SHA256ROUND0(10, 0x243185be, R14, R15, R8, R9, R10, R11, R12, R13)
    178 	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8, R9, R10, R11, R12)
    179 	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8, R9, R10, R11)
    180 	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8, R9, R10)
    181 	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8, R9)
    182 	SHA256ROUND0(15, 0xc19bf174, R9, R10, R11, R12, R13, R14, R15, R8)
    183 
        	// Rounds 16-63 derive Wt from earlier schedule entries.
    184 	SHA256ROUND1(16, 0xe49b69c1, R8, R9, R10, R11, R12, R13, R14, R15)
    185 	SHA256ROUND1(17, 0xefbe4786, R15, R8, R9, R10, R11, R12, R13, R14)
    186 	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8, R9, R10, R11, R12, R13)
    187 	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8, R9, R10, R11, R12)
    188 	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8, R9, R10, R11)
    189 	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8, R9, R10)
    190 	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8, R9)
    191 	SHA256ROUND1(23, 0x76f988da, R9, R10, R11, R12, R13, R14, R15, R8)
    192 	SHA256ROUND1(24, 0x983e5152, R8, R9, R10, R11, R12, R13, R14, R15)
    193 	SHA256ROUND1(25, 0xa831c66d, R15, R8, R9, R10, R11, R12, R13, R14)
    194 	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8, R9, R10, R11, R12, R13)
    195 	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8, R9, R10, R11, R12)
    196 	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8, R9, R10, R11)
    197 	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8, R9, R10)
    198 	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8, R9)
    199 	SHA256ROUND1(31, 0x14292967, R9, R10, R11, R12, R13, R14, R15, R8)
    200 	SHA256ROUND1(32, 0x27b70a85, R8, R9, R10, R11, R12, R13, R14, R15)
    201 	SHA256ROUND1(33, 0x2e1b2138, R15, R8, R9, R10, R11, R12, R13, R14)
    202 	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8, R9, R10, R11, R12, R13)
    203 	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8, R9, R10, R11, R12)
    204 	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8, R9, R10, R11)
    205 	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8, R9, R10)
    206 	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8, R9)
    207 	SHA256ROUND1(39, 0x92722c85, R9, R10, R11, R12, R13, R14, R15, R8)
    208 	SHA256ROUND1(40, 0xa2bfe8a1, R8, R9, R10, R11, R12, R13, R14, R15)
    209 	SHA256ROUND1(41, 0xa81a664b, R15, R8, R9, R10, R11, R12, R13, R14)
    210 	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8, R9, R10, R11, R12, R13)
    211 	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8, R9, R10, R11, R12)
    212 	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8, R9, R10, R11)
    213 	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8, R9, R10)
    214 	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8, R9)
    215 	SHA256ROUND1(47, 0x106aa070, R9, R10, R11, R12, R13, R14, R15, R8)
    216 	SHA256ROUND1(48, 0x19a4c116, R8, R9, R10, R11, R12, R13, R14, R15)
    217 	SHA256ROUND1(49, 0x1e376c08, R15, R8, R9, R10, R11, R12, R13, R14)
    218 	SHA256ROUND1(50, 0x2748774c, R14, R15, R8, R9, R10, R11, R12, R13)
    219 	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8, R9, R10, R11, R12)
    220 	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8, R9, R10, R11)
    221 	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8, R9, R10)
    222 	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8, R9)
    223 	SHA256ROUND1(55, 0x682e6ff3, R9, R10, R11, R12, R13, R14, R15, R8)
    224 	SHA256ROUND1(56, 0x748f82ee, R8, R9, R10, R11, R12, R13, R14, R15)
    225 	SHA256ROUND1(57, 0x78a5636f, R15, R8, R9, R10, R11, R12, R13, R14)
    226 	SHA256ROUND1(58, 0x84c87814, R14, R15, R8, R9, R10, R11, R12, R13)
    227 	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8, R9, R10, R11, R12)
    228 	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8, R9, R10, R11)
    229 	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8, R9, R10)
    230 	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8, R9)
    231 	SHA256ROUND1(63, 0xc67178f2, R9, R10, R11, R12, R13, R14, R15, R8)
    232 
        	// Fold the working variables back into the state. The sums stay in
        	// R8..R15, so they already serve as a..h for the next block.
    233 	MOVQ	dig+0(FP), BP
    234 	ADDL	(0*4)(BP), R8	// H0 = a + H0
    235 	MOVL	R8, (0*4)(BP)
    236 	ADDL	(1*4)(BP), R9	// H1 = b + H1
    237 	MOVL	R9, (1*4)(BP)
    238 	ADDL	(2*4)(BP), R10	// H2 = c + H2
    239 	MOVL	R10, (2*4)(BP)
    240 	ADDL	(3*4)(BP), R11	// H3 = d + H3
    241 	MOVL	R11, (3*4)(BP)
    242 	ADDL	(4*4)(BP), R12	// H4 = e + H4
    243 	MOVL	R12, (4*4)(BP)
    244 	ADDL	(5*4)(BP), R13	// H5 = f + H5
    245 	MOVL	R13, (5*4)(BP)
    246 	ADDL	(6*4)(BP), R14	// H6 = g + H6
    247 	MOVL	R14, (6*4)(BP)
    248 	ADDL	(7*4)(BP), R15	// H7 = h + H7
    249 	MOVL	R15, (7*4)(BP)
    250 
    251 	ADDQ	$64, SI			// advance to the next 64-byte block
    252 	CMPQ	SI, 256(SP)
    253 	JB	loop			// unsigned: continue while SI < end pointer
    254 
    255 end:
    256 	RET
    257