Home | History | Annotate | Download | only in sha512
      1 // Copyright 2013 The Go Authors.  All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 #include "textflag.h"
      6 
      7 // SHA512 block routine. See sha512block.go for Go equivalent.
      8 //
      9 // The algorithm is detailed in FIPS 180-4:
     10 //
     11 //  http://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
     12 //
     13 // Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
     15 //
     16 // a = H0
     17 // b = H1
     18 // c = H2
     19 // d = H3
     20 // e = H4
     21 // f = H5
     22 // g = H6
     23 // h = H7
     24 //
     25 // for t = 0 to 79 {
     26 //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
     27 //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
     28 //    h = g
     29 //    g = f
     30 //    f = e
     31 //    e = d + T1
     32 //    d = c
     33 //    c = b
     34 //    b = a
     35 //    a = T1 + T2
     36 // }
     37 //
     38 // H0 = a + H0
     39 // H1 = b + H1
     40 // H2 = c + H2
     41 // H3 = d + H3
     42 // H4 = e + H4
     43 // H5 = f + H5
     44 // H6 = g + H6
     45 // H7 = h + H7
     46 
     47 // Wt = Mt; for 0 <= t <= 15
     48 #define MSGSCHEDULE0(index) \
     49 	MOVQ	(index*8)(SI), AX; \
     50 	BSWAPQ	AX; \
     51 	MOVQ	AX, (index*8)(BP)
     52 
     53 // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
     54 //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
     55 //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
     56 #define MSGSCHEDULE1(index) \
     57 	MOVQ	((index-2)*8)(BP), AX; \
     58 	MOVQ	AX, CX; \
     59 	RORQ	$19, AX; \
     60 	MOVQ	CX, DX; \
     61 	RORQ	$61, CX; \
     62 	SHRQ	$6, DX; \
     63 	MOVQ	((index-15)*8)(BP), BX; \
     64 	XORQ	CX, AX; \
     65 	MOVQ	BX, CX; \
     66 	XORQ	DX, AX; \
     67 	RORQ	$1, BX; \
     68 	MOVQ	CX, DX; \
     69 	SHRQ	$7, DX; \
     70 	RORQ	$8, CX; \
     71 	ADDQ	((index-7)*8)(BP), AX; \
     72 	XORQ	CX, BX; \
     73 	XORQ	DX, BX; \
     74 	ADDQ	((index-16)*8)(BP), BX; \
     75 	ADDQ	BX, AX; \
     76 	MOVQ	AX, ((index)*8)(BP)
     77 
     78 // Calculate T1 in AX - uses AX, CX and DX registers.
     79 // h is also used as an accumulator. Wt is passed in AX.
     80 //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
     81 //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
     82 //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
     83 #define SHA512T1(const, e, f, g, h) \
     84 	MOVQ	$const, DX; \
     85 	ADDQ	AX, h; \
     86 	MOVQ	e, AX; \
     87 	ADDQ	DX, h; \
     88 	MOVQ	e, CX; \
     89 	RORQ	$14, AX; \
     90 	MOVQ	e, DX; \
     91 	RORQ	$18, CX; \
     92 	XORQ	CX, AX; \
     93 	MOVQ	e, CX; \
     94 	RORQ	$41, DX; \
     95 	ANDQ	f, CX; \
     96 	XORQ	AX, DX; \
     97 	MOVQ	e, AX; \
     98 	NOTQ	AX; \
     99 	ADDQ	DX, h; \
    100 	ANDQ	g, AX; \
    101 	XORQ	CX, AX; \
    102 	ADDQ	h, AX
    103 
    104 // Calculate T2 in BX - uses BX, CX, DX and DI registers.
    105 //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
    106 //     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    107 //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    108 #define SHA512T2(a, b, c) \
    109 	MOVQ	a, DI; \
    110 	MOVQ	c, BX; \
    111 	RORQ	$28, DI; \
    112 	MOVQ	a, DX; \
    113 	ANDQ	b, BX; \
    114 	RORQ	$34, DX; \
    115 	MOVQ	a, CX; \
    116 	ANDQ	c, CX; \
    117 	XORQ	DX, DI; \
    118 	XORQ	CX, BX; \
    119 	MOVQ	a, DX; \
    120 	MOVQ	b, CX; \
    121 	RORQ	$39, DX; \
    122 	ANDQ	a, CX; \
    123 	XORQ	CX, BX; \
    124 	XORQ	DX, DI; \
    125 	ADDQ	DI, BX
    126 
    127 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    128 // The values for e and a are stored in d and h, ready for rotation.
    129 #define SHA512ROUND(index, const, a, b, c, d, e, f, g, h) \
    130 	SHA512T1(const, e, f, g, h); \
    131 	SHA512T2(a, b, c); \
    132 	MOVQ	BX, h; \
    133 	ADDQ	AX, d; \
    134 	ADDQ	AX, h
    135 
    136 #define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
    137 	MSGSCHEDULE0(index); \
    138 	SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
    139 
    140 #define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
    141 	MSGSCHEDULE1(index); \
    142 	SHA512ROUND(index, const, a, b, c, d, e, f, g, h)
    143 
    144 TEXT block(SB),0,$648-32
    145 	MOVQ	p_base+8(FP), SI
    146 	MOVQ	p_len+16(FP), DX
    147 	SHRQ	$7, DX
    148 	SHLQ	$7, DX
    149 
    150 	LEAQ	(SI)(DX*1), DI
    151 	MOVQ	DI, 640(SP)
    152 	CMPQ	SI, DI
    153 	JEQ	end
    154 
    155 	MOVQ	dig+0(FP), BP
    156 	MOVQ	(0*8)(BP), R8		// a = H0
    157 	MOVQ	(1*8)(BP), R9		// b = H1
    158 	MOVQ	(2*8)(BP), R10		// c = H2
    159 	MOVQ	(3*8)(BP), R11		// d = H3
    160 	MOVQ	(4*8)(BP), R12		// e = H4
    161 	MOVQ	(5*8)(BP), R13		// f = H5
    162 	MOVQ	(6*8)(BP), R14		// g = H6
    163 	MOVQ	(7*8)(BP), R15		// h = H7
    164 
    165 loop:
    166 	MOVQ	SP, BP			// message schedule
    167 
    168 	SHA512ROUND0(0, 0x428a2f98d728ae22, R8, R9, R10, R11, R12, R13, R14, R15)
    169 	SHA512ROUND0(1, 0x7137449123ef65cd, R15, R8, R9, R10, R11, R12, R13, R14)
    170 	SHA512ROUND0(2, 0xb5c0fbcfec4d3b2f, R14, R15, R8, R9, R10, R11, R12, R13)
    171 	SHA512ROUND0(3, 0xe9b5dba58189dbbc, R13, R14, R15, R8, R9, R10, R11, R12)
    172 	SHA512ROUND0(4, 0x3956c25bf348b538, R12, R13, R14, R15, R8, R9, R10, R11)
    173 	SHA512ROUND0(5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8, R9, R10)
    174 	SHA512ROUND0(6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8, R9)
    175 	SHA512ROUND0(7, 0xab1c5ed5da6d8118, R9, R10, R11, R12, R13, R14, R15, R8)
    176 	SHA512ROUND0(8, 0xd807aa98a3030242, R8, R9, R10, R11, R12, R13, R14, R15)
    177 	SHA512ROUND0(9, 0x12835b0145706fbe, R15, R8, R9, R10, R11, R12, R13, R14)
    178 	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8, R9, R10, R11, R12, R13)
    179 	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8, R9, R10, R11, R12)
    180 	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8, R9, R10, R11)
    181 	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8, R9, R10)
    182 	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8, R9)
    183 	SHA512ROUND0(15, 0xc19bf174cf692694, R9, R10, R11, R12, R13, R14, R15, R8)
    184 
    185 	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8, R9, R10, R11, R12, R13, R14, R15)
    186 	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8, R9, R10, R11, R12, R13, R14)
    187 	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8, R9, R10, R11, R12, R13)
    188 	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8, R9, R10, R11, R12)
    189 	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8, R9, R10, R11)
    190 	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8, R9, R10)
    191 	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8, R9)
    192 	SHA512ROUND1(23, 0x76f988da831153b5, R9, R10, R11, R12, R13, R14, R15, R8)
    193 	SHA512ROUND1(24, 0x983e5152ee66dfab, R8, R9, R10, R11, R12, R13, R14, R15)
    194 	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8, R9, R10, R11, R12, R13, R14)
    195 	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8, R9, R10, R11, R12, R13)
    196 	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8, R9, R10, R11, R12)
    197 	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8, R9, R10, R11)
    198 	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8, R9, R10)
    199 	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8, R9)
    200 	SHA512ROUND1(31, 0x142929670a0e6e70, R9, R10, R11, R12, R13, R14, R15, R8)
    201 	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8, R9, R10, R11, R12, R13, R14, R15)
    202 	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8, R9, R10, R11, R12, R13, R14)
    203 	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8, R9, R10, R11, R12, R13)
    204 	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8, R9, R10, R11, R12)
    205 	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8, R9, R10, R11)
    206 	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8, R9, R10)
    207 	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8, R9)
    208 	SHA512ROUND1(39, 0x92722c851482353b, R9, R10, R11, R12, R13, R14, R15, R8)
    209 	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8, R9, R10, R11, R12, R13, R14, R15)
    210 	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8, R9, R10, R11, R12, R13, R14)
    211 	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8, R9, R10, R11, R12, R13)
    212 	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8, R9, R10, R11, R12)
    213 	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8, R9, R10, R11)
    214 	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8, R9, R10)
    215 	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8, R9)
    216 	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9, R10, R11, R12, R13, R14, R15, R8)
    217 	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8, R9, R10, R11, R12, R13, R14, R15)
    218 	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8, R9, R10, R11, R12, R13, R14)
    219 	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8, R9, R10, R11, R12, R13)
    220 	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8, R9, R10, R11, R12)
    221 	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8, R9, R10, R11)
    222 	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8, R9, R10)
    223 	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8, R9)
    224 	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9, R10, R11, R12, R13, R14, R15, R8)
    225 	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8, R9, R10, R11, R12, R13, R14, R15)
    226 	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8, R9, R10, R11, R12, R13, R14)
    227 	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8, R9, R10, R11, R12, R13)
    228 	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8, R9, R10, R11, R12)
    229 	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8, R9, R10, R11)
    230 	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8, R9, R10)
    231 	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8, R9)
    232 	SHA512ROUND1(63, 0xc67178f2e372532b, R9, R10, R11, R12, R13, R14, R15, R8)
    233 	SHA512ROUND1(64, 0xca273eceea26619c, R8, R9, R10, R11, R12, R13, R14, R15)
    234 	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8, R9, R10, R11, R12, R13, R14)
    235 	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8, R9, R10, R11, R12, R13)
    236 	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8, R9, R10, R11, R12)
    237 	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8, R9, R10, R11)
    238 	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8, R9, R10)
    239 	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8, R9)
    240 	SHA512ROUND1(71, 0x1b710b35131c471b, R9, R10, R11, R12, R13, R14, R15, R8)
    241 	SHA512ROUND1(72, 0x28db77f523047d84, R8, R9, R10, R11, R12, R13, R14, R15)
    242 	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8, R9, R10, R11, R12, R13, R14)
    243 	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8, R9, R10, R11, R12, R13)
    244 	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8, R9, R10, R11, R12)
    245 	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8, R9, R10, R11)
    246 	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8, R9, R10)
    247 	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8, R9)
    248 	SHA512ROUND1(79, 0x6c44198c4a475817, R9, R10, R11, R12, R13, R14, R15, R8)
    249 
    250 	MOVQ	dig+0(FP), BP
    251 	ADDQ	(0*8)(BP), R8	// H0 = a + H0
    252 	MOVQ	R8, (0*8)(BP)
    253 	ADDQ	(1*8)(BP), R9	// H1 = b + H1
    254 	MOVQ	R9, (1*8)(BP)
    255 	ADDQ	(2*8)(BP), R10	// H2 = c + H2
    256 	MOVQ	R10, (2*8)(BP)
    257 	ADDQ	(3*8)(BP), R11	// H3 = d + H3
    258 	MOVQ	R11, (3*8)(BP)
    259 	ADDQ	(4*8)(BP), R12	// H4 = e + H4
    260 	MOVQ	R12, (4*8)(BP)
    261 	ADDQ	(5*8)(BP), R13	// H5 = f + H5
    262 	MOVQ	R13, (5*8)(BP)
    263 	ADDQ	(6*8)(BP), R14	// H6 = g + H6
    264 	MOVQ	R14, (6*8)(BP)
    265 	ADDQ	(7*8)(BP), R15	// H7 = h + H7
    266 	MOVQ	R15, (7*8)(BP)
    267 
    268 	ADDQ	$128, SI
    269 	CMPQ	SI, 640(SP)
    270 	JB	loop
    271 
    272 end:
    273 	RET
    274