// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// ARM version of md5block.go

      7 #include "textflag.h"
      8 
      9 // Register definitions
     10 #define Rtable	R0	// Pointer to MD5 constants table
     11 #define Rdata	R1	// Pointer to data to hash
     12 #define Ra	R2	// MD5 accumulator
     13 #define Rb	R3	// MD5 accumulator
     14 #define Rc	R4	// MD5 accumulator
     15 #define Rd	R5	// MD5 accumulator
     16 #define Rc0	R6	// MD5 constant
     17 #define Rc1	R7	// MD5 constant
     18 #define Rc2	R8	// MD5 constant
     19 // r9, r10 are forbidden
     20 // r11 is OK provided you check the assembler that no synthetic instructions use it
     21 #define Rc3	R11	// MD5 constant
     22 #define Rt0	R12	// temporary
     23 #define Rt1	R14	// temporary
     24 
     25 // func block(dig *digest, p []byte)
     26 // 0(FP) is *digest
     27 // 4(FP) is p.array (struct Slice)
     28 // 8(FP) is p.len
     29 //12(FP) is p.cap
     30 //
     31 // Stack frame
     32 #define p_end	end-4(SP)	// pointer to the end of data
     33 #define p_data	data-8(SP)	// current data pointer
     34 #define buf	buffer-(8+4*16)(SP)	//16 words temporary buffer
     35 		// 3 words at 4..12(R13) for called routine parameters
     36 
     37 TEXT	block(SB), NOSPLIT, $84-16
     38 	MOVW	p+4(FP), Rdata	// pointer to the data
     39 	MOVW	p_len+8(FP), Rt0	// number of bytes
     40 	ADD	Rdata, Rt0
     41 	MOVW	Rt0, p_end	// pointer to end of data
     42 
     43 loop:
     44 	MOVW	Rdata, p_data	// Save Rdata
     45 	AND.S	$3, Rdata, Rt0	// TST $3, Rdata not working see issue 5921
     46 	BEQ	aligned			// aligned detected - skip copy
     47 
     48 	// Copy the unaligned source data into the aligned temporary buffer
     49 	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
     50 	MOVW	$buf, Rtable	// to
     51 	MOVW	$64, Rc0		// n
     52 	MOVM.IB	[Rtable,Rdata,Rc0], (R13)
     53 	BL	runtimememmove(SB)
     54 
     55 	// Point to the local aligned copy of the data
     56 	MOVW	$buf, Rdata
     57 
     58 aligned:
     59 	// Point to the table of constants
     60 	// A PC relative add would be cheaper than this
     61 	MOVW	$table(SB), Rtable
     62 
     63 	// Load up initial MD5 accumulator
     64 	MOVW	dig+0(FP), Rc0
     65 	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]
     66 
     67 // a += (((c^d)&b)^d) + X[index] + const
     68 // a = a<<shift | a>>(32-shift) + b
     69 #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
     70 	EOR	Rc, Rd, Rt0		; \
     71 	AND	Rb, Rt0			; \
     72 	EOR	Rd, Rt0			; \
     73 	MOVW	(index<<2)(Rdata), Rt1	; \
     74 	ADD	Rt1, Rt0			; \
     75 	ADD	Rconst, Rt0			; \
     76 	ADD	Rt0, Ra			; \
     77 	ADD	Ra@>(32-shift), Rb, Ra	;
     78 
     79 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
     80 	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
     81 	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
     82 	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
     83 	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
     84 
     85 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
     86 	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
     87 	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
     88 	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
     89 	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
     90 
     91 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
     92 	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
     93 	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
     94 	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
     95 	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
     96 
     97 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
     98 	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
     99 	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
    100 	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
    101 	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
    102 
    103 // a += (((b^c)&d)^c) + X[index] + const
    104 // a = a<<shift | a>>(32-shift) + b
    105 #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    106 	EOR	Rb, Rc, Rt0		; \
    107 	AND	Rd, Rt0			; \
    108 	EOR	Rc, Rt0			; \
    109 	MOVW	(index<<2)(Rdata), Rt1	; \
    110 	ADD	Rt1, Rt0			; \
    111 	ADD	Rconst, Rt0			; \
    112 	ADD	Rt0, Ra			; \
    113 	ADD	Ra@>(32-shift), Rb, Ra	;
    114 
    115 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    116 	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
    117 	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
    118 	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
    119 	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
    120 
    121 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    122 	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
    123 	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
    124 	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
    125 	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
    126 
    127 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    128 	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
    129 	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
    130 	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
    131 	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
    132 
    133 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    134 	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
    135 	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
    136 	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
    137 	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
    138 
    139 // a += (b^c^d) + X[index] + const
    140 // a = a<<shift | a>>(32-shift) + b
    141 #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    142 	EOR	Rb, Rc, Rt0		; \
    143 	EOR	Rd, Rt0			; \
    144 	MOVW	(index<<2)(Rdata), Rt1	; \
    145 	ADD	Rt1, Rt0			; \
    146 	ADD	Rconst, Rt0			; \
    147 	ADD	Rt0, Ra			; \
    148 	ADD	Ra@>(32-shift), Rb, Ra	;
    149 
    150 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    151 	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
    152 	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
    153 	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
    154 	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
    155 
    156 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    157 	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
    158 	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
    159 	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
    160 	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
    161 
    162 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    163 	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
    164 	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
    165 	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
    166 	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
    167 
    168 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    169 	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
    170 	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
    171 	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
    172 	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
    173 
    174 // a += (c^(b|^d)) + X[index] + const
    175 // a = a<<shift | a>>(32-shift) + b
    176 #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    177 	MVN	Rd, Rt0			; \
    178 	ORR	Rb, Rt0			; \
    179 	EOR	Rc, Rt0			; \
    180 	MOVW	(index<<2)(Rdata), Rt1	; \
    181 	ADD	Rt1, Rt0			; \
    182 	ADD	Rconst, Rt0			; \
    183 	ADD	Rt0, Ra			; \
    184 	ADD	Ra@>(32-shift), Rb, Ra	;
    185 
    186 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    187 	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
    188 	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
    189 	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
    190 	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
    191 
    192 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    193 	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
    194 	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
    195 	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
    196 	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
    197 
    198 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    199 	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
    200 	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
    201 	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
    202 	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
    203 
    204 	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    205 	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
    206 	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
    207 	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
    208 	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
    209 
    210 	MOVW	dig+0(FP), Rt0
    211 	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]
    212 
    213 	ADD	Rc0, Ra
    214 	ADD	Rc1, Rb
    215 	ADD	Rc2, Rc
    216 	ADD	Rc3, Rd
    217 
    218 	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)
    219 
    220 	MOVW	p_data, Rdata
    221 	MOVW	p_end, Rt0
    222 	ADD	$64, Rdata
    223 	CMP	Rt0, Rdata
    224 	BLO	loop
    225 
    226 	RET
    227 
    228 // MD5 constants table
    229 
    230 	// Round 1
    231 	DATA	table+0x00(SB)/4, $0xd76aa478
    232 	DATA	table+0x04(SB)/4, $0xe8c7b756
    233 	DATA	table+0x08(SB)/4, $0x242070db
    234 	DATA	table+0x0c(SB)/4, $0xc1bdceee
    235 	DATA	table+0x10(SB)/4, $0xf57c0faf
    236 	DATA	table+0x14(SB)/4, $0x4787c62a
    237 	DATA	table+0x18(SB)/4, $0xa8304613
    238 	DATA	table+0x1c(SB)/4, $0xfd469501
    239 	DATA	table+0x20(SB)/4, $0x698098d8
    240 	DATA	table+0x24(SB)/4, $0x8b44f7af
    241 	DATA	table+0x28(SB)/4, $0xffff5bb1
    242 	DATA	table+0x2c(SB)/4, $0x895cd7be
    243 	DATA	table+0x30(SB)/4, $0x6b901122
    244 	DATA	table+0x34(SB)/4, $0xfd987193
    245 	DATA	table+0x38(SB)/4, $0xa679438e
    246 	DATA	table+0x3c(SB)/4, $0x49b40821
    247 	// Round 2
    248 	DATA	table+0x40(SB)/4, $0xf61e2562
    249 	DATA	table+0x44(SB)/4, $0xc040b340
    250 	DATA	table+0x48(SB)/4, $0x265e5a51
    251 	DATA	table+0x4c(SB)/4, $0xe9b6c7aa
    252 	DATA	table+0x50(SB)/4, $0xd62f105d
    253 	DATA	table+0x54(SB)/4, $0x02441453
    254 	DATA	table+0x58(SB)/4, $0xd8a1e681
    255 	DATA	table+0x5c(SB)/4, $0xe7d3fbc8
    256 	DATA	table+0x60(SB)/4, $0x21e1cde6
    257 	DATA	table+0x64(SB)/4, $0xc33707d6
    258 	DATA	table+0x68(SB)/4, $0xf4d50d87
    259 	DATA	table+0x6c(SB)/4, $0x455a14ed
    260 	DATA	table+0x70(SB)/4, $0xa9e3e905
    261 	DATA	table+0x74(SB)/4, $0xfcefa3f8
    262 	DATA	table+0x78(SB)/4, $0x676f02d9
    263 	DATA	table+0x7c(SB)/4, $0x8d2a4c8a
    264 	// Round 3
    265 	DATA	table+0x80(SB)/4, $0xfffa3942
    266 	DATA	table+0x84(SB)/4, $0x8771f681
    267 	DATA	table+0x88(SB)/4, $0x6d9d6122
    268 	DATA	table+0x8c(SB)/4, $0xfde5380c
    269 	DATA	table+0x90(SB)/4, $0xa4beea44
    270 	DATA	table+0x94(SB)/4, $0x4bdecfa9
    271 	DATA	table+0x98(SB)/4, $0xf6bb4b60
    272 	DATA	table+0x9c(SB)/4, $0xbebfbc70
    273 	DATA	table+0xa0(SB)/4, $0x289b7ec6
    274 	DATA	table+0xa4(SB)/4, $0xeaa127fa
    275 	DATA	table+0xa8(SB)/4, $0xd4ef3085
    276 	DATA	table+0xac(SB)/4, $0x04881d05
    277 	DATA	table+0xb0(SB)/4, $0xd9d4d039
    278 	DATA	table+0xb4(SB)/4, $0xe6db99e5
    279 	DATA	table+0xb8(SB)/4, $0x1fa27cf8
    280 	DATA	table+0xbc(SB)/4, $0xc4ac5665
    281 	// Round 4
    282 	DATA	table+0xc0(SB)/4, $0xf4292244
    283 	DATA	table+0xc4(SB)/4, $0x432aff97
    284 	DATA	table+0xc8(SB)/4, $0xab9423a7
    285 	DATA	table+0xcc(SB)/4, $0xfc93a039
    286 	DATA	table+0xd0(SB)/4, $0x655b59c3
    287 	DATA	table+0xd4(SB)/4, $0x8f0ccc92
    288 	DATA	table+0xd8(SB)/4, $0xffeff47d
    289 	DATA	table+0xdc(SB)/4, $0x85845dd1
    290 	DATA	table+0xe0(SB)/4, $0x6fa87e4f
    291 	DATA	table+0xe4(SB)/4, $0xfe2ce6e0
    292 	DATA	table+0xe8(SB)/4, $0xa3014314
    293 	DATA	table+0xec(SB)/4, $0x4e0811a1
    294 	DATA	table+0xf0(SB)/4, $0xf7537e82
    295 	DATA	table+0xf4(SB)/4, $0xbd3af235
    296 	DATA	table+0xf8(SB)/4, $0x2ad7d2bb
    297 	DATA	table+0xfc(SB)/4, $0xeb86d391
    298 	// Global definition
    299 	GLOBL	table(SB),8,$256
    300