// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on the md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

#include "textflag.h"

// TODO: This could be updated for big-endian ppc64 by using the
// correct byte-reverse load instruction. Changes to the Go assembler
// are required to make that instruction work.

#define MOVE_LITTLE_ENDIAN MOVWZ
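// MOVE_LITTLE_ENDIAN marks the loads of the 32-bit message words. On
// little-endian ppc64le a plain zero-extending word load (MOVWZ) already
// yields the little-endian word order MD5 requires, so no byte swap is
// needed. A big-endian ppc64 port would presumably swap this for a
// byte-reversed load such as lwbrx, per the TODO above.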

TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10	// R10 = pointer to the digest state (a, b, c, d)
	MOVD	p+8(FP), R6	// R6 = pointer to the message data
	MOVD	p_len+16(FP), R5	// R5 = length of the message data
	SRD	$6, R5	// round the length down to a multiple of the 64-byte block size
	SLD	$6, R5
	ADD	R6, R5, R7	// R7 = end of the data to process

	MOVWZ	0(R10), R22	// a
	MOVWZ	4(R10), R3	// b
	MOVWZ	8(R10), R4	// c
	MOVWZ	12(R10), R5	// d
	CMP	R6, R7
	BEQ	end	// nothing to do for an empty input
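// Main loop: process one 64-byte block per iteration. R22, R3, R4, R5
// hold the four state words, R8 holds the current message word, and
// R9/R10 are scratch registers for the round functions.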

loop:
	MOVWZ	R22, R14	// save the state at the start of the block;
	MOVWZ	R3, R15	// it is added back in after the 64 steps
	MOVWZ	R4, R16
	MOVWZ	R5, R17

	MOVE_LITTLE_ENDIAN	0(R6), R8	// R8 = X[0], the first message word
	MOVWZ	R5, R9	// R9 = d for the first ROUND1 step

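// ROUND1 performs one step of MD5 round 1:
//	a += F(b, c, d) + X[k] + const;  a = rotl32(a, shift) + b
// where F(b, c, d) = (b & c) | (^b & d), computed here as ((c ^ d) & b) ^ d,
// and X[k] is the message word already loaded into R8. The index argument
// names the message word to prefetch into R8 for the next step; R9 holds
// d on entry and is left holding c, which becomes the next step's d.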
#define ROUND1(a, b, c, d, index, const, shift) \
	XOR	c, R9; \
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R9; \
	XOR	d, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND1(R22,R3,R4,R5, 1,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4, 2,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3, 3,0x242070db,17);
	ROUND1(R3,R4,R5,R22, 4,0xc1bdceee,22);
	ROUND1(R22,R3,R4,R5, 5,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4, 6,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3, 7,0xa8304613,17);
	ROUND1(R3,R4,R5,R22, 8,0xfd469501,22);
	ROUND1(R22,R3,R4,R5, 9,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,10,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,11,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,12,0x895cd7be,22);
	ROUND1(R22,R3,R4,R5,13,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,14,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,15,0xa679438e,17);
	ROUND1(R3,R4,R5,R22, 0,0x49b40821,22);

	MOVE_LITTLE_ENDIAN	(1*4)(R6), R8	// R8 = X[1], used by the first ROUND2 step
	MOVWZ	R5, R9	// R9 = d
	MOVWZ	R5, R10	// R10 = d

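// ROUND2 performs one step of MD5 round 2:
//	a += G(b, c, d) + X[k] + const;  a = rotl32(a, shift) + b
// where G(b, c, d) = (b & d) | (c & ^d), built up in R10 and R9.
// On entry R9 and R10 both hold d; the macro prefetches X[index] into R8
// for the next step and leaves c in R9 and R10, the next step's d.
// The leading XOR with 0xffffffff stands in for a 32-bit NOT of R9.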
#define ROUND2(a, b, c, d, index, const, shift) \
	XOR	$0xffffffff, R9; \ // NOTW R9
	ADD	$const, a; \
	ADD	R8, a; \
	AND	b, R10; \
	AND	c, R9; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	OR	R9, R10; \
	MOVWZ	c, R9; \
	ADD	R10, a; \
	MOVWZ	c, R10; \
	RLWMI	$shift, a, $0xffffffff, a; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND2(R22,R3,R4,R5, 6,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,11,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3, 0,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22, 5,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,10,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,15, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3, 4,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22, 9,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,14,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4, 3,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3, 8,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,13,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5, 2,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4, 7,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,12,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22, 0,0x8d2a4c8a,20);

	MOVE_LITTLE_ENDIAN	(5*4)(R6), R8	// R8 = X[5], used by the first ROUND3 step
	MOVWZ	R4, R9	// R9 = c

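// ROUND3 performs one step of MD5 round 3:
//	a += H(b, c, d) + X[k] + const;  a = rotl32(a, shift) + b
// where H(b, c, d) = b ^ c ^ d. On entry R9 holds c; the macro prefetches
// X[index] into R8 for the next step and leaves b in R9, which is the
// next step's c.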
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	XOR	d, R9; \
	XOR	b, R9; \
	ADD	R9, a; \
	RLWMI	$shift, a, $0xffffffff, a; \
	MOVWZ	b, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND3(R22,R3,R4,R5, 8,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,11,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,14,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22, 1,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5, 4,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4, 7,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,10,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,13,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5, 0,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4, 3,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3, 6,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22, 9, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,12,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,15,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3, 2,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22, 0,0xc4ac5665,23);

	MOVE_LITTLE_ENDIAN	(0*4)(R6), R8	// R8 = X[0], used by the first ROUND4 step
	MOVWZ	$0xffffffff, R9
	XOR	R5, R9	// R9 = ^d

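// ROUND4 performs one step of MD5 round 4:
//	a += I(b, c, d) + X[k] + const;  a = rotl32(a, shift) + b
// where I(b, c, d) = c ^ (b | ^d). On entry R9 holds ^d; the macro
// prefetches X[index] into R8 for the next step and leaves ^c in R9,
// the complement of the next step's d.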
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, a; \
	ADD	R8, a; \
	OR	b, R9; \
	XOR	c, R9; \
	ADD	R9, a; \
	MOVE_LITTLE_ENDIAN	(index*4)(R6), R8; \
	MOVWZ	$0xffffffff, R9; \
	RLWMI	$shift, a, $0xffffffff, a; \
	XOR	c, R9; \
	ADD	b, a; \
	MOVWZ	a, a

	ROUND4(R22,R3,R4,R5, 7,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,14,0x432aff97,10);
	ROUND4(R4,R5,R22,R3, 5,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,12,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5, 3,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,10,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3, 1,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22, 8,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,15,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4, 6,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,13,0xa3014314,15);
	ROUND4(R3,R4,R5,R22, 4,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,11,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4, 2,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3, 9,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22, 0,0xeb86d391,21);

	ADD	R14, R22	// add the state saved at the start of the block
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6	// advance to the next 64-byte block
	CMP	R6, R7
	BLT	loop

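// Write the final state back to dig. The digest pointer is reloaded here
// because R10 was used as a scratch register in the ROUND2 steps.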
end:
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)
	RET
    193