// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This code was translated into a form compatible with 5a from the public
// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.

// +build arm,!gccgo,!appengine

// Layout of the 64-byte state block shared by the routines in this file
// (byte offsets, as used by the loads and stores below):
//
//	 0..19  r: key words 0-3 split into five limbs at 26-bit boundaries
//	           and masked with poly1305_init_constants_armv6
//	20..39  h: five accumulator limbs, zeroed by poly1305_init_ext_armv6
//	40..55  key words 4-7; added to the result in poly1305_finish_ext_armv6
//	56      flag word: written as zero by init, set to 1 by finish before
//	           it hashes the padded tail block
//
// Bytes 60..63 are not read by any routine here; finish clears all 64 bytes.

// Per-limb AND masks applied to the five key limbs by
// poly1305_init_ext_armv6. The GLOBL flag value 8 is RODATA in the Go
// toolchain's textflag.h.
DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
GLOBL poly1305_init_constants_armv6<>(SB), 8, $20

// Warning: the linker may use R11 to synthesize certain instructions. Please
// take care and verify that no synthetic instructions use it.

// poly1305_init_ext_armv6 fills in the state block from the key.
//
//	In:  R0 = state block, R1 = key (eight 32-bit words are read)
//	Out: 60 bytes written at the incoming R0; R0 is left advanced by 40
//	     and R1 by 32 (write-back forms of MOVM)
//
// R4-R11 (including g) are pushed on entry and popped before RET; R2, R3
// and R12 are clobbered. The TEXT flag value 4 is NOSPLIT in textflag.h.
// NOTE(review): the $-4 frame size presumably tells the toolchain to emit no
// frame at all, which is why R13 is managed by hand here - confirm against
// the Go assembler documentation.
TEXT poly1305_init_ext_armv6<>(SB),4,$-4
	MOVM.DB.W [R4-R11], (R13)
	// Load key words 0-3 and split them into five limbs at bit offsets
	// 0, 26, 52, 78 and 104.
	MOVM.IA.W (R1), [R2-R5]
	MOVW $poly1305_init_constants_armv6<>(SB), R7
	MOVW R2, R8
	MOVW R2>>26, R9
	MOVW R3>>20, g
	MOVW R4>>14, R11
	MOVW R5>>8, R12
	ORR R3<<6, R9, R9
	ORR R4<<12, g, g
	ORR R5<<18, R11, R11
	// Mask each limb with its constant and store r at state+0.
	MOVM.IA (R7), [R2-R6]
	AND R8, R2, R2
	AND R9, R3, R3
	AND g, R4, R4
	AND R11, R5, R5
	AND R12, R6, R6
	MOVM.IA.W [R2-R6], (R0)
	// Zero the five accumulator limbs at state+20.
	EOR R2, R2, R2
	EOR R3, R3, R3
	EOR R4, R4, R4
	EOR R5, R5, R5
	EOR R6, R6, R6
	MOVM.IA.W [R2-R6], (R0)
	// Copy key words 4-7 to state+40; R6 is still zero, so the flag word
	// at state+56 is cleared by the same store.
	MOVM.IA.W (R1), [R2-R5]
	MOVM.IA [R2-R6], (R0)
	MOVM.IA.W (R13), [R4-R11]
	RET

// MOVW_UNALIGNED copies the four bytes at offset(Rsrc) to offset(Rdst) one
// byte at a time, using Rtmp as scratch. Neither pointer is modified.
#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
	MOVBU (offset+0)(Rsrc), Rtmp; \
	MOVBU Rtmp, (offset+0)(Rdst); \
	MOVBU (offset+1)(Rsrc), Rtmp; \
	MOVBU Rtmp, (offset+1)(Rdst); \
	MOVBU (offset+2)(Rsrc), Rtmp; \
	MOVBU Rtmp, (offset+2)(Rdst); \
	MOVBU (offset+3)(Rsrc), Rtmp; \
	MOVBU Rtmp, (offset+3)(Rdst)

// poly1305_blocks_armv6 absorbs 16-byte blocks into the accumulator.
//
//	In:  R0 = state block, R1 = message pointer (any alignment),
//	     R2 = byte count
//	Out: accumulator limbs at state+20..39 updated
//
// Nothing is processed when R2 < 16; otherwise one block is consumed per
// iteration for as long as at least 32 bytes remained before the block just
// processed. R4-R9, g, R11 and R14 are saved and restored; R0-R3 and R12 are
// clobbered.
//
// Scratch area (128 bytes below the saved registers, offsets from R13):
//
//	 0..31  four 64-bit partial sums spilled during the multiply
//	32      constant ORed into the top message limb: 1<<24 when the flag
//	           word at state+56 is zero, otherwise 0
//	36      saved state pointer
//	40      current message pointer
//	44      remaining byte count
//	48..63  bounce buffer for unaligned message blocks
//	64..83  copy of the five r limbs
TEXT poly1305_blocks_armv6<>(SB),4,$-4
	MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
	SUB $128, R13
	MOVW R0, 36(R13)
	MOVW R1, 40(R13)
	MOVW R2, 44(R13)
	MOVW R1, R14
	MOVW R2, R12
	// Choose the top-limb constant from the flag word at state+56.
	MOVW 56(R0), R8
	WORD $0xe1180008 // TST R8, R8 not working see issue 5921
	EOR R6, R6, R6
	MOVW.EQ $(1<<24), R6
	MOVW R6, 32(R13)
	// R0-R4 = r limbs, R5-R9 = h limbs; keep a copy of r at 64(R13).
	ADD $64, R13, g
	MOVM.IA (R0), [R0-R9]
	MOVM.IA [R0-R4], (g)
	CMP $16, R12
	BLO poly1305_blocks_armv6_done
poly1305_blocks_armv6_mainloop:
	// Load the next 16 message bytes into R0-R3. A pointer that is not
	// word aligned goes through the bounce buffer at 48(R13).
	WORD $0xe31e0003 // TST R14, #3 not working see issue 5921
	BEQ poly1305_blocks_armv6_mainloop_aligned
	ADD $48, R13, g
	MOVW_UNALIGNED(R14, g, R0, 0)
	MOVW_UNALIGNED(R14, g, R0, 4)
	MOVW_UNALIGNED(R14, g, R0, 8)
	MOVW_UNALIGNED(R14, g, R0, 12)
	MOVM.IA (g), [R0-R3]
	ADD $16, R14
	B poly1305_blocks_armv6_mainloop_loaded
poly1305_blocks_armv6_mainloop_aligned:
	MOVM.IA.W (R14), [R0-R3]
poly1305_blocks_armv6_mainloop_loaded:
	// Split the block into five 26-bit limbs, OR the constant from
	// 32(R13) into the top limb, and add the limbs to h (R5-R9). The
	// advanced message pointer is saved on the way.
	MOVW R0>>26, g
	MOVW R1>>20, R11
	MOVW R2>>14, R12
	MOVW R14, 40(R13)
	MOVW R3>>8, R4
	ORR R1<<6, g, g
	ORR R2<<12, R11, R11
	ORR R3<<18, R12, R12
	BIC $0xfc000000, R0, R0
	BIC $0xfc000000, g, g
	MOVW 32(R13), R3
	BIC $0xfc000000, R11, R11
	BIC $0xfc000000, R12, R12
	ADD R0, R5, R5
	ADD g, R6, R6
	ORR R3, R4, R4
	ADD R11, R7, R7
	ADD $64, R13, R14
	ADD R12, R8, R8
	ADD R4, R9, R9
	// Multiply h by r. R0-R4 are reloaded with r0-r4 and are replaced in
	// place by 5*r_i (ADD Ri<<2, Ri) once the plain value is no longer
	// needed. (R11, g) and (R14, R12) are 64-bit accumulators, with the
	// low word in g / R12.
	MOVM.IA (R14), [R0-R4]
	// (R11, g)   = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0    -> 24(R13)
	// (R14, R12) = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4  -> 16(R13)
	MULLU R4, R5, (R11, g)
	MULLU R3, R5, (R14, R12)
	MULALU R3, R6, (R11, g)
	MULALU R2, R6, (R14, R12)
	MULALU R2, R7, (R11, g)
	MULALU R1, R7, (R14, R12)
	ADD R4<<2, R4, R4
	ADD R3<<2, R3, R3
	MULALU R1, R8, (R11, g)
	MULALU R0, R8, (R14, R12)
	MULALU R0, R9, (R11, g)
	MULALU R4, R9, (R14, R12)
	MOVW g, 24(R13)
	MOVW R11, 28(R13)
	MOVW R12, 16(R13)
	MOVW R14, 20(R13)
	// (R11, g)   = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3    -> 8(R13)
	// (R14, R12) = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2  -> 0(R13)
	MULLU R2, R5, (R11, g)
	MULLU R1, R5, (R14, R12)
	MULALU R1, R6, (R11, g)
	MULALU R0, R6, (R14, R12)
	MULALU R0, R7, (R11, g)
	MULALU R4, R7, (R14, R12)
	ADD R2<<2, R2, R2
	ADD R1<<2, R1, R1
	MULALU R4, R8, (R11, g)
	MULALU R3, R8, (R14, R12)
	MULALU R3, R9, (R11, g)
	MULALU R2, R9, (R14, R12)
	MOVW g, 8(R13)
	MOVW R11, 12(R13)
	MOVW R12, 0(R13)
	// NOTE(review): this store and the two w+NN(SP) loads near the end of
	// the loop use the name+offset(SP) form; they evidently address the
	// same slots as 4(R13), 44(R13) and 40(R13) (the MOVM below reads
	// 0..31(R13) as four complete pairs) - confirm how the assembler
	// resolves this form with a $-4 frame.
	MOVW R14, w+4(SP)
	// (R11, g) = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
	MULLU R0, R5, (R11, g)
	MULALU R4, R6, (R11, g)
	MULALU R3, R7, (R11, g)
	MULALU R2, R8, (R11, g)
	MULALU R1, R9, (R11, g)
	// Reload the four spilled sums as (lo, hi) pairs into R0-R7 and carry
	// across the five sums so each limb fits in 26 bits again. The carry
	// out of the top sum is multiplied by 5 and folded into the bottom
	// limb. The new h ends up in R5-R9.
	MOVM.IA (R13), [R0-R7]
	MOVW g>>26, R12
	MOVW R4>>26, R14
	ORR R11<<6, R12, R12
	ORR R5<<6, R14, R14
	BIC $0xfc000000, g, g
	BIC $0xfc000000, R4, R4
	ADD.S R12, R0, R0
	ADC $0, R1, R1
	ADD.S R14, R6, R6
	ADC $0, R7, R7
	MOVW R0>>26, R12
	MOVW R6>>26, R14
	ORR R1<<6, R12, R12
	ORR R7<<6, R14, R14
	BIC $0xfc000000, R0, R0
	BIC $0xfc000000, R6, R6
	ADD R14<<2, R14, R14
	ADD.S R12, R2, R2
	ADC $0, R3, R3
	ADD R14, g, g
	MOVW R2>>26, R12
	MOVW g>>26, R14
	ORR R3<<6, R12, R12
	BIC $0xfc000000, g, R5
	BIC $0xfc000000, R2, R7
	ADD R12, R4, R4
	ADD R14, R0, R0
	MOVW R4>>26, R12
	BIC $0xfc000000, R4, R8
	ADD R12, R6, R9
	// Reload the remaining count and message pointer. The branch uses the
	// flags from CMP (the SUB does not set flags): loop again if at least
	// 32 bytes remained before this block was consumed.
	MOVW w+44(SP), R12
	MOVW w+40(SP), R14
	MOVW R0, R6
	CMP $32, R12
	SUB $16, R12, R12
	MOVW R12, 44(R13)
	BHS poly1305_blocks_armv6_mainloop
poly1305_blocks_armv6_done:
	// Write h back to state+20..39 and unwind.
	MOVW 36(R13), R12
	MOVW R5, 20(R12)
	MOVW R6, 24(R12)
	MOVW R7, 28(R12)
	MOVW R8, 32(R12)
	MOVW R9, 36(R12)
	ADD $128, R13, R13
	MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
	RET

// MOVHUP_UNALIGNED copies two bytes from (Rsrc) to (Rdst) bytewise and
// post-increments both pointers by 2. Rtmp is scratch.
#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
	MOVBU.P 1(Rsrc), Rtmp; \
	MOVBU.P Rtmp, 1(Rdst); \
	MOVBU.P 1(Rsrc), Rtmp; \
	MOVBU.P Rtmp, 1(Rdst)

// MOVWP_UNALIGNED copies four bytes from (Rsrc) to (Rdst) bytewise and
// post-increments both pointers by 4. Rtmp is scratch.
#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)

// poly1305_finish_ext_armv6 absorbs the trailing partial block, if any,
// performs the final reduction, adds key words 4-7 and writes the 16-byte
// result.
//
//	In:  R0 = state block, R1 = remaining message bytes,
//	     R2 = remaining byte count (only bits 8, 4, 2 and 1 select bytes
//	          to copy, so callers presumably pass a value below 16),
//	     R3 = output pointer (16 bytes are written with one MOVM)
//	Out: result at the incoming R3; all 64 bytes of the state block zeroed
//
// R4-R9, g, R11 and R14 are saved and restored; R0-R3 and R12 are clobbered.
TEXT poly1305_finish_ext_armv6<>(SB),4,$-4
	MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
	SUB $16, R13, R13
	// Keep the arguments in registers that poly1305_blocks_armv6 restores
	// before it returns.
	MOVW R0, R5
	MOVW R1, R6
	MOVW R2, R7
	MOVW R3, R8
	AND.S R2, R2, R2
	BEQ poly1305_finish_ext_armv6_noremaining
	// Zero a 16-byte buffer on the stack, then copy the tail into it in
	// pieces of 8, 4, 2 and 1 bytes as selected by the bits of R2. R9 is
	// the write cursor.
	EOR R0, R0
	MOVW R13, R9
	MOVW R0, 0(R13)
	MOVW R0, 4(R13)
	MOVW R0, 8(R13)
	MOVW R0, 12(R13)
	WORD $0xe3110003 // TST R1, #3 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_aligned
	// Source not word aligned: bytewise copies.
	WORD $0xe3120008 // TST R2, #8 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip8
	MOVWP_UNALIGNED(R1, R9, g)
	MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip8:
	WORD $0xe3120004 // TST $4, R2 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip4
	MOVWP_UNALIGNED(R1, R9, g)
poly1305_finish_ext_armv6_skip4:
	WORD $0xe3120002 // TST $2, R2 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip2
	MOVHUP_UNALIGNED(R1, R9, g)
	B poly1305_finish_ext_armv6_skip2
poly1305_finish_ext_armv6_aligned:
	// Source word aligned: word and halfword copies.
	WORD $0xe3120008 // TST R2, #8 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip8_aligned
	MOVM.IA.W (R1), [g-R11]
	MOVM.IA.W [g-R11], (R9)
poly1305_finish_ext_armv6_skip8_aligned:
	WORD $0xe3120004 // TST $4, R2 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip4_aligned
	MOVW.P 4(R1), g
	MOVW.P g, 4(R9)
poly1305_finish_ext_armv6_skip4_aligned:
	WORD $0xe3120002 // TST $2, R2 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip2
	MOVHU.P 2(R1), g
	MOVH.P g, 2(R9)
poly1305_finish_ext_armv6_skip2:
	// Both paths share the final single-byte copy.
	WORD $0xe3120001 // TST $1, R2 not working see issue 5921
	BEQ poly1305_finish_ext_armv6_skip1
	MOVBU.P 1(R1), g
	MOVBU.P g, 1(R9)
poly1305_finish_ext_armv6_skip1:
	// Append a single 0x01 byte after the copied data, set the flag word
	// at state+56 to 1 so poly1305_blocks_armv6 ORs nothing into the top
	// limb, and absorb the padded buffer as one 16-byte block.
	MOVW $1, R11
	MOVBU R11, 0(R9)
	MOVW R11, 56(R5)
	MOVW R5, R0
	MOVW R13, R1
	MOVW $16, R2
	BL poly1305_blocks_armv6<>(SB)
poly1305_finish_ext_armv6_noremaining:
	// Load h0-h4 and carry fully: fold 5 times the overflow of the top
	// limb into h0, then ripple carries up through h4.
	MOVW 20(R5), R0
	MOVW 24(R5), R1
	MOVW 28(R5), R2
	MOVW 32(R5), R3
	MOVW 36(R5), R4
	MOVW R4>>26, R12
	BIC $0xfc000000, R4, R4
	ADD R12<<2, R12, R12
	ADD R12, R0, R0
	MOVW R0>>26, R12
	BIC $0xfc000000, R0, R0
	ADD R12, R1, R1
	MOVW R1>>26, R12
	BIC $0xfc000000, R1, R1
	ADD R12, R2, R2
	MOVW R2>>26, R12
	BIC $0xfc000000, R2, R2
	ADD R12, R3, R3
	MOVW R3>>26, R12
	BIC $0xfc000000, R3, R3
	ADD R12, R4, R4
	// Compute t = h + 5 - 2^130 into R6, R7, g, R11, R14 (the -(1<<26)
	// added to the top limb supplies the -2^130).
	ADD $5, R0, R6
	MOVW R6>>26, R12
	BIC $0xfc000000, R6, R6
	ADD R12, R1, R7
	MOVW R7>>26, R12
	BIC $0xfc000000, R7, R7
	ADD R12, R2, g
	MOVW g>>26, R12
	BIC $0xfc000000, g, g
	ADD R12, R3, R11
	MOVW $-(1<<26), R12
	ADD R11>>26, R12, R12
	BIC $0xfc000000, R11, R11
	ADD R12, R4, R14
	// Branch-free select: R12 = (sign bit of top limb of t) - 1, i.e. 0
	// when t is negative and all ones otherwise. Keep t under that mask
	// and h under its complement.
	MOVW R14>>31, R12
	SUB $1, R12
	AND R12, R6, R6
	AND R12, R7, R7
	AND R12, g, g
	AND R12, R11, R11
	AND R12, R14, R14
	MVN R12, R12
	AND R12, R0, R0
	AND R12, R1, R1
	AND R12, R2, R2
	AND R12, R3, R3
	AND R12, R4, R4
	ORR R6, R0, R0
	ORR R7, R1, R1
	ORR g, R2, R2
	ORR R11, R3, R3
	ORR R14, R4, R4
	// Repack the five 26-bit limbs into four 32-bit words in R0-R3.
	ORR R1<<26, R0, R0
	MOVW R1>>6, R1
	ORR R2<<20, R1, R1
	MOVW R2>>12, R2
	ORR R3<<14, R2, R2
	MOVW R3>>18, R3
	ORR R4<<8, R3, R3
	// Add the four words stored at state+40..55 with carry propagation
	// and write the result to the output buffer.
	MOVW 40(R5), R6
	MOVW 44(R5), R7
	MOVW 48(R5), g
	MOVW 52(R5), R11
	ADD.S R6, R0, R0
	ADC.S R7, R1, R1
	ADC.S g, R2, R2
	ADC.S R11, R3, R3
	MOVM.IA [R0-R3], (R8)
	// Clear the whole 64-byte state block with two 32-byte stores.
	MOVW R5, R12
	EOR R0, R0, R0
	EOR R1, R1, R1
	EOR R2, R2, R2
	EOR R3, R3, R3
	EOR R4, R4, R4
	EOR R5, R5, R5
	EOR R6, R6, R6
	EOR R7, R7, R7
	MOVM.IA.W [R0-R7], (R12)
	MOVM.IA [R0-R7], (R12)
	ADD $16, R13, R13
	MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
	RET

// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
//
// Go-callable entry point: computes the tag of the mlen bytes at m under key
// and stores it at out. The leading middle dot places the symbol in the
// current package so that the Go declaration can bind to it.
//
// The 64-byte state block is carved out of this function's own frame: R13 is
// rounded down to a 64-byte boundary and lowered by a further 64 bytes, with
// the original value kept in R8 (which every helper restores before
// returning) and put back before RET.
// NOTE(review): the $280 frame is presumably sized to cover the alignment
// slack, the state block and the helpers' own pushes and scratch - confirm
// before changing any helper's stack use.
TEXT ·poly1305_auth_armv6(SB),0,$280-16
	MOVW out+0(FP), R4
	MOVW m+4(FP), R5
	MOVW mlen+8(FP), R6
	MOVW key+12(FP), R7

	MOVW R13, R8
	BIC $63, R13
	SUB $64, R13, R13
	MOVW R13, R0
	MOVW R7, R1
	BL poly1305_init_ext_armv6<>(SB)
	// R2 = mlen rounded down to a multiple of 16. When it is non-zero,
	// hash that many bytes and advance m / shrink mlen past them.
	BIC.S $15, R6, R2
	BEQ poly1305_auth_armv6_noblocks
	MOVW R13, R0
	MOVW R5, R1
	ADD R2, R5, R5
	SUB R2, R6, R6
	BL poly1305_blocks_armv6<>(SB)
poly1305_auth_armv6_noblocks:
	// Hash the tail (R6 bytes at R5) and emit the tag at out.
	MOVW R13, R0
	MOVW R5, R1
	MOVW R6, R2
	MOVW R4, R3
	BL poly1305_finish_ext_armv6<>(SB)
	MOVW R8, R13
	RET