// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !plan9

#include "textflag.h"

// NOTE: Windows externalthreadhandler expects memclr to preserve DX.

// void runtime·memclr(void*, uintptr)
//
// Zeroes n bytes starting at ptr. The routine uses no local frame and
// takes 8 bytes of arguments ($0-8): ptr at 0(FP) and n at 4(FP).
//
// Register use:
//	DI	current destination pointer
//	BX	number of bytes still to clear
//	AX	zero source for the integer stores and for STOSL
//	CX	dword count for REP STOSL (non-SSE2 path only)
//	X0	zero source for the 16-byte MOVOU stores (SSE2 path only)
//
// Strategy: the byte count is dispatched to a handler for its size class.
// Each handler writes a fixed-size run forward from DI and a fixed-size
// run that ends exactly at DI+BX. For counts below the top of the class
// the two runs overlap, which is harmless because every store writes
// zero, and it lets one straight-line sequence cover the whole class.
// Counts above 256 go through a 256-byte-per-iteration loop and then
// re-enter the dispatch for the remainder.
TEXT runtime·memclr(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), BX
	XORL	AX, AX

	// MOVOU seems always faster than REP STOSL.
tail:
	// Dispatch on the remaining byte count. Sizes up to 16 bytes use
	// integer stores only, so they work whether or not SSE2 is present.
	TESTL	BX, BX
	JEQ	_0
	CMPL	BX, $2
	JBE	_1or2
	CMPL	BX, $4
	JBE	_3or4
	CMPL	BX, $8
	JBE	_5through8
	CMPL	BX, $16
	JBE	_9through16
	TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
	JEQ	nosse2
	PXOR	X0, X0	// X0 = 0, the source for all 16-byte stores below
	CMPL	BX, $32
	JBE	_17through32
	CMPL	BX, $64
	JBE	_33through64
	CMPL	BX, $128
	JBE	_65through128
	CMPL	BX, $256
	JBE	_129through256
	// TODO: use branch table and BSR to make this just a single dispatch

loop:
	// Only entered with BX > 256. Clear 256 bytes per iteration with
	// sixteen 16-byte stores.
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBL	$256, BX
	ADDL	$256, DI
	CMPL	BX, $256
	JAE	loop
	// Fewer than 256 bytes remain (possibly zero); re-dispatch.
	JMP	tail

_1or2:
	// First byte and last byte; the same byte when BX == 1.
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET
_0:
	RET
_3or4:
	// First 2 bytes and last 2 bytes.
	MOVW	AX, (DI)
	MOVW	AX, -2(DI)(BX*1)
	RET
_5through8:
	// First 4 bytes and last 4 bytes.
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET
_9through16:
	// First 8 bytes and last 8 bytes, as two 4-byte stores each.
	MOVL	AX, (DI)
	MOVL	AX, 4(DI)
	MOVL	AX, -8(DI)(BX*1)
	MOVL	AX, -4(DI)(BX*1)
	RET
_17through32:
	// First 16 bytes and last 16 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_33through64:
	// First 32 bytes and last 32 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_65through128:
	// First 64 bytes and last 64 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_129through256:
	// First 128 bytes and last 128 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
nosse2:
	// Fallback for CPUs without SSE2; only reached with BX > 16.
	// Store BX/4 zero dwords with REP STOSL, which takes its count
	// from CX and its value from AX.
	// NOTE(review): relies on the direction flag being clear so that
	// STOSL advances DI past the cleared dwords - confirm against the
	// runtime's calling convention.
	MOVL	BX, CX
	SHRL	$2, CX
	REP
	STOSL
	// 0-3 bytes may remain; let the small-size dispatch finish them.
	ANDL	$3, BX
	JNE	tail
	RET