// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !plan9

#include "textflag.h"

// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
// The routine below only writes DI, BX, AX and X0 (plus the flags), so DX
// is never modified.

// void runtimememclr(void*, uintptr)
//
// Zeroes n bytes of memory starting at ptr.
//
// Arguments (16 bytes total, no local frame):
//	ptr+0(FP)  address of the first byte to clear
//	n+8(FP)    number of bytes to clear, compared as an unsigned value
//
// Register use:
//	DI  current destination pointer
//	BX  number of bytes still to clear
//	AX  zero source for stores of 1, 2, 4 and 8 bytes
//	X0  zero source for 16-byte stores (only set up when more than 16 bytes remain)
//
// Strategy: sizes of 256 bytes or less are handled without a loop by a pair
// of stores (or a pair of store groups), one anchored at the start of the
// region and one anchored at its end. The two may overlap, which is harmless
// because every store writes zero. Larger sizes are cleared 256 bytes at a
// time and the remainder is sent back through the same dispatch.
//
// NOTE(review): Go assembly symbols are normally written package·name; the
// name used here and in the prototype comment above contains no "·", which
// may have been lost when this file was copied — confirm against the Go
// declaration before relying on the symbol name.
TEXT runtimememclr(SB), NOSPLIT, $0-16
	MOVQ	ptr+0(FP), DI
	MOVQ	n+8(FP), BX
	XORQ	AX, AX			// AX = 0

	// MOVOU seems always faster than REP STOSQ.
	// Dispatch on the remaining byte count in BX. JBE is an unsigned
	// comparison, and each test relies on the smaller ranges having already
	// been ruled out by the tests before it.
tail:
	TESTQ	BX, BX
	JEQ	_0
	CMPQ	BX, $2
	JBE	_1or2
	CMPQ	BX, $4
	JBE	_3or4
	CMPQ	BX, $8
	JBE	_5through8
	CMPQ	BX, $16
	JBE	_9through16
	PXOR	X0, X0			// X0 = 0; every path from here on stores 16 bytes at a time
	CMPQ	BX, $32
	JBE	_17through32
	CMPQ	BX, $64
	JBE	_33through64
	CMPQ	BX, $128
	JBE	_65through128
	CMPQ	BX, $256
	JBE	_129through256
	// TODO: use branch table and BSR to make this just a single dispatch
	// TODO: for really big clears, use MOVNTDQ.

	// More than 256 bytes remain when control falls through to here.
	// Each iteration clears 256 bytes with sixteen 16-byte stores, then
	// advances DI and shrinks BX by the same amount.
loop:
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBQ	$256, BX
	ADDQ	$256, DI
	CMPQ	BX, $256
	JAE	loop			// keep going while at least 256 bytes remain
	JMP	tail			// 0..255 bytes left: reuse the small-size dispatch

	// In every case below, the first store (or group) starts at DI and the
	// second ends exactly at DI+BX, so together they cover all BX bytes.

	// 1 or 2 bytes: first byte and last byte (the same byte when BX == 1).
_1or2:
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET
	// Nothing left to clear.
_0:
	RET
	// 3 or 4 bytes: first two bytes and last two bytes.
_3or4:
	MOVW	AX, (DI)
	MOVW	AX, -2(DI)(BX*1)
	RET
	// 5 to 8 bytes: first four bytes and last four bytes.
_5through8:
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET
	// 9 to 16 bytes: first eight bytes and last eight bytes.
_9through16:
	MOVQ	AX, (DI)
	MOVQ	AX, -8(DI)(BX*1)
	RET
	// 17 to 32 bytes: first 16 bytes and last 16 bytes.
_17through32:
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET
	// 33 to 64 bytes: first 32 bytes and last 32 bytes.
_33through64:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
	// 65 to 128 bytes: first 64 bytes and last 64 bytes.
_65through128:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
	// 129 to 256 bytes: first 128 bytes and last 128 bytes.
_129through256:
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET