Home | History | Annotate | Download | only in runtime
      1 // Copyright 2014 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build !plan9
      6 
      7 #include "textflag.h"
      8 
      9 // NOTE: Windows externalthreadhandler expects memclr to preserve DX.
     10 
     11 // void runtimememclr(void*, uintptr): store zero to the n bytes starting at ptr.
     12 TEXT runtimememclr(SB), NOSPLIT, $0-8	// $0-8: no local frame, 8 bytes of args (ptr, n). NOTE(review): Go asm symbols are normally spelled with a middle dot (runtime·memclr); it looks lost here — confirm
     13 	MOVL	ptr+0(FP), DI	// DI = ptr: address of the next byte to clear
     14 	MOVL	n+4(FP), BX	// BX = n: number of bytes still to clear
     15 	XORL	AX, AX	// AX = 0: zero source for the byte/word/long stores and for STOSL
     16 
     17 	// MOVOU seems always faster than REP STOSL.
     18 tail:	// dispatch on the remaining count BX; re-entered from the bulk loop and from nosse2 with the leftover count
     19 	TESTL	BX, BX
     20 	JEQ	_0	// nothing (left) to clear
     21 	CMPL	BX, $2
     22 	JBE	_1or2	// unsigned compare: BX is 1 or 2
     23 	CMPL	BX, $4
     24 	JBE	_3or4
     25 	CMPL	BX, $8
     26 	JBE	_5through8
     27 	CMPL	BX, $16
     28 	JBE	_9through16
     29 	TESTL	$0x4000000, runtimecpuid_edx(SB) // check for sse2 (tests bit 26 of runtimecpuid_edx)
     30 	JEQ	nosse2	// bit clear: avoid MOVOU and fall back to REP STOSL (only reached with BX > 16)
     31 	PXOR	X0, X0	// X0 = 0: 16-byte zero source for the MOVOU stores
     32 	CMPL	BX, $32
     33 	JBE	_17through32
     34 	CMPL	BX, $64
     35 	JBE	_33through64
     36 	CMPL	BX, $128
     37 	JBE	_65through128
     38 	CMPL	BX, $256
     39 	JBE	_129through256	// otherwise BX > 256: fall through into the bulk loop
     40 	// TODO: use branch table and BSR to make this just a single dispatch
     41 
     42 loop:	// clears 256 bytes per iteration: sixteen 16-byte unaligned stores
     43 	MOVOU	X0, 0(DI)
     44 	MOVOU	X0, 16(DI)
     45 	MOVOU	X0, 32(DI)
     46 	MOVOU	X0, 48(DI)
     47 	MOVOU	X0, 64(DI)
     48 	MOVOU	X0, 80(DI)
     49 	MOVOU	X0, 96(DI)
     50 	MOVOU	X0, 112(DI)
     51 	MOVOU	X0, 128(DI)
     52 	MOVOU	X0, 144(DI)
     53 	MOVOU	X0, 160(DI)
     54 	MOVOU	X0, 176(DI)
     55 	MOVOU	X0, 192(DI)
     56 	MOVOU	X0, 208(DI)
     57 	MOVOU	X0, 224(DI)
     58 	MOVOU	X0, 240(DI)
     59 	SUBL	$256, BX	// 256 fewer bytes remain
     60 	ADDL	$256, DI	// advance past the bytes just cleared
     61 	CMPL	BX, $256
     62 	JAE	loop	// keep going while at least 256 bytes remain
     63 	JMP	tail	// 0..255 bytes left: let the dispatcher finish them
     64 
     65 _1or2:	// first byte and last byte; both stores hit the same byte when BX == 1
     66 	MOVB	AX, (DI)
     67 	MOVB	AX, -1(DI)(BX*1)	// address DI+BX-1 = last byte
     68 	RET
     69 _0:	// zero bytes remaining: no store needed
     70 	RET
     71 _3or4:	// first 2 bytes and last 2 bytes; the stores overlap by one byte when BX == 3
     72 	MOVW	AX, (DI)
     73 	MOVW	AX, -2(DI)(BX*1)
     74 	RET
     75 _5through8:	// first 4 bytes and last 4 bytes; the stores overlap when BX < 8
     76 	MOVL	AX, (DI)
     77 	MOVL	AX, -4(DI)(BX*1)
     78 	RET
     79 _9through16:	// first 8 bytes and last 8 bytes, each as two 4-byte stores; overlap when BX < 16
     80 	MOVL	AX, (DI)
     81 	MOVL	AX, 4(DI)
     82 	MOVL	AX, -8(DI)(BX*1)
     83 	MOVL	AX, -4(DI)(BX*1)
     84 	RET
     85 _17through32:	// first 16 bytes and last 16 bytes; overlap when BX < 32
     86 	MOVOU	X0, (DI)
     87 	MOVOU	X0, -16(DI)(BX*1)
     88 	RET
     89 _33through64:	// first 32 bytes and last 32 bytes; overlap when BX < 64
     90 	MOVOU	X0, (DI)
     91 	MOVOU	X0, 16(DI)
     92 	MOVOU	X0, -32(DI)(BX*1)
     93 	MOVOU	X0, -16(DI)(BX*1)
     94 	RET
     95 _65through128:	// first 64 bytes and last 64 bytes; overlap when BX < 128
     96 	MOVOU	X0, (DI)
     97 	MOVOU	X0, 16(DI)
     98 	MOVOU	X0, 32(DI)
     99 	MOVOU	X0, 48(DI)
    100 	MOVOU	X0, -64(DI)(BX*1)
    101 	MOVOU	X0, -48(DI)(BX*1)
    102 	MOVOU	X0, -32(DI)(BX*1)
    103 	MOVOU	X0, -16(DI)(BX*1)
    104 	RET
    105 _129through256:	// first 128 bytes and last 128 bytes; overlap when BX < 256
    106 	MOVOU	X0, (DI)
    107 	MOVOU	X0, 16(DI)
    108 	MOVOU	X0, 32(DI)
    109 	MOVOU	X0, 48(DI)
    110 	MOVOU	X0, 64(DI)
    111 	MOVOU	X0, 80(DI)
    112 	MOVOU	X0, 96(DI)
    113 	MOVOU	X0, 112(DI)
    114 	MOVOU	X0, -128(DI)(BX*1)
    115 	MOVOU	X0, -112(DI)(BX*1)
    116 	MOVOU	X0, -96(DI)(BX*1)
    117 	MOVOU	X0, -80(DI)(BX*1)
    118 	MOVOU	X0, -64(DI)(BX*1)
    119 	MOVOU	X0, -48(DI)(BX*1)
    120 	MOVOU	X0, -32(DI)(BX*1)
    121 	MOVOU	X0, -16(DI)(BX*1)
    122 	RET
    123 nosse2:	// fallback without MOVOU: clear whole 4-byte words with REP STOSL, then the 0..3 leftover bytes
    124 	MOVL	BX, CX
    125 	SHRL	$2, CX	// CX = BX / 4 = number of 4-byte words to store
    126 	REP
    127 	STOSL	// store AX (zero) CX times at DI, moving DI by 4 each time; assumes the direction flag is clear — TODO confirm
    128 	ANDL	$3, BX	// BX = leftover byte count (0..3); also sets ZF for the branch below
    129 	JNE	tail	// 1..3 bytes left: DI already points at them, finish via _1or2 / _3or4
    130 	RET
    131