// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !plan9

#include "textflag.h"

// NOTE: Windows externalthreadhandler expects memclr to preserve DX.
// The only registers written below are AX, BX, CX, DI and X0.

// void runtime·memclrNoHeapPointers(void*, uintptr)
//
// Zeroes n bytes of memory starting at ptr.
//
// Register roles:
//	DI = address of the next byte to clear
//	BX = number of bytes still to clear
//	AX = 0, the value written by the integer stores
//	X0 = 0, the value written by the MOVOU stores
//	CX = dword count for REP STOSL (nosse2 path only)
//
// Strategy:
//   - 0..16 bytes are cleared with integer stores.
//   - Above 16 bytes, if runtime·support_sse2 is 1, 16-byte MOVOU stores
//     are used: sizes above 256 are cleared 256 bytes per loop iteration
//     and the remaining 0..255 bytes are dispatched by size again at tail.
//   - Above 16 bytes without SSE2, REP STOSL clears the whole dwords and
//     the remaining 0..3 bytes are dispatched again at tail.
//
// Every ranged case (_5through8, _9through16, ...) stores the first half
// of the class maximum from the start of the buffer and the second half
// backwards from the end of the buffer (DI+BX). When n is smaller than the
// class maximum the two groups of stores overlap, so no per-byte tail
// handling is needed.
TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), BX
	XORL	AX, AX

	// MOVOU seems always faster than REP STOSL.
tail:
	// Dispatch on the number of bytes left in BX. This label is re-entered
	// after the 256-byte loop and after REP STOSL to finish the remainder.
	TESTL	BX, BX
	JEQ	_0
	CMPL	BX, $2
	JBE	_1or2
	CMPL	BX, $4
	JB	_3	// BX > 2 here, so below 4 means exactly 3
	JE	_4
	CMPL	BX, $8
	JBE	_5through8
	CMPL	BX, $16
	JBE	_9through16
	// More than 16 bytes: use 16-byte stores only if SSE2 is available.
	CMPB	runtime·support_sse2(SB), $1
	JNE	nosse2
	PXOR	X0, X0
	CMPL	BX, $32
	JBE	_17through32
	CMPL	BX, $64
	JBE	_33through64
	CMPL	BX, $128
	JBE	_65through128
	CMPL	BX, $256
	JBE	_129through256
	// TODO: use branch table and BSR to make this just a single dispatch

loop:
	// BX >= 256 here: clear one 256-byte block with sixteen 16-byte stores.
	MOVOU	X0, 0(DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, 128(DI)
	MOVOU	X0, 144(DI)
	MOVOU	X0, 160(DI)
	MOVOU	X0, 176(DI)
	MOVOU	X0, 192(DI)
	MOVOU	X0, 208(DI)
	MOVOU	X0, 224(DI)
	MOVOU	X0, 240(DI)
	SUBL	$256, BX
	ADDL	$256, DI
	CMPL	BX, $256
	JAE	loop
	JMP	tail	// 0..255 bytes left; finish via the size dispatch

_1or2:
	// First byte and last byte; for n == 1 both stores hit the same byte.
	MOVB	AX, (DI)
	MOVB	AX, -1(DI)(BX*1)
	RET
_0:
	RET
_3:
	// One 2-byte store followed by one 1-byte store.
	MOVW	AX, (DI)
	MOVB	AX, 2(DI)
	RET
_4:
	// We need a separate case for 4 to make sure we clear pointers atomically.
	MOVL	AX, (DI)
	RET
_5through8:
	// First 4 bytes and last 4 bytes.
	MOVL	AX, (DI)
	MOVL	AX, -4(DI)(BX*1)
	RET
_9through16:
	// First 8 bytes and last 8 bytes.
	MOVL	AX, (DI)
	MOVL	AX, 4(DI)
	MOVL	AX, -8(DI)(BX*1)
	MOVL	AX, -4(DI)(BX*1)
	RET
_17through32:
	// First 16 bytes and last 16 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_33through64:
	// First 32 bytes and last 32 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_65through128:
	// First 64 bytes and last 64 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
_129through256:
	// First 128 bytes and last 128 bytes.
	MOVOU	X0, (DI)
	MOVOU	X0, 16(DI)
	MOVOU	X0, 32(DI)
	MOVOU	X0, 48(DI)
	MOVOU	X0, 64(DI)
	MOVOU	X0, 80(DI)
	MOVOU	X0, 96(DI)
	MOVOU	X0, 112(DI)
	MOVOU	X0, -128(DI)(BX*1)
	MOVOU	X0, -112(DI)(BX*1)
	MOVOU	X0, -96(DI)(BX*1)
	MOVOU	X0, -80(DI)(BX*1)
	MOVOU	X0, -64(DI)(BX*1)
	MOVOU	X0, -48(DI)(BX*1)
	MOVOU	X0, -32(DI)(BX*1)
	MOVOU	X0, -16(DI)(BX*1)
	RET
nosse2:
	// No SSE2 and more than 16 bytes: store BX/4 zero dwords with
	// REP STOSL (which advances DI), then finish the BX%4 leftover
	// bytes through the small-size cases at tail.
	MOVL	BX, CX
	SHRL	$2, CX
	REP
	STOSL
	ANDL	$3, BX
	JNE	tail
	RET