Home | History | Annotate | Download | only in runtime
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build ignore
      6 
// runtime·duffzero is a Duff's device for zeroing memory.
      8 // The compiler jumps to computed addresses within
      9 // the routine to zero chunks of memory.
     10 // Do not change duffzero without also
     11 // changing the uses in cmd/compile/internal/*/*.go.
     12 
// runtime·duffcopy is a Duff's device for copying memory.
     14 // The compiler jumps to computed addresses within
     15 // the routine to copy chunks of memory.
     16 // Source and destination must not overlap.
     17 // Do not change duffcopy without also
     18 // changing the uses in cmd/compile/internal/*/*.go.
     19 
     20 // See the zero* and copy* generators below
     21 // for architecture-specific comments.
     22 
     23 // mkduff generates duff_*.s.
     24 package main
     25 
     26 import (
     27 	"bytes"
     28 	"fmt"
     29 	"io"
     30 	"io/ioutil"
     31 	"log"
     32 )
     33 
     34 func main() {
     35 	gen("amd64", notags, zeroAMD64, copyAMD64)
     36 	gen("386", notags, zero386, copy386)
     37 	gen("arm", notags, zeroARM, copyARM)
     38 	gen("arm64", notags, zeroARM64, copyARM64)
     39 	gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
     40 	gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
     41 }
     42 
     43 func gen(arch string, tags, zero, copy func(io.Writer)) {
     44 	var buf bytes.Buffer
     45 
     46 	fmt.Fprintln(&buf, "// Code generated by mkduff.go; DO NOT EDIT.")
     47 	fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
     48 	fmt.Fprintln(&buf, "// See mkduff.go for comments.")
     49 	tags(&buf)
     50 	fmt.Fprintln(&buf, "#include \"textflag.h\"")
     51 	fmt.Fprintln(&buf)
     52 	zero(&buf)
     53 	fmt.Fprintln(&buf)
     54 	copy(&buf)
     55 
     56 	if err := ioutil.WriteFile("duff_"+arch+".s", buf.Bytes(), 0644); err != nil {
     57 		log.Fatalln(err)
     58 	}
     59 }
     60 
     61 func notags(w io.Writer) { fmt.Fprintln(w) }
     62 
     63 func zeroAMD64(w io.Writer) {
     64 	// X0: zero
     65 	// DI: ptr to memory to be zeroed
     66 	// DI is updated as a side effect.
     67 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0")
     68 	for i := 0; i < 16; i++ {
     69 		fmt.Fprintln(w, "\tMOVUPS\tX0,(DI)")
     70 		fmt.Fprintln(w, "\tMOVUPS\tX0,16(DI)")
     71 		fmt.Fprintln(w, "\tMOVUPS\tX0,32(DI)")
     72 		fmt.Fprintln(w, "\tMOVUPS\tX0,48(DI)")
     73 		fmt.Fprintln(w, "\tLEAQ\t64(DI),DI") // We use lea instead of add, to avoid clobbering flags
     74 		fmt.Fprintln(w)
     75 	}
     76 	fmt.Fprintln(w, "\tRET")
     77 }
     78 
     79 func copyAMD64(w io.Writer) {
     80 	// SI: ptr to source memory
     81 	// DI: ptr to destination memory
     82 	// SI and DI are updated as a side effect.
     83 	//
     84 	// This is equivalent to a sequence of MOVSQ but
     85 	// for some reason that is 3.5x slower than this code.
     86 	// The STOSQ in duffzero seem fine, though.
     87 	fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0")
     88 	for i := 0; i < 64; i++ {
     89 		fmt.Fprintln(w, "\tMOVUPS\t(SI), X0")
     90 		fmt.Fprintln(w, "\tADDQ\t$16, SI")
     91 		fmt.Fprintln(w, "\tMOVUPS\tX0, (DI)")
     92 		fmt.Fprintln(w, "\tADDQ\t$16, DI")
     93 		fmt.Fprintln(w)
     94 	}
     95 	fmt.Fprintln(w, "\tRET")
     96 }
     97 
     98 func zero386(w io.Writer) {
     99 	// AX: zero
    100 	// DI: ptr to memory to be zeroed
    101 	// DI is updated as a side effect.
    102 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0")
    103 	for i := 0; i < 128; i++ {
    104 		fmt.Fprintln(w, "\tSTOSL")
    105 	}
    106 	fmt.Fprintln(w, "\tRET")
    107 }
    108 
    109 func copy386(w io.Writer) {
    110 	// SI: ptr to source memory
    111 	// DI: ptr to destination memory
    112 	// SI and DI are updated as a side effect.
    113 	//
    114 	// This is equivalent to a sequence of MOVSL but
    115 	// for some reason MOVSL is really slow.
    116 	fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0")
    117 	for i := 0; i < 128; i++ {
    118 		fmt.Fprintln(w, "\tMOVL\t(SI), CX")
    119 		fmt.Fprintln(w, "\tADDL\t$4, SI")
    120 		fmt.Fprintln(w, "\tMOVL\tCX, (DI)")
    121 		fmt.Fprintln(w, "\tADDL\t$4, DI")
    122 		fmt.Fprintln(w)
    123 	}
    124 	fmt.Fprintln(w, "\tRET")
    125 }
    126 
    127 func zeroARM(w io.Writer) {
    128 	// R0: zero
    129 	// R1: ptr to memory to be zeroed
    130 	// R1 is updated as a side effect.
    131 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0")
    132 	for i := 0; i < 128; i++ {
    133 		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)")
    134 	}
    135 	fmt.Fprintln(w, "\tRET")
    136 }
    137 
    138 func copyARM(w io.Writer) {
    139 	// R0: scratch space
    140 	// R1: ptr to source memory
    141 	// R2: ptr to destination memory
    142 	// R1 and R2 are updated as a side effect
    143 	fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0")
    144 	for i := 0; i < 128; i++ {
    145 		fmt.Fprintln(w, "\tMOVW.P\t4(R1), R0")
    146 		fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R2)")
    147 		fmt.Fprintln(w)
    148 	}
    149 	fmt.Fprintln(w, "\tRET")
    150 }
    151 
    152 func zeroARM64(w io.Writer) {
    153 	// ZR: always zero
    154 	// R16 (aka REGRT1): ptr to memory to be zeroed
    155 	// On return, R16 points to the last zeroed dword.
    156 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $-8-0")
    157 	for i := 0; i < 63; i++ {
    158 		fmt.Fprintln(w, "\tSTP.P\t(ZR, ZR), 16(R16)")
    159 	}
    160 	fmt.Fprintln(w, "\tSTP\t(ZR, ZR), (R16)")
    161 	fmt.Fprintln(w, "\tRET")
    162 }
    163 
    164 func copyARM64(w io.Writer) {
    165 	// R16 (aka REGRT1): ptr to source memory
    166 	// R17 (aka REGRT2): ptr to destination memory
    167 	// R27 (aka REGTMP): scratch space
    168 	// R16 and R17 are updated as a side effect
    169 	fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0")
    170 	for i := 0; i < 128; i++ {
    171 		fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27")
    172 		fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)")
    173 		fmt.Fprintln(w)
    174 	}
    175 	fmt.Fprintln(w, "\tRET")
    176 }
    177 
    178 func tagsPPC64x(w io.Writer) {
    179 	fmt.Fprintln(w)
    180 	fmt.Fprintln(w, "// +build ppc64 ppc64le")
    181 	fmt.Fprintln(w)
    182 }
    183 
    184 func zeroPPC64x(w io.Writer) {
    185 	// R0: always zero
    186 	// R3 (aka REGRT1): ptr to memory to be zeroed - 8
    187 	// On return, R3 points to the last zeroed dword.
    188 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT|NOFRAME, $0-0")
    189 	for i := 0; i < 128; i++ {
    190 		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
    191 	}
    192 	fmt.Fprintln(w, "\tRET")
    193 }
    194 
    195 func copyPPC64x(w io.Writer) {
    196 	fmt.Fprintln(w, "// TODO: Implement runtimeduffcopy.")
    197 }
    198 
    199 func tagsMIPS64x(w io.Writer) {
    200 	fmt.Fprintln(w)
    201 	fmt.Fprintln(w, "// +build mips64 mips64le")
    202 	fmt.Fprintln(w)
    203 }
    204 
    205 func zeroMIPS64x(w io.Writer) {
    206 	// R0: always zero
    207 	// R1 (aka REGRT1): ptr to memory to be zeroed - 8
    208 	// On return, R1 points to the last zeroed dword.
    209 	fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $-8-0")
    210 	for i := 0; i < 128; i++ {
    211 		fmt.Fprintln(w, "\tMOVV\tR0, 8(R1)")
    212 		fmt.Fprintln(w, "\tADDV\t$8, R1")
    213 	}
    214 	fmt.Fprintln(w, "\tRET")
    215 }
    216 
    217 func copyMIPS64x(w io.Writer) {
    218 	fmt.Fprintln(w, "// TODO: Implement runtimeduffcopy.")
    219 }
    220