1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build ignore 6 7 // runtimeduffzero is a Duff's device for zeroing memory. 8 // The compiler jumps to computed addresses within 9 // the routine to zero chunks of memory. 10 // Do not change duffzero without also 11 // changing clearfat in cmd/?g/ggen.go. 12 13 // runtimeduffcopy is a Duff's device for copying memory. 14 // The compiler jumps to computed addresses within 15 // the routine to copy chunks of memory. 16 // Source and destination must not overlap. 17 // Do not change duffcopy without also 18 // changing blockcopy in cmd/?g/cgen.go. 19 20 // See the zero* and copy* generators below 21 // for architecture-specific comments. 22 23 // mkduff generates duff_*.s. 24 package main 25 26 import ( 27 "bytes" 28 "fmt" 29 "io" 30 "io/ioutil" 31 "log" 32 ) 33 34 func main() { 35 gen("amd64", notags, zeroAMD64, copyAMD64) 36 gen("386", notags, zero386, copy386) 37 gen("arm", notags, zeroARM, copyARM) 38 gen("arm64", notags, zeroARM64, copyARM64) 39 gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x) 40 } 41 42 func gen(arch string, tags, zero, copy func(io.Writer)) { 43 var buf bytes.Buffer 44 45 fmt.Fprintln(&buf, "// AUTO-GENERATED by mkduff.go") 46 fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.") 47 fmt.Fprintln(&buf, "// See mkduff.go for comments.") 48 tags(&buf) 49 fmt.Fprintln(&buf, "#include \"textflag.h\"") 50 fmt.Fprintln(&buf) 51 zero(&buf) 52 fmt.Fprintln(&buf) 53 copy(&buf) 54 55 if err := ioutil.WriteFile("duff_"+arch+".s", buf.Bytes(), 0644); err != nil { 56 log.Fatalln(err) 57 } 58 } 59 60 func notags(w io.Writer) { fmt.Fprintln(w) } 61 62 func zeroAMD64(w io.Writer) { 63 // AX: zero 64 // DI: ptr to memory to be zeroed 65 // DI is updated as a side effect. 66 fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0") 67 for i := 0; i < 31; i++ { 68 fmt.Fprintln(w, "\tMOVQ\tAX,(DI)") 69 fmt.Fprintln(w, "\tMOVQ\tAX,8(DI)") 70 fmt.Fprintln(w, "\tMOVQ\tAX,16(DI)") 71 fmt.Fprintln(w, "\tMOVQ\tAX,24(DI)") 72 fmt.Fprintln(w, "\tADDQ\t$32,DI") 73 fmt.Fprintln(w) 74 } 75 for i := 0; i < 4; i++ { 76 fmt.Fprintln(w, "\tSTOSQ") 77 } 78 fmt.Fprintln(w, "\tRET") 79 } 80 81 func copyAMD64(w io.Writer) { 82 // SI: ptr to source memory 83 // DI: ptr to destination memory 84 // SI and DI are updated as a side effect. 85 // 86 // This is equivalent to a sequence of MOVSQ but 87 // for some reason that is 3.5x slower than this code. 88 // The STOSQ in duffzero seem fine, though. 89 fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0") 90 for i := 0; i < 128; i++ { 91 fmt.Fprintln(w, "\tMOVQ\t(SI), CX") 92 fmt.Fprintln(w, "\tADDQ\t$8, SI") 93 fmt.Fprintln(w, "\tMOVQ\tCX, (DI)") 94 fmt.Fprintln(w, "\tADDQ\t$8, DI") 95 fmt.Fprintln(w) 96 } 97 fmt.Fprintln(w, "\tRET") 98 } 99 100 func zero386(w io.Writer) { 101 // AX: zero 102 // DI: ptr to memory to be zeroed 103 // DI is updated as a side effect. 104 fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0") 105 for i := 0; i < 128; i++ { 106 fmt.Fprintln(w, "\tSTOSL") 107 } 108 fmt.Fprintln(w, "\tRET") 109 } 110 111 func copy386(w io.Writer) { 112 // SI: ptr to source memory 113 // DI: ptr to destination memory 114 // SI and DI are updated as a side effect. 115 // 116 // This is equivalent to a sequence of MOVSL but 117 // for some reason MOVSL is really slow. 118 fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0") 119 for i := 0; i < 128; i++ { 120 fmt.Fprintln(w, "\tMOVL\t(SI), CX") 121 fmt.Fprintln(w, "\tADDL\t$4, SI") 122 fmt.Fprintln(w, "\tMOVL\tCX, (DI)") 123 fmt.Fprintln(w, "\tADDL\t$4, DI") 124 fmt.Fprintln(w) 125 } 126 fmt.Fprintln(w, "\tRET") 127 } 128 129 func zeroARM(w io.Writer) { 130 // R0: zero 131 // R1: ptr to memory to be zeroed 132 // R1 is updated as a side effect. 133 fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $0-0") 134 for i := 0; i < 128; i++ { 135 fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R1)") 136 } 137 fmt.Fprintln(w, "\tRET") 138 } 139 140 func copyARM(w io.Writer) { 141 // R0: scratch space 142 // R1: ptr to source memory 143 // R2: ptr to destination memory 144 // R1 and R2 are updated as a side effect 145 fmt.Fprintln(w, "TEXT runtimeduffcopy(SB), NOSPLIT, $0-0") 146 for i := 0; i < 128; i++ { 147 fmt.Fprintln(w, "\tMOVW.P\t4(R1), R0") 148 fmt.Fprintln(w, "\tMOVW.P\tR0, 4(R2)") 149 fmt.Fprintln(w) 150 } 151 fmt.Fprintln(w, "\tRET") 152 } 153 154 func zeroARM64(w io.Writer) { 155 // ZR: always zero 156 // R16 (aka REGRT1): ptr to memory to be zeroed - 8 157 // On return, R16 points to the last zeroed dword. 158 fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $-8-0") 159 for i := 0; i < 128; i++ { 160 fmt.Fprintln(w, "\tMOVD.W\tZR, 8(R16)") 161 } 162 fmt.Fprintln(w, "\tRET") 163 } 164 165 func copyARM64(w io.Writer) { 166 fmt.Fprintln(w, "// TODO: Implement runtimeduffcopy.") 167 } 168 169 func tagsPPC64x(w io.Writer) { 170 fmt.Fprintln(w) 171 fmt.Fprintln(w, "// +build ppc64 ppc64le") 172 fmt.Fprintln(w) 173 } 174 175 func zeroPPC64x(w io.Writer) { 176 // R0: always zero 177 // R3 (aka REGRT1): ptr to memory to be zeroed - 8 178 // On return, R3 points to the last zeroed dword. 179 fmt.Fprintln(w, "TEXT runtimeduffzero(SB), NOSPLIT, $-8-0") 180 for i := 0; i < 128; i++ { 181 fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)") 182 } 183 fmt.Fprintln(w, "\tRETURN") 184 } 185 186 func copyPPC64x(w io.Writer) { 187 fmt.Fprintln(w, "// TODO: Implement runtimeduffcopy.") 188 } 189