// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
	var noddi gc.Node
	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
	var nodsi gc.Node
	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)

	var nodl gc.Node
	var nodr gc.Node
	if n.Ullman >= ns.Ullman {
		gc.Agenr(n, &nodr, &nodsi)
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
	} else {
		if ns.Op == gc.ONAME {
			gc.Gvardef(ns)
		}
		gc.Agenr(ns, &nodl, &noddi)
		gc.Agenr(n, &nodr, &nodsi)
	}

	if nodl.Reg != x86.REG_DI {
		gmove(&nodl, &noddi)
	}
	if nodr.Reg != x86.REG_SI {
		gmove(&nodr, &nodsi)
	}
	gc.Regfree(&nodl)
	gc.Regfree(&nodr)

	c := w % 8 // bytes
	q := w / 8 // quads

	var oldcx gc.Node
	var cx gc.Node
	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	if osrc < odst && odst < osrc+w {
		// reverse direction
		gins(x86.ASTD, nil, nil) // set direction flag
		if c > 0 {
			gconreg(addptr, w-1, x86.REG_SI)
			gconreg(addptr, w-1, x86.REG_DI)

			gconreg(movptr, c, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
		}

		if q > 0 {
			if c > 0 {
				gconreg(addptr, -7, x86.REG_SI)
				gconreg(addptr, -7, x86.REG_DI)
			} else {
				gconreg(addptr, w-8, x86.REG_SI)
				gconreg(addptr, w-8, x86.REG_DI)
			}

			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
		}

		// we leave with the flag clear
		gins(x86.ACLD, nil, nil)
	} else {
		// normal direction
		if q > 128 || (gc.Nacl && q >= 4) {
			gconreg(movptr, q, x86.REG_CX)
			gins(x86.AREP, nil, nil)   // repeat
			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
		} else if q >= 4 {
			p := gins(obj.ADUFFCOPY, nil, nil)
			p.To.Type = obj.TYPE_ADDR
			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))

			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
			p.To.Offset = 14 * (128 - q)
		} else if !gc.Nacl && c == 0 {
			// We don't need the MOVSQ side-effect of updating SI and DI,
			// and issuing a sequence of MOVQs directly is faster.
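			// Mark SI and DI as indirect (OINDREG) so gmove with an
			// Xoffset addresses x+(SI) and x+(DI) directly.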
			nodsi.Op = gc.OINDREG

			noddi.Op = gc.OINDREG
			for q > 0 {
				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
				nodsi.Xoffset += 8
				noddi.Xoffset += 8
				q--
			}
		} else {
			for q > 0 {
				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
				q--
			}
		}

		// copy the remaining c bytes
		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
			for c > 0 {
				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
				c--
			}
		} else if w < 8 || c <= 4 {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT32]
			nodsi.Type = gc.Types[gc.TINT32]
			noddi.Type = gc.Types[gc.TINT32]
			if c > 4 {
				nodsi.Xoffset = 0
				noddi.Xoffset = 0
				gmove(&nodsi, &cx)
				gmove(&cx, &noddi)
			}

			nodsi.Xoffset = c - 4
			noddi.Xoffset = c - 4
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		} else {
			nodsi.Op = gc.OINDREG
			noddi.Op = gc.OINDREG
			cx.Type = gc.Types[gc.TINT64]
			nodsi.Type = gc.Types[gc.TINT64]
			noddi.Type = gc.Types[gc.TINT64]
			nodsi.Xoffset = c - 8
			noddi.Xoffset = c - 8
			gmove(&nodsi, &cx)
			gmove(&cx, &noddi)
		}
	}

	restx(&cx, &oldcx)
}