// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package arm64

import (
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/arm64"
)

// blockcopy copies w bytes from n to res. osrc and odst are the stack
// offsets of the source and destination; if the two regions overlap, the
// copy is generated in reverse so unread source bytes are not clobbered.
func blockcopy(n, res *gc.Node, osrc, odst, w int64) {
	// determine alignment.
	// want to avoid unaligned access, so have to use
	// smaller operations for less aligned types.
	// for example moving [4]byte must use 4 MOVB not 1 MOVW.
	align := int(n.Type.Align)

	var op int
	switch align {
	default:
		gc.Fatal("sgen: invalid alignment %d for %v", align, n.Type)

	case 1:
		op = arm64.AMOVB

	case 2:
		op = arm64.AMOVH

	case 4:
		op = arm64.AMOVW

	case 8:
		op = arm64.AMOVD
	}

	if w%int64(align) != 0 {
		gc.Fatal("sgen: unaligned size %d (align=%d) for %v", w, align, n.Type)
	}
	c := int32(w / int64(align))

	if osrc%int64(align) != 0 || odst%int64(align) != 0 {
		gc.Fatal("sgen: unaligned offset src %d or dst %d (align %d)", osrc, odst, align)
	}

	// if we are copying forward on the stack and
	// the src and dst overlap, then reverse direction
	dir := align

	if osrc < odst && odst < osrc+w {
		dir = -dir
	}

	var dst gc.Node
	var src gc.Node
	if n.Ullman >= res.Ullman {
		gc.Agenr(n, &dst, res) // temporarily use dst
		gc.Regalloc(&src, gc.Types[gc.Tptr], nil)
		gins(arm64.AMOVD, &dst, &src)
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agen(res, &dst)
	} else {
		if res.Op == gc.ONAME {
			gc.Gvardef(res)
		}
		gc.Agenr(res, &dst, res)
		gc.Agenr(n, &src, nil)
	}

	var tmp gc.Node
	gc.Regalloc(&tmp, gc.Types[gc.Tptr], nil)

	// set up end marker
	var nend gc.Node

	// move src and dest to the end of block if necessary
	if dir < 0 {
		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			gins(arm64.AMOVD, &src, &nend)
		}

		p := gins(arm64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w

		p = gins(arm64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = w
	} else {
		p := gins(arm64.AADD, nil, &src)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		p = gins(arm64.AADD, nil, &dst)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = int64(-dir)

		if c >= 4 {
			gc.Regalloc(&nend, gc.Types[gc.Tptr], nil)
			p := gins(arm64.AMOVD, &src, &nend)
			p.From.Type = obj.TYPE_ADDR
			p.From.Offset = w
		}
	}

	// move
	// TODO: enable duffcopy for larger copies.
	if c >= 4 {
		p := gins(op, &src, &tmp)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = int64(dir)
		p.Scond = arm64.C_XPRE
		ploop := p

		p = gins(op, &tmp, &dst)
		p.To.Type = obj.TYPE_MEM
		p.To.Offset = int64(dir)
		p.Scond = arm64.C_XPRE

		p = gcmp(arm64.ACMP, &src, &nend)

		gc.Patch(gc.Gbranch(arm64.ABNE, nil, 0), ploop)
		gc.Regfree(&nend)
	} else {
		// TODO(austin): Instead of generating ADD $-8,R8; ADD
		// $-8,R7; n*(MOVDU 8(R8),R9; MOVDU R9,8(R7);) just
		// generate the offsets directly and eliminate the
		// ADDs. That will produce shorter, more
		// pipeline-able code.
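		// Emit c load/store pairs: each iteration pre-increments (C_XPRE)
		// the src pointer by dir and loads one element into tmp, then
		// pre-increments the dst pointer by dir and stores tmp, so both
		// pointers walk the block one element at a time.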
		for ; c > 0; c-- {
			p := gins(op, &src, &tmp)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = int64(dir)
			p.Scond = arm64.C_XPRE

			p = gins(op, &tmp, &dst)
			p.To.Type = obj.TYPE_MEM
			p.To.Offset = int64(dir)
			p.Scond = arm64.C_XPRE
		}
	}

	gc.Regfree(&dst)
	gc.Regfree(&src)
	gc.Regfree(&tmp)
}