Home | History | Annotate | Download | only in amd64
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 package amd64
      6 
      7 import (
      8 	"cmd/compile/internal/gc"
      9 	"cmd/internal/obj"
     10 	"cmd/internal/obj/x86"
     11 )
     12 
     13 func blockcopy(n, ns *gc.Node, osrc, odst, w int64) {
     14 	var noddi gc.Node
     15 	gc.Nodreg(&noddi, gc.Types[gc.Tptr], x86.REG_DI)
     16 	var nodsi gc.Node
     17 	gc.Nodreg(&nodsi, gc.Types[gc.Tptr], x86.REG_SI)
     18 
     19 	var nodl gc.Node
     20 	var nodr gc.Node
     21 	if n.Ullman >= ns.Ullman {
     22 		gc.Agenr(n, &nodr, &nodsi)
     23 		if ns.Op == gc.ONAME {
     24 			gc.Gvardef(ns)
     25 		}
     26 		gc.Agenr(ns, &nodl, &noddi)
     27 	} else {
     28 		if ns.Op == gc.ONAME {
     29 			gc.Gvardef(ns)
     30 		}
     31 		gc.Agenr(ns, &nodl, &noddi)
     32 		gc.Agenr(n, &nodr, &nodsi)
     33 	}
     34 
     35 	if nodl.Reg != x86.REG_DI {
     36 		gmove(&nodl, &noddi)
     37 	}
     38 	if nodr.Reg != x86.REG_SI {
     39 		gmove(&nodr, &nodsi)
     40 	}
     41 	gc.Regfree(&nodl)
     42 	gc.Regfree(&nodr)
     43 
     44 	c := w % 8 // bytes
     45 	q := w / 8 // quads
     46 
     47 	var oldcx gc.Node
     48 	var cx gc.Node
     49 	savex(x86.REG_CX, &cx, &oldcx, nil, gc.Types[gc.TINT64])
     50 
     51 	// if we are copying forward on the stack and
     52 	// the src and dst overlap, then reverse direction
     53 	if osrc < odst && odst < osrc+w {
     54 		// reverse direction
     55 		gins(x86.ASTD, nil, nil) // set direction flag
     56 		if c > 0 {
     57 			gconreg(addptr, w-1, x86.REG_SI)
     58 			gconreg(addptr, w-1, x86.REG_DI)
     59 
     60 			gconreg(movptr, c, x86.REG_CX)
     61 			gins(x86.AREP, nil, nil)   // repeat
     62 			gins(x86.AMOVSB, nil, nil) // MOVB *(SI)-,*(DI)-
     63 		}
     64 
     65 		if q > 0 {
     66 			if c > 0 {
     67 				gconreg(addptr, -7, x86.REG_SI)
     68 				gconreg(addptr, -7, x86.REG_DI)
     69 			} else {
     70 				gconreg(addptr, w-8, x86.REG_SI)
     71 				gconreg(addptr, w-8, x86.REG_DI)
     72 			}
     73 
     74 			gconreg(movptr, q, x86.REG_CX)
     75 			gins(x86.AREP, nil, nil)   // repeat
     76 			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)-,*(DI)-
     77 		}
     78 
     79 		// we leave with the flag clear
     80 		gins(x86.ACLD, nil, nil)
     81 	} else {
     82 		// normal direction
     83 		if q > 128 || (gc.Nacl && q >= 4) {
     84 			gconreg(movptr, q, x86.REG_CX)
     85 			gins(x86.AREP, nil, nil)   // repeat
     86 			gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
     87 		} else if q >= 4 {
     88 			p := gins(obj.ADUFFCOPY, nil, nil)
     89 			p.To.Type = obj.TYPE_ADDR
     90 			p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
     91 
     92 			// 14 and 128 = magic constants: see ../../runtime/asm_amd64.s
     93 			p.To.Offset = 14 * (128 - q)
     94 		} else if !gc.Nacl && c == 0 {
     95 			// We don't need the MOVSQ side-effect of updating SI and DI,
     96 			// and issuing a sequence of MOVQs directly is faster.
     97 			nodsi.Op = gc.OINDREG
     98 
     99 			noddi.Op = gc.OINDREG
    100 			for q > 0 {
    101 				gmove(&nodsi, &cx) // MOVQ x+(SI),CX
    102 				gmove(&cx, &noddi) // MOVQ CX,x+(DI)
    103 				nodsi.Xoffset += 8
    104 				noddi.Xoffset += 8
    105 				q--
    106 			}
    107 		} else {
    108 			for q > 0 {
    109 				gins(x86.AMOVSQ, nil, nil) // MOVQ *(SI)+,*(DI)+
    110 				q--
    111 			}
    112 		}
    113 
    114 		// copy the remaining c bytes
    115 		if w < 4 || c <= 1 || (odst < osrc && osrc < odst+w) {
    116 			for c > 0 {
    117 				gins(x86.AMOVSB, nil, nil) // MOVB *(SI)+,*(DI)+
    118 				c--
    119 			}
    120 		} else if w < 8 || c <= 4 {
    121 			nodsi.Op = gc.OINDREG
    122 			noddi.Op = gc.OINDREG
    123 			cx.Type = gc.Types[gc.TINT32]
    124 			nodsi.Type = gc.Types[gc.TINT32]
    125 			noddi.Type = gc.Types[gc.TINT32]
    126 			if c > 4 {
    127 				nodsi.Xoffset = 0
    128 				noddi.Xoffset = 0
    129 				gmove(&nodsi, &cx)
    130 				gmove(&cx, &noddi)
    131 			}
    132 
    133 			nodsi.Xoffset = c - 4
    134 			noddi.Xoffset = c - 4
    135 			gmove(&nodsi, &cx)
    136 			gmove(&cx, &noddi)
    137 		} else {
    138 			nodsi.Op = gc.OINDREG
    139 			noddi.Op = gc.OINDREG
    140 			cx.Type = gc.Types[gc.TINT64]
    141 			nodsi.Type = gc.Types[gc.TINT64]
    142 			noddi.Type = gc.Types[gc.TINT64]
    143 			nodsi.Xoffset = c - 8
    144 			noddi.Xoffset = c - 8
    145 			gmove(&nodsi, &cx)
    146 			gmove(&cx, &noddi)
    147 		}
    148 	}
    149 
    150 	restx(&cx, &oldcx)
    151 }
    152