// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
// http://code.google.com/p/inferno-os/source/browse/libkern/memmove-386.s
//
// Copyright 1994-1999 Lucent Technologies Inc. All rights reserved.
// Revisions Copyright 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
// Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "textflag.h"

// void runtimememmove(void*, void*, uintptr)
//
// Copies n bytes from `from` to `to`. The regions may overlap: the routine
// picks a forward or a backward copy depending on how they are laid out.
//
// Arguments (24 bytes on the caller's frame, no local frame):
//	to+0(FP)	destination pointer
//	from+8(FP)	source pointer
//	n+16(FP)	byte count
//
// Registers written: AX, BX, CX, SI, DI.
//
// NOTE(review): Go assembly normally spells package-qualified symbols with a
// middle dot (e.g. runtime·memmove). Confirm that the symbol name used here
// has not lost that character.
TEXT runtimememmove(SB), NOSPLIT, $0-24

	MOVQ	to+0(FP), DI		// DI = destination cursor
	MOVQ	from+8(FP), SI		// SI = source cursor
	MOVQ	n+16(FP), BX		// BX = bytes still to copy

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code. The REP MOVSQ instruction is really fast
	// for large sizes. The cutover is approximately 1K.
	//
	// NOTE(review): despite the "1K" remark above, the code below only
	// special-cases sizes 0 through 16; every larger size goes to REP MOVSQ.
	//
	// `tail` is entered twice for large copies: once on entry, and once more
	// after the bulk quadword copy, with BX reduced to the 0..7 leftover bytes
	// and SI/DI pointing at them.
tail:
	TESTQ	BX, BX
	JEQ	move_0
	CMPQ	BX, $2			// JBE is an unsigned "below or equal"
	JBE	move_1or2
	CMPQ	BX, $4
	JBE	move_3or4
	CMPQ	BX, $8
	JBE	move_5through8
	CMPQ	BX, $16
	JBE	move_9through16

/*
 * check and set for backwards:
 * if the source starts at or below the destination (unsigned compare),
 * a forward copy could overwrite source bytes before they are read,
 * so go check for real overlap.
 */
	CMPQ	SI, DI
	JLS	back

/*
 * forward copy loop
 * Assumes the direction flag is clear here; this path never clears it itself.
 */
forward:
	MOVQ	BX, CX
	SHRQ	$3, CX			// CX = number of whole 8-byte words
	ANDQ	$7, BX			// BX = leftover bytes (0..7)

	REP;	MOVSQ			// copy CX quadwords, advancing SI and DI
	JMP	tail			// finish the leftover bytes

back:
/*
 * check overlap:
 * CX = from + n, the first address past the source. If that is at or
 * below the destination, the regions do not overlap and the forward
 * copy is safe.
 */
	MOVQ	SI, CX
	ADDQ	BX, CX
	CMPQ	CX, DI
	JLS	forward

/*
 * whole thing backwards has
 * adjusted addresses: move both cursors to one past the end of their
 * buffers and set the direction flag so MOVSQ walks downwards.
 */
	ADDQ	BX, DI
	ADDQ	BX, SI
	STD

/*
 * copy
 */
	MOVQ	BX, CX
	SHRQ	$3, CX			// CX = number of whole 8-byte words
	ANDQ	$7, BX			// BX = leftover bytes (0..7) at the start of the buffers

	SUBQ	$8, DI			// point at the last quadword of each buffer
	SUBQ	$8, SI
	REP;	MOVSQ			// copy CX quadwords from the end towards the start

	CLD				// restore the direction flag before leaving this path
	ADDQ	$8, DI			// undo the -8 bias applied before the copy
	ADDQ	$8, SI
	SUBQ	BX, DI			// step back over the leftover bytes, so SI/DI
	SUBQ	BX, SI			// now point at the very start of the buffers
	JMP	tail			// copy the leftover head bytes

/*
 * Small-size paths. Each one loads the first and the last chunk of the
 * source into AX and CX before storing anything. The two chunks may
 * overlap each other and together cover all BX bytes; doing both loads
 * before both stores keeps the result correct when source and destination
 * overlap.
 */
move_1or2:
	MOVB	(SI), AX		// first byte
	MOVB	-1(SI)(BX*1), CX	// last byte (same byte when BX == 1)
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
	RET
move_0:
	RET				// nothing to copy
move_3or4:
	MOVW	(SI), AX		// first 2 bytes
	MOVW	-2(SI)(BX*1), CX	// last 2 bytes
	MOVW	AX, (DI)
	MOVW	CX, -2(DI)(BX*1)
	RET
move_5through8:
	MOVL	(SI), AX		// first 4 bytes
	MOVL	-4(SI)(BX*1), CX	// last 4 bytes
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_9through16:
	MOVQ	(SI), AX		// first 8 bytes
	MOVQ	-8(SI)(BX*1), CX	// last 8 bytes
	MOVQ	AX, (DI)
	MOVQ	CX, -8(DI)(BX*1)
	RET