// Derived from Inferno's libkern/memmove-386.s (adapted for amd64)
// https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-386.s
//
//         Copyright 1994-1999 Lucent Technologies Inc. All rights reserved.
//         Revisions Copyright 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
//         Portions Copyright 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "textflag.h"

// void runtimememmove(void*, void*, uintptr)
//
// Copies n bytes from `from` to `to`; the two regions may overlap.
//
// Arguments are read from the caller's frame:
//	to+0(FP)   destination pointer -> DI
//	from+8(FP) source pointer      -> SI
//	n+16(FP)   byte count          -> BX
// Nothing is written back to the frame. AX and CX are used as scratch.
//
// Strategy, as implemented below:
//   - n <= 16: every source byte needed is loaded into registers before any
//     store is issued, so overlap between the regions is harmless.
//   - n > 16: whole quadwords are moved with REP MOVSQ, ascending or
//     descending depending on how the regions overlap, and the leftover
//     0-7 bytes are finished by looping back through the small-size cases.
//
// NOTE(review): the symbol is spelled "runtimememmove" here with no package
// separator. Go assembly normally writes package-qualified symbols with a
// middle dot (runtime·memmove) - confirm the separator was not lost.
TEXT runtimememmove(SB), NOSPLIT, $0-24

	MOVQ	to+0(FP), DI
	MOVQ	from+8(FP), SI
	MOVQ	n+16(FP), BX

	// REP instructions have a high startup cost, so we handle small sizes
	// with some straightline code. The REP MOVSQ instruction is really fast
	// for large sizes. The cutover is approximately 1K.
	//
	// NOTE(review): in the code below the straight-line cases stop at
	// 16 bytes; every larger size goes through REP MOVSQ. Confirm whether
	// the "approximately 1K" figure still describes this version.
tail:
	// Dispatch on the number of bytes still to copy (BX). This label is
	// also re-entered after the bulk copy, with BX reduced to 0-7.
	TESTQ	BX, BX
	JEQ	move_0
	CMPQ	BX, $2
	JBE	move_1or2	// BX is 1 or 2 (0 was handled above)
	CMPQ	BX, $4
	JBE	move_3or4
	CMPQ	BX, $8
	JB	move_5through7
	JE	move_8		// same comparison: exactly 8 bytes
	CMPQ	BX, $16
	JBE	move_9through16

/*
 * check and set for backwards
 *
 * More than 16 bytes remain. If the source starts at or below the
 * destination (unsigned compare), an ascending copy might overwrite
 * source bytes before they are read, so go check for real overlap.
 */
	CMPQ	SI, DI
	JLS	back

/*
 * forward copy loop
 *
 * CX = number of whole quadwords, BX = leftover bytes (n mod 8).
 * REP MOVSQ advances SI and DI past the copied quadwords, so the
 * jump back to tail finishes the leftover bytes in place.
 * This path does not touch the direction flag; it relies on it
 * being clear.
 */
forward:
	MOVQ	BX, CX
	SHRQ	$3, CX
	ANDQ	$7, BX

	REP;	MOVSQ
	JMP	tail

back:
/*
 * check overlap
 *
 * CX = from + n (one past the end of the source). If that is at or
 * below the destination, the regions do not overlap and the faster
 * forward copy is safe.
 */
	MOVQ	SI, CX
	ADDQ	BX, CX
	CMPQ	CX, DI
	JLS	forward

/*
 * whole thing backwards has
 * adjusted addresses
 *
 * Point DI and SI one past the end of each region and set the
 * direction flag so that MOVSQ walks downwards.
 */
	ADDQ	BX, DI
	ADDQ	BX, SI
	STD

/*
 * copy
 *
 * CX = number of whole quadwords, BX = leftover bytes (n mod 8).
 * The pointers are backed up by 8 so they address the last quadword
 * of each region before the descending REP MOVSQ starts.
 */
	MOVQ	BX, CX
	SHRQ	$3, CX
	ANDQ	$7, BX

	SUBQ	$8, DI
	SUBQ	$8, SI
	REP;	MOVSQ

	// Restore the direction flag, undo the 8-byte bias, then step back by
	// the leftover count so DI and SI point at the start of the regions,
	// where the BX uncopied leading bytes live. tail finishes them.
	CLD
	ADDQ	$8, DI
	ADDQ	$8, SI
	SUBQ	BX, DI
	SUBQ	BX, SI
	JMP	tail

	// Small-size cases. Each one loads the first and the last chunk of the
	// source (the two chunks overlap when the size is not twice the chunk
	// width) and only then stores them, so overlapping regions are safe.
move_1or2:
	// First byte and last byte; for n == 1 these are the same byte.
	MOVB	(SI), AX
	MOVB	-1(SI)(BX*1), CX
	MOVB	AX, (DI)
	MOVB	CX, -1(DI)(BX*1)
	RET
move_0:
	RET
move_3or4:
	// First two bytes and last two bytes.
	MOVW	(SI), AX
	MOVW	-2(SI)(BX*1), CX
	MOVW	AX, (DI)
	MOVW	CX, -2(DI)(BX*1)
	RET
move_5through7:
	// First four bytes and last four bytes.
	MOVL	(SI), AX
	MOVL	-4(SI)(BX*1), CX
	MOVL	AX, (DI)
	MOVL	CX, -4(DI)(BX*1)
	RET
move_8:
	// We need a separate case for 8 to make sure we write pointers atomically.
	MOVQ	(SI), AX
	MOVQ	AX, (DI)
	RET
move_9through16:
	// First eight bytes and last eight bytes.
	MOVQ	(SI), AX
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	AX, (DI)
	MOVQ	CX, -8(DI)(BX*1)
	RET