;------------------------------------------------------------------------------
;
; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution. The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php.
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
; Module Name:
;
;   CopyMem.nasm
;
; Abstract:
;
;   CopyMem function
;
; Notes:
;
;------------------------------------------------------------------------------

    DEFAULT REL
    SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlapping buffers are handled: when Source lies below Destination and
;  its last byte reaches into Destination, the copy runs backward from the
;  end; in every other case it runs forward.
;
;  In:     rcx = Destination, rdx = Source, r8 = Count
;  Out:    rax = Destination
;  Saved:  rsi, rdi (push/pop); xmm0 (spilled and reloaded around the block loop)
;  Clobb:  rcx, r8, r9, flags
;  Flags:  DF is cleared on exit. The forward path issues rep movsb without a
;          preceding cld, so it relies on DF being clear on entry.
;  Stack:  16 bytes are written at [rsp + 0x18] after the two pushes, i.e. at
;          entry-rsp + 8. NOTE(review): this presumes the caller-provided
;          32-byte home space of the Microsoft x64 convention (suggested by
;          the rcx/rdx/r8 argument registers) and its entry alignment
;          (rsp = 8 mod 16), which makes the slot 16-byte aligned for movdqa.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- address of last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination (return value); mov leaves flags intact
    jae     .copy_forward               ; Source >= Destination: forward copy is safe
    cmp     r9, rdi                     ; does the end of Source reach Destination?
    jae     .copy_backward              ; yes: overlapped, copy from the end

.copy_forward:
    mov     rcx, rdi
    neg     rcx                         ; rcx <- -rdi
    and     rcx, 15                     ; rcx <- bytes needed to make rdi 16-byte aligned
    jz      .copy_blocks                ; destination already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; never copy more than Count
    sub     r8, rcx                     ; r8 <- bytes left after the alignment head
    rep     movsb

.copy_blocks:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- trailing bytes after the 16-byte blocks
    shr     rcx, 4                      ; rcx <- number of 16-byte blocks
    jz      .copy_bytes
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack

.block_loop:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here (non-temporal store)
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the legacy loop instruction
    jnz     .block_loop
    mfence                              ; order the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     .copy_bytes                 ; copy remaining bytes

.copy_backward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std                                 ; rep movsb now walks downward

.copy_bytes:
    mov     rcx, r8                     ; forward: trailing bytes; backward: whole Count
    rep     movsb
    cld                                 ; always leave DF clear
    pop     rdi
    pop     rsi
    ret
