#
# ConvertAsm.py: Automatically generated from CopyMem.asm
#
#------------------------------------------------------------------------------
#
# Copyright (c) 2006 - 2009, Intel Corporation. All rights reserved.<BR>
# This program and the accompanying materials
# are licensed and made available under the terms and conditions of the BSD License
# which accompanies this distribution.  The full text of the license may be found at
# http://opensource.org/licenses/bsd-license.php.
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
#
# Module Name:
#
#   CopyMem.S
#
# Abstract:
#
#   CopyMem function
#
# Notes:
#
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
#  VOID *
#  EFIAPI
#  InternalMemCopyMem (
#    IN VOID   *Destination,
#    IN VOID   *Source,
#    IN UINTN  Count
#    )
#------------------------------------------------------------------------------
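#
# Per the Microsoft x64 calling convention used by UEFI:
#   rcx = Destination, rdx = Source, r8 = Count, return value in rax.
# rsi and rdi are callee-saved, hence the push/pop pairs below.
#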
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    pushq   %rsi
    pushq   %rdi
    movq    %rdx, %rsi                  # rsi <- Source
    movq    %rcx, %rdi                  # rdi <- Destination
    leaq    -1(%rsi, %r8), %r9          # r9 <- Last byte of Source
    cmpq    %rdi, %rsi
    movq    %rdi, %rax                  # rax <- Destination as return value
    jae     L0                          # Copy forward if Source >= Destination
    cmpq    %rdi, %r9                   # Overlapped?
    jae     L_CopyBackward              # Copy backward if overlapped
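#
# Forward copy: copy single bytes until rdi is 16-byte aligned, then stream
# whole 16-byte blocks with non-temporal stores, then copy the tail bytes.
#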
L0:
    xorq    %rcx, %rcx
    subq    %rdi, %rcx                  # rcx <- -rdi
    andq    $15, %rcx                   # rcx + rdi should be 16 bytes aligned
    jz      L1                          # skip if rcx == 0
    cmpq    %r8, %rcx
    cmova   %r8, %rcx                   # rcx <- min(Count, bytes to alignment)
    subq    %rcx, %r8                   # r8 <- remaining byte count
    rep     movsb                       # copy head bytes until rdi is aligned
L1:
    movq    %r8, %rcx
    andq    $15, %r8                    # r8 <- tail bytes that do not fill a DQword
    shrq    $4, %rcx                    # rcx <- # of DQwords to copy
    jz      L_CopyBytes
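#
# The 16 bytes at 0x18(%rsp) lie in the caller-allocated shadow space of the
# Microsoft x64 calling convention (the return address is at 0x10(%rsp) after
# the two pushes above), so xmm0 can be spilled there without adjusting rsp.
#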
    movdqu  %xmm0, 0x18(%rsp)           # save xmm0 on stack
L2:
    movdqu  (%rsi), %xmm0               # rsi may not be 16-byte aligned
    movntdq %xmm0, (%rdi)               # rdi should be 16-byte aligned
    addq    $16, %rsi
    addq    $16, %rdi
    loop    L2
    mfence                              # serialize the weakly-ordered movntdq stores
    movdqa  0x18(%rsp), %xmm0           # restore xmm0
    jmp     L_CopyBytes                 # copy remaining bytes
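#
# Backward copy: used when the buffers overlap and Destination is above
# Source; copying from the last byte down reads each source byte before it
# can be overwritten.
#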
L_CopyBackward:
    movq    %r9, %rsi                   # rsi <- Last byte of Source
    leaq    -1(%rdi, %r8), %rdi         # rdi <- Last byte of Destination
    std                                 # movsb decrements rsi/rdi below
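#
# Shared tail: copies the last r8 bytes with rep movsb, forward or backward
# according to the direction flag.
#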
L_CopyBytes:
    movq    %r8, %rcx                   # rcx <- remaining byte count
    rep     movsb
    cld                                 # restore the direction flag
    popq    %rdi
    popq    %rsi
    ret