1;------------------------------------------------------------------------------
2;
3; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
4; This program and the accompanying materials
5; are licensed and made available under the terms and conditions of the BSD License
6; which accompanies this distribution.  The full text of the license may be found at
7; http://opensource.org/licenses/bsd-license.php.
8;
9; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
10; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
11;
12; Module Name:
13;
14;   SetMem.asm
15;
16; Abstract:
17;
18;   SetMem function
19;
20; Notes:
21;
22;------------------------------------------------------------------------------
23
24    .code
25
;------------------------------------------------------------------------------
;  VOID *
;  InternalMemSetMem (
;    IN VOID   *Buffer,
;    IN UINTN  Count,
;    IN UINT8  Value
;    )
;
;  Stores Value into Count consecutive bytes starting at Buffer and returns
;  Buffer.
;
;  Registers as used by the code below:
;    In:   rcx = Buffer, rdx = Count, r8b = Value
;    Out:  rax = Buffer
;    rdi is saved/restored through the PROC USES clause; xmm0 is spilled to
;    [rsp + 10h] and reloaded around the 16-byte store loop.
;
;  The fill is done in three phases:
;    1. head  - byte stores until the cursor is 16-byte aligned (never more
;               than Count bytes)
;    2. bulk  - one non-temporal 16-byte store per whole 16-byte chunk
;    3. tail  - byte stores for the remaining 0..15 bytes
;
;  NOTE(review): [rsp + 10h] is presumably the caller-provided home space of
;  the Microsoft x64 calling convention, addressed after the push generated by
;  USES rdi; movdqa needs that address 16-byte aligned - confirm against the
;  build's calling convention.
;  NOTE(review): rep stosb only walks upwards when the direction flag is
;  clear; the routine does not issue cld itself.
;------------------------------------------------------------------------------
InternalMemSetMem   PROC    USES    rdi
    mov     r9, rcx                     ; r9  <- Buffer, kept for the return value
    mov     rdi, rcx                    ; rdi <- store cursor
    movzx   eax, r8b                    ; al  <- Value (fill byte for stosb)

    ; ---- head: advance to the next 16-byte boundary -------------------------
    neg     rcx                         ; rcx <- -Buffer
    and     rcx, 15                     ; rcx <- bytes up to the next 16-byte boundary
    jz      HeadDone                    ; cursor is already aligned
    cmp     rcx, rdx
    cmova   rcx, rdx                    ; clamp head length to Count (unsigned)
    sub     rdx, rcx                    ; rdx <- bytes left after the head
    rep     stosb
HeadDone:

    ; ---- split the remainder into 16-byte chunks and a tail -----------------
    mov     rcx, rdx
    and     rdx, 15                     ; rdx <- tail length (0..15)
    shr     rcx, 4                      ; rcx <- chunk count; ZF set when zero
    jz      TailBytes                   ; nothing to store 16 bytes at a time

    ; ---- bulk: non-temporal 16-byte stores ----------------------------------
    movdqa  [rsp + 10h], xmm0           ; preserve caller's xmm0
    imul    eax, eax, 01010101h         ; eax <- Value in all four bytes (al unchanged)
    movd    xmm0, eax                   ; xmm0[31..0] <- Value x 4
    pshufd  xmm0, xmm0, 0               ; xmm0 <- Value x 16
BulkLoop:
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned on this path
    add     rdi, 16
    dec     rcx
    jnz     BulkLoop                    ; rcx is zero on exit
    mfence                              ; fence the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 10h]           ; give xmm0 back to the caller

    ; ---- tail: the last 0..15 bytes -----------------------------------------
TailBytes:
    mov     ecx, edx                    ; rcx <- tail length, upper 32 bits zeroed
    rep     stosb
    mov     rax, r9                     ; rax <- original Buffer
    ret
InternalMemSetMem   ENDP
68
69    END
70