1#------------------------------------------------------------------------------
2#
3# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
4#
5# This program and the accompanying materials are licensed and made available
6# under the terms and conditions of the BSD License which accompanies this
7# distribution.  The full text of the license may be found at
8# http://opensource.org/licenses/bsd-license.php
9#
10# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12#
13#------------------------------------------------------------------------------
14
15    .text
16    .thumb
17    .syntax unified
18    .align  5
19ASM_GLOBAL ASM_PFX(InternalMemSetMem16)
20ASM_PFX(InternalMemSetMem16):
21    uxth    r2, r2
22    lsl     r1, r1, #1
23    orr     r2, r2, r2, lsl #16
24    b       0f
25
//
// InternalMemSetMem32
//
//   In:   r0 = destination pointer
//         r1 = number of 32-bit elements (scaled to a byte count here)
//         r2 = 32-bit fill value
//   Out:  r0 is not modified by this code
//
// Branches to the shared fill code at the next local label 0, which
// duplicates r2 into r3.
//
// The .type annotation marks the symbol as a function so that the ELF symbol
// carries the Thumb bit and cross-object calls are resolved in the right
// instruction set.
//
    .type   ASM_PFX(InternalMemSetMem32), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem32)
ASM_PFX(InternalMemSetMem32):
    lsl     r1, r1, #2              // r1 := element count * 4 = byte count
    b       0f
30
//
// InternalMemSetMem64
//
//   In:   r0 = destination pointer
//         r1 = number of 64-bit elements (scaled to a byte count here)
//         r2 = first word of the fill pattern, r3 = second word
//   Out:  r0 is not modified by this code
//
// r2:r3 is already the 8-byte pattern, so this branches to the next local
// label 1 and skips the "mov r3, r2" that the narrower variants go through.
//
// The .type annotation marks the symbol as a function so that the ELF symbol
// carries the Thumb bit and cross-object calls are resolved in the right
// instruction set.
//
    .type   ASM_PFX(InternalMemSetMem64), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem64)
ASM_PFX(InternalMemSetMem64):
    lsl     r1, r1, #3              // r1 := element count * 8 = byte count
    b       1f
35
36    .align  5
37ASM_GLOBAL ASM_PFX(InternalMemSetMem)
38ASM_PFX(InternalMemSetMem):
39    uxtb    r2, r2
40    orr     r2, r2, r2, lsl #8
41    orr     r2, r2, r2, lsl #16
42    b       0f
43
//
// InternalMemZeroMem, followed by the fill code shared by every entry point
// in this file.
//
//   In:   r0 = destination pointer
//         r1 = length in bytes
//   Out:  r0 is not modified by this code
//   Uses: r1, r2, r3, lr as scratch; r4 is saved and restored; flags
//
// Shared entry points (reached with "b 0f" / "b 1f" from the other functions):
//   0:  r2 holds a 32-bit pattern; it is copied into r3
//   1:  r2:r3 holds the complete 8-byte pattern
//
// Strategy: 16 possibly unaligned bytes at the start, then 16-byte aligned
// blocks, then a tail of up to 15 bytes written with overlapping stores.
//
// NOTE(review): the initial "cmp r1, #16 / blt" treats the length as signed,
// so lengths of 2 GiB or more take the short path and are not filled
// correctly. Callers are presumably expected never to pass such lengths.
//
    .type   ASM_PFX(InternalMemZeroMem), %function
ASM_GLOBAL ASM_PFX(InternalMemZeroMem)
ASM_PFX(InternalMemZeroMem):
    movs    r2, #0

    // Entry for 32-bit patterns: make r2:r3 the 8-byte pattern.
0:  mov     r3, r2

    // Entry for 64-bit patterns: r2:r3 is already set up.
1:  push    {r4, lr}
    cmp     r1, #16                 // fewer than 16 bytes of input?
    add     r1, r1, r0              // r1 := dst + length
    add     lr, r0, #16
    blt     2f
    bic     lr, lr, #15             // align output pointer

    str     r2, [r0]                // potentially unaligned store of 4 bytes
    str     r3, [r0, #4]            // potentially unaligned store of 4 bytes
    str     r2, [r0, #8]            // potentially unaligned store of 4 bytes
    str     r3, [r0, #12]           // potentially unaligned store of 4 bytes
    beq     1f                      // exactly 16 bytes (flags from cmp above)

0:  add     lr, lr, #16             // advance the output pointer by 16 bytes
    subs    r4, r1, lr              // r4 := end - lr; negative once past the end
    //
    // Test only the sign of the difference (MI), not a signed comparison of
    // the two addresses (LT): LT also consults the overflow flag and gives
    // the wrong answer when r1 and lr lie on opposite sides of 0x80000000.
    //
    bmi     3f                      // break out of the loop
    strd    r2, r3, [lr, #-16]      // aligned store of 16 bytes
    strd    r2, r3, [lr, #-8]
    bne     0b                      // r4 != 0: goto beginning of loop
1:  pop     {r4, pc}

    // Tail handling. After the adds, r4 = bytes still to write (0..15),
    // starting at lr - 16; r1 is rebased to end - 8.
2:  subs    r4, r1, lr
3:  adds    r4, r4, #16
    subs    r1, r1, #8
    cmp     r4, #4                  // between 4 and 15 bytes?
    blt     4f
    cmp     r4, #8                  // between 8 and 15 bytes?
    sub     r4, lr, #16
    str     r2, [r4]                // overlapping store of 4 + (4 + 4) + 4 bytes
    it      gt
    strgt.n r3, [r4, #4]
    it      gt
    strgt.n r2, [r1]
    str     r3, [r1, #4]
    pop     {r4, pc}

4:  cbz     r4, 5f                  // zero bytes requested: store nothing
    cmp     r4, #2                  // 2 or 3 bytes?
    strb    r2, [lr, #-16]          // store 1 byte
    it      ge
    strhge.n r2, [r1, #6]           // store 2 bytes
5:  pop     {r4, pc}
90