// Listing metadata (not part of the assembly source):
//   pico-loader/common/ndsabi/memcpy.s
//   2025-11-23 17:14:17 +01:00, 127 lines, 2.4 KiB, ArmAsm

// SPDX-License-Identifier: Zlib
// SPDX-FileNotice: Modified from the original version by the BlocksDS project.
//
// Copyright (C) 2021-2023 agbabi contributors
//
// ABI:
// __aeabi_memcpy, __aeabi_memcpy4, __aeabi_memcpy8
// Standard:
// memcpy
// Support:
// __ndsabi_memcpy2, __ndsabi_memcpy1
#include <nds/asminc.h>
#include "macros.inc"
.syntax unified
.arm
@ __aeabi_memcpy: copy r2 bytes from the address in r1 to the address in r0.
@ No alignment is required of either pointer.
@   In:       r0 = destination, r1 = source, r2 = byte count
@   Clobbers: r0-r3 and the condition flags. r0 and r1 are advanced by the
@             post-indexed accesses, so r0 is not the original destination
@             when the routine returns.
@ Small copies go straight to the byte loop. Otherwise the head is copied
@ until r0 is word aligned and execution falls through into
@ __aeabi_memcpy8 / __aeabi_memcpy4 directly below.
BEGIN_ASM_FUNC __aeabi_memcpy
@ >6-bytes is roughly the threshold when byte-by-byte copy is slower
@ (ble is a signed comparison, so r2 is treated as a signed count here)
cmp r2, #6
ble __ndsabi_memcpy1
@ align_switch is defined in macros.inc, which is not visible here.
@ Presumably it inspects the relative alignment of r0 and r1 using r3 as
@ scratch, and branches to __ndsabi_memcpy1 or .Lcopy_halves when the two
@ pointers cannot both be word aligned - TODO confirm against the macro.
align_switch r0, r1, r3, __ndsabi_memcpy1, .Lcopy_halves
@ Check if r0 (or r1) needs word aligning
@ r3 = 4 - r0: the low two bits are the distance to the next word boundary
rsbs r3, r0, #4
@ joaobapt_test is also defined in macros.inc. Judging by the mi/cs
@ conditions used below it presumably moves bit 0 of its operand into N and
@ bit 1 into C - TODO confirm against the macro.
joaobapt_test r3
@ Copy byte head to align
@ (sub has no s suffix, so the flags from the test survive for the cs step)
ldrbmi r3, [r1], #1
strbmi r3, [r0], #1
submi r2, r2, #1
@ r0, r1 are now half aligned
@ Copy half head to align
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
subcs r2, r2, #2
@ r0, r1 are now word aligned
@ At most 3 bytes were consumed above, so at least 4 of the original 7 or
@ more remain. Execution continues into the word-aligned copy below.
@ __aeabi_memcpy8 / __aeabi_memcpy4: both labels mark the same entry point.
@   In:       r0 = destination, r1 = source, r2 = byte count
@   Clobbers: r0-r3 and the condition flags (r4-r10 are saved and restored)
@ The body uses ldm/stm and word ldr/str on r0 and r1 with no alignment
@ check, so both pointers are expected to be word aligned on entry.
@ This point is also reached by fall-through from __aeabi_memcpy once it has
@ aligned the head of the copy.
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy8
BEGIN_ASM_FUNC_NO_SECTION __aeabi_memcpy4
cmp r2, #32
blt .Lcopy_words
@ Word aligned, 32-byte copy
@ r3-r10 stage eight words per iteration; r4-r10 are saved around the loop
push {r4-r10}
.Lloop_32:
@ The subtraction comes first: ge means a full 32-byte chunk was available,
@ gt means at least one more byte remains after this chunk.
subs r2, r2, #32
ldmiage r1!, {r3-r10}
stmiage r0!, {r3-r10}
bgt .Lloop_32
@ pop leaves the flags alone, so bxeq still tests the result of the last subs
pop {r4-r10}
bxeq lr
@ < 32 bytes remaining to be copied
@ r2 went negative in the loop; undo the last subtraction to get 1..31
add r2, r2, #32
.Lcopy_words:
cmp r2, #4
blt .Lcopy_halves
.Lloop_4:
@ Same shape as the 32-byte loop, one word per iteration
subs r2, r2, #4
ldrge r3, [r1], #4
strge r3, [r0], #4
bgt .Lloop_4
bxeq lr
@ Copy byte & half tail
@ This test still works when r2 is negative
@ r2 is now (bytes remaining - 4), i.e. -3..-1, and its low two bits equal
@ the number of bytes remaining (1..3). joaobapt_test comes from macros.inc;
@ presumably it puts bit 0 in N and bit 1 in C - TODO confirm.
joaobapt_test r2
@ Copy half
ldrhcs r3, [r1], #2
strhcs r3, [r0], #2
@ Copy byte
@ (last access, so no post-increment is needed)
ldrbmi r3, [r1]
strbmi r3, [r0]
bx lr
@ .Lcopy_halves: halfword copy path. It is the second branch target of
@ align_switch in __aeabi_memcpy and the target for fewer than 4 bytes in
@ .Lcopy_words.
@   In: r0 = destination, r1 = source, r2 = byte count
@ NOTE(review): only r0 is tested for oddness below. Presumably r1 has the
@ same low bit as r0 whenever this label is reached - confirm against
@ align_switch in macros.inc.
.Lcopy_halves:
@ Copy byte head to align
tst r0, #1
ldrbne r3, [r1], #1
strbne r3, [r0], #1
subne r2, r2, #1
@ r0, r1 are now half aligned
@ __ndsabi_memcpy2: copy r2 bytes using halfword accesses.
@   In:       r0 = destination, r1 = source, r2 = byte count
@   Clobbers: r0-r3 and the condition flags
@ ldrh/strh are used with no alignment check, so both pointers are expected
@ to be halfword aligned on entry.
BEGIN_ASM_FUNC_NO_SECTION __ndsabi_memcpy2
@ ge: a full halfword was available; gt: more bytes remain after it
subs r2, r2, #2
ldrhge r3, [r1], #2
strhge r3, [r0], #2
bgt __ndsabi_memcpy2
bxeq lr
@ Copy byte tail
@ r2 went negative above; adding 2 back gives 0 or 1 and sets Z accordingly
adds r2, r2, #2
ldrbne r3, [r1]
strbne r3, [r0]
bx lr
@-----------------------------------------------------------------------
@ __ndsabi_memcpy1: byte-at-a-time copy, usable with any alignment.
@   In:       r0 = destination, r1 = source, r2 = byte count
@   Clobbers: r0-r3 and the condition flags
@ The count is tested with signed conditions, so nothing is copied when
@ r2 is zero or negative on entry.
@-----------------------------------------------------------------------
BEGIN_ASM_FUNC __ndsabi_memcpy1
1:
        subs    r2, r2, #1              @ take one byte off the count
        ldrbge  r3, [r1], #1            @ count was >= 1: fetch the byte
        strbge  r3, [r0], #1            @ store it; both pointers advance
        bgt     1b                      @ count still > 0: go round again
        bx      lr
@-----------------------------------------------------------------------
@ memcpy: C library entry point layered on __aeabi_memcpy.
@   In:       r0 = destination, r1 = source, r2 = byte count
@   Out:      r0 = destination as passed in
@   Clobbers: r1-r3 and the condition flags
@ __aeabi_memcpy advances r0 while copying, so the incoming value is
@ kept on the stack together with lr and reloaded before returning.
@-----------------------------------------------------------------------
BEGIN_ASM_FUNC memcpy
        stmfd   sp!, {r0, lr}           @ keep dest and return address
        bl      __aeabi_memcpy
        ldmfd   sp!, {r0, lr}           @ r0 = original dest again
        bx      lr