mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-07 15:48:15 +01:00
273 lines
5.8 KiB
ArmAsm
273 lines
5.8 KiB
ArmAsm
@/******************************************************************************
|
|
@ *
|
|
@ * Copyright (C) 2015 The Android Open Source Project
|
|
@ *
|
|
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
|
@ * you may not use this file except in compliance with the License.
|
|
@ * You may obtain a copy of the License at:
|
|
@ *
|
|
@ * http://www.apache.org/licenses/LICENSE-2.0
|
|
@ *
|
|
@ * Unless required by applicable law or agreed to in writing, software
|
|
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@ * See the License for the specific language governing permissions and
|
|
@ * limitations under the License.
|
|
@ *
|
|
@ *****************************************************************************
|
|
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
@*/
|
|
@**
|
|
@ *******************************************************************************
|
|
@ * @file
|
|
@ * ih264_mem_fns_neon.s
|
|
@ *
|
|
@ * @brief
|
|
@ * Contains function definitions for memory manipulation
|
|
@ *
|
|
@ * @author
|
|
@ * Naveen SR
|
|
@ *
|
|
@ * @par List of Functions:
|
|
@ * - ih264_memcpy_mul_8_a9q()
|
|
@ * - ih264_memcpy_a9q()
|
|
@ * - ih264_memset_mul_8_a9q()
|
|
@ * - ih264_memset_a9q()
|
|
@ * - ih264_memset_16bit_mul_8_a9q()
|
|
@ * - ih264_memset_16bit_a9q()
|
|
@ *
|
|
@ * @remarks
|
|
@ * None
|
|
@ *
|
|
@ *******************************************************************************
|
|
@*
|
|
|
|
@**
|
|
@*******************************************************************************
|
|
@*
|
|
@* @brief
|
|
@* memcpy of a 1d array
|
|
@*
|
|
@* @par Description:
|
|
@* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
|
|
@*
|
|
@* @param[in] pu1_dst
|
|
@* UWORD8 pointer to the destination
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* UWORD8 pointer to the source
|
|
@*
|
|
@* @param[in] num_bytes
|
|
@* number of bytes to copy
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@*
|
|
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
|
|
@ UWORD8 *pu1_src,
|
|
@ UWORD32 num_bytes)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu1_dst
|
|
@ r1 => *pu1_src
|
|
@ r2 => num_bytes
|
|
|
|
.text
|
|
.p2align 2
|
|
|
|
|
|
@ ih264_memcpy_mul_8_a9q
@   Copies num_bytes bytes from pu1_src to pu1_dst, one 8-byte NEON
@   register at a time.
@     r0 = pu1_dst, r1 = pu1_src, r2 = num_bytes
@   The loop copies before it tests the count and leaves only when the
@   count reaches exactly zero, so num_bytes must be a non-zero multiple
@   of 8.
@   Modifies r0, r1, r2, d0 and the condition flags.
    .global ih264_memcpy_mul_8_a9q

ih264_memcpy_mul_8_a9q:
1:
    vld1.8      {d0}, [r1]!         @ d0 = next 8 source bytes, r1 += 8
    vst1.8      {d0}, [r0]!         @ store them, r0 += 8
    subs        r2, #8              @ 8 fewer bytes to go
    bne         1b                  @ repeat until the count is exactly 0
    bx          lr
|
|
|
|
|
|
|
|
@*******************************************************************************
|
|
@*
|
|
@void ih264_memcpy(UWORD8 *pu1_dst,
|
|
@ UWORD8 *pu1_src,
|
|
@ UWORD32 num_bytes)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu1_dst
|
|
@ r1 => *pu1_src
|
|
@ r2 => num_bytes
|
|
|
|
|
|
|
|
@ ih264_memcpy_a9q
@   Copies num_bytes bytes from pu1_src to pu1_dst.
@     r0 = pu1_dst, r1 = pu1_src, r2 = num_bytes
@   Groups of 8 bytes are moved through NEON register d0; whatever is
@   left over is moved one byte at a time through r3.
@   num_bytes may be 0, in which case nothing is read or written.
@   Modifies r0-r3, d0 and the condition flags.
    .global ih264_memcpy_a9q

ih264_memcpy_a9q:
    subs        r2, r2, #8          @ bias the count: r2 = remaining - 8
    blt         2f                  @ fewer than 8 bytes: skip the NEON loop

1:
    vld1.8      {d0}, [r1]!         @ d0 = next 8 source bytes, r1 += 8
    vst1.8      {d0}, [r0]!         @ store them, r0 += 8
    subs        r2, r2, #8
    bge         1b                  @ another full group of 8 is available

2:
    adds        r2, r2, #8          @ remove the bias: r2 = bytes still to copy
    bxeq        lr                  @ none left (this also covers num_bytes == 0,
                                    @ which would otherwise enter the byte loop
                                    @ and wrap the counter)

3:
    ldrb        r3, [r1], #1        @ copy one trailing byte
    strb        r3, [r0], #1
    subs        r2, r2, #1
    bne         3b
    bx          lr
|
|
|
|
|
|
|
|
|
|
@void ih264_memset_mul_8(UWORD8 *pu1_dst,
|
|
@ UWORD8 value,
|
|
@ UWORD32 num_bytes)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu1_dst
|
|
@ r1 => value
|
|
@ r2 => num_bytes
|
|
|
|
|
|
|
|
|
|
|
|
@ ih264_memset_mul_8_a9q
@   Fills num_bytes bytes at pu1_dst with the low byte of value, storing
@   8 bytes per iteration from NEON register d0.
@     r0 = pu1_dst, r1 = value, r2 = num_bytes
@   Assumption: num_bytes is 8, 16 or 32. The loop stores before it tests
@   the count and leaves only when the count reaches exactly zero, so the
@   count must be a non-zero multiple of 8.
@   Modifies r0, r2, d0 and the condition flags.
    .global ih264_memset_mul_8_a9q

ih264_memset_mul_8_a9q:
    vdup.8      d0, r1              @ replicate the fill byte into all 8 lanes
1:
    vst1.8      {d0}, [r0]!         @ write 8 bytes, r0 += 8
    subs        r2, #8              @ 8 fewer bytes to go
    bne         1b                  @ repeat until the count is exactly 0
    bx          lr
|
|
|
|
|
|
|
|
|
|
@void ih264_memset(UWORD8 *pu1_dst,
|
|
@ UWORD8 value,
|
|
@ UWORD32 num_bytes)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu1_dst
|
|
@ r1 => value
|
|
@ r2 => num_bytes
|
|
|
|
|
|
|
|
@ ih264_memset_a9q
@   Fills num_bytes bytes at pu1_dst with the low byte of value.
@     r0 = pu1_dst, r1 = value, r2 = num_bytes
@   Groups of 8 bytes are written from NEON register d0; whatever is
@   left over is written one byte at a time from r1.
@   num_bytes may be 0, in which case nothing is written.
@   Modifies r0, r2, d0 and the condition flags.
    .global ih264_memset_a9q

ih264_memset_a9q:
    subs        r2, r2, #8          @ bias the count: r2 = remaining - 8
    blt         2f                  @ fewer than 8 bytes: skip the NEON loop
    vdup.8      d0, r1              @ replicate the fill byte into all 8 lanes

1:
    vst1.8      {d0}, [r0]!         @ memset 8 bytes, r0 += 8
    subs        r2, r2, #8
    bge         1b                  @ another full group of 8 is available

2:
    adds        r2, r2, #8          @ remove the bias: r2 = bytes still to fill
    bxeq        lr                  @ none left (this also covers num_bytes == 0,
                                    @ which would otherwise enter the byte loop
                                    @ and wrap the counter)

3:
    strb        r1, [r0], #1        @ fill one trailing byte
    subs        r2, r2, #1
    bne         3b
    bx          lr
|
|
|
|
|
|
|
|
|
|
@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
|
|
@ UWORD16 value,
|
|
@ UWORD32 num_words)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu2_dst
|
|
@ r1 => value
|
|
@ r2 => num_words
|
|
|
|
|
|
|
|
|
|
|
|
@ ih264_memset_16bit_mul_8_a9q
@   Fills num_words 16-bit elements at pu2_dst with the low halfword of
@   value, 8 elements per iteration.
@     r0 = pu2_dst, r1 = value, r2 = num_words
@   Assumption: num_words is 8, 16 or 32. The loop stores before it tests
@   the count and leaves only when the count reaches exactly zero, so the
@   count must be a non-zero multiple of 8.
@   Modifies r0, r2, d0 and the condition flags.
    .global ih264_memset_16bit_mul_8_a9q

ih264_memset_16bit_mul_8_a9q:
    vdup.16     d0, r1              @ replicate the halfword into all 4 lanes
1:
    vst1.16     {d0}, [r0]!         @ elements 0-3 of this group, r0 += 8
    vst1.16     {d0}, [r0]!         @ elements 4-7 of this group, r0 += 8
    subs        r2, #8              @ 8 fewer elements to go
    bne         1b                  @ repeat until the count is exactly 0
    bx          lr
|
|
|
|
|
|
|
|
|
|
@void ih264_memset_16bit(UWORD16 *pu2_dst,
|
|
@ UWORD16 value,
|
|
@ UWORD32 num_words)
|
|
@**************Variables Vs Registers*************************
|
|
@ r0 => *pu2_dst
|
|
@ r1 => value
|
|
@ r2 => num_words
|
|
|
|
|
|
|
|
@ ih264_memset_16bit_a9q
@   Fills num_words 16-bit elements at pu2_dst with the low halfword of
@   value.
@     r0 = pu2_dst, r1 = value, r2 = num_words
@   Groups of 8 elements are written from NEON register d0 (two 4-element
@   stores); whatever is left over is written one halfword at a time
@   from r1.
@   num_words may be 0, in which case nothing is written.
@   Modifies r0, r2, d0 and the condition flags.
    .global ih264_memset_16bit_a9q

ih264_memset_16bit_a9q:
    subs        r2, r2, #8          @ bias the count: r2 = remaining - 8
    blt         2f                  @ fewer than 8 elements: skip the NEON loop
    vdup.16     d0, r1              @ replicate the halfword into all 4 lanes

1:
    vst1.16     {d0}, [r0]!         @ elements 0-3 of this group, r0 += 8
    vst1.16     {d0}, [r0]!         @ elements 4-7 of this group, r0 += 8
    subs        r2, r2, #8
    bge         1b                  @ another full group of 8 is available

2:
    adds        r2, r2, #8          @ remove the bias: r2 = elements still to fill
    bxeq        lr                  @ none left (this also covers num_words == 0,
                                    @ which would otherwise enter the halfword
                                    @ loop and wrap the counter)

3:
    strh        r1, [r0], #2        @ fill one trailing element
    subs        r2, r2, #1
    bne         3b
    bx          lr
|
|
|
|
|
|
|
|
|