mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-30 04:54:18 +01:00
544 lines
15 KiB
ArmAsm
544 lines
15 KiB
ArmAsm
@/******************************************************************************
|
|
@ *
|
|
@ * Copyright (C) 2015 The Android Open Source Project
|
|
@ *
|
|
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
|
@ * you may not use this file except in compliance with the License.
|
|
@ * You may obtain a copy of the License at:
|
|
@ *
|
|
@ * http://www.apache.org/licenses/LICENSE-2.0
|
|
@ *
|
|
@ * Unless required by applicable law or agreed to in writing, software
|
|
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
@ * See the License for the specific language governing permissions and
|
|
@ * limitations under the License.
|
|
@ *
|
|
@ *****************************************************************************
|
|
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
@*/
|
|
@**
|
|
@******************************************************************************
|
|
@* @file
|
|
@* ih264_intra_pred_chroma_a9q.s
|
|
@*
|
|
@* @brief
|
|
@* Contains function definitions for intra chroma prediction .
|
|
@*
|
|
@* @author
|
|
@* Ittiam
|
|
@*
|
|
@* @par List of Functions:
|
|
@*
|
|
@* - ih264_intra_pred_chroma_mode_horz_a9q()
|
|
@* - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
|
|
@* - ih264_intra_pred_chroma_mode_dc_a9q()
|
|
@* - ih264_intra_pred_chroma_mode_plane_a9q()
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@*
|
|
|
|
@* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
|
|
@
|
|
|
|
.text
|
|
.p2align 2
|
|
|
|
.extern ih264_gai1_intrapred_chroma_plane_coeffs1
|
|
.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
|
|
.extern ih264_gai1_intrapred_chroma_plane_coeffs2
|
|
.hidden ih264_gai1_intrapred_chroma_plane_coeffs2
|
|
scratch_chroma_intrapred_addr1:
|
|
.long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8
|
|
|
|
scratch_intrapred_chroma_plane_addr1:
|
|
.long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8
|
|
@**
|
|
@*******************************************************************************
|
|
@*
|
|
@*ih264_intra_pred_chroma_8x8_mode_dc
|
|
@*
|
|
@* @brief
|
|
@* Perform Intra prediction for chroma_8x8 mode:DC
|
|
@*
|
|
@* @par Description:
|
|
@* Perform Intra prediction for chroma_8x8 mode:DC ,described in sec 8.3.4.1
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* UWORD8 pointer to the source containing alternate U and V samples
|
|
@*
|
|
@* @param[out] pu1_dst
|
|
@* UWORD8 pointer to the destination with alternate U and V samples
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] dst_strd
|
|
@* integer destination stride
|
|
@*
|
|
@** @param[in] ui_neighboravailability
|
|
@* availability of neighbouring pixels
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
|
|
@ UWORD8 *pu1_dst,
|
|
@ WORD32 src_strd,
|
|
@ WORD32 dst_strd,
|
|
@ WORD32 ui_neighboravailability)
|
|
|
|
@**************Variables Vs Registers*****************************************
|
|
@ r0 => *pu1_src
|
|
@ r1 => *pu1_dst
|
|
@ r2 => src_strd
|
|
@ r3 => dst_strd
|
|
@ r4 => ui_neighboravailability
|
|
|
|
.global ih264_intra_pred_chroma_8x8_mode_dc_a9q
|
|
|
|
ih264_intra_pred_chroma_8x8_mode_dc_a9q:
|
|
|
|
stmfd sp!, {r4, r14} @store register values to stack
|
|
ldr r4, [sp, #8] @r4 => ui_neighboravailability
|
|
vpush {d8-d15}
|
|
|
|
ands r2, r4, #0x01 @CHECKING IF LEFT_AVAILABLE ELSE BRANCHING TO ONLY TOP AVAILABLE
|
|
beq top_available
|
|
ands r2, r4, #0x04 @CHECKING IF TOP_AVAILABLE ELSE BRANCHING TO ONLY LEFT AVAILABLE
|
|
beq left_available
|
|
|
|
vld1.u8 {q0}, [r0] @BOTH LEFT AND TOP AVAILABLE
|
|
add r0, r0, #18
|
|
vld1.u8 {q1}, [r0]
|
|
vaddl.u8 q2, d1, d2
|
|
vaddl.u8 q3, d0, d3
|
|
vmovl.u8 q1, d3
|
|
vmovl.u8 q0, d0
|
|
|
|
vadd.u16 d12, d4, d5
|
|
vadd.u16 d13, d2, d3
|
|
vadd.u16 d15, d6, d7
|
|
vadd.u16 d14, d0, d1
|
|
|
|
vpadd.u32 d12, d12, d15
|
|
vpadd.u32 d14, d13, d14
|
|
vqrshrun.s16 d12, q6, #3
|
|
vqrshrun.s16 d14, q7, #2
|
|
vdup.u16 d8, d12[0]
|
|
vdup.u16 d9, d14[0]
|
|
vdup.u16 d10, d14[1]
|
|
vdup.u16 d11, d12[1]
|
|
b str_pred
|
|
|
|
top_available: @ONLY TOP AVAILABLE
|
|
ands r2, r4, #0x04 @CHECKING TOP AVAILABILTY OR ELSE BRANCH TO NONE AVAILABLE
|
|
beq none_available
|
|
|
|
add r0, r0, #18
|
|
vld1.u8 {q0}, [r0]
|
|
vmovl.u8 q1, d0
|
|
vmovl.u8 q2, d1
|
|
vadd.u16 d0, d2, d3
|
|
vadd.u16 d1, d4, d5
|
|
vpaddl.u32 q0, q0
|
|
vqrshrun.s16 d0, q0, #2
|
|
vdup.u16 d8, d0[0]
|
|
vdup.u16 d9, d0[2]
|
|
vmov q5, q4
|
|
b str_pred
|
|
|
|
left_available: @ONLY LEFT AVAILABLE
|
|
vld1.u8 {q0}, [r0]
|
|
vmovl.u8 q1, d0
|
|
vmovl.u8 q2, d1
|
|
vadd.u16 d0, d2, d3
|
|
vadd.u16 d1, d4, d5
|
|
vpaddl.u32 q0, q0
|
|
vqrshrun.s16 d0, q0, #2
|
|
vdup.u16 q5, d0[0]
|
|
vdup.u16 q4, d0[2]
|
|
b str_pred
|
|
|
|
none_available: @NONE AVAILABLE
|
|
vmov.u8 q4, #128
|
|
vmov.u8 q5, #128
|
|
|
|
str_pred:
|
|
vst1.8 {q4}, [r1], r3
|
|
vst1.8 {q4}, [r1], r3
|
|
vst1.8 {q4}, [r1], r3
|
|
vst1.8 {q4}, [r1], r3
|
|
vst1.8 {q5}, [r1], r3
|
|
vst1.8 {q5}, [r1], r3
|
|
vst1.8 {q5}, [r1], r3
|
|
vst1.8 {q5}, [r1], r3
|
|
|
|
vpop {d8-d15}
|
|
ldmfd sp!, {r4, pc} @Restoring registers from stack
|
|
|
|
|
|
|
|
@******************************************************************************
|
|
|
|
|
|
@**
|
|
@*******************************************************************************
|
|
@*
|
|
@*ih264_intra_pred_chroma_8x8_mode_horz
|
|
@*
|
|
@* @brief
|
|
@* Perform Intra prediction for chroma_8x8 mode:Horizontal
|
|
@*
|
|
@* @par Description:
|
|
@* Perform Intra prediction for chroma_8x8 mode:Horizontal ,described in sec 8.3.4.2
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* UWORD8 pointer to the source containing alternate U and V samples
|
|
@*
|
|
@* @param[out] pu1_dst
|
|
@* UWORD8 pointer to the destination with alternate U and V samples
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] dst_strd
|
|
@* integer destination stride
|
|
@*
|
|
@* @param[in] ui_neighboravailability
|
|
@* availability of neighbouring pixels(Not used in this function)
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@*
|
|
@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
|
|
@ UWORD8 *pu1_dst,
|
|
@ WORD32 src_strd,
|
|
@ WORD32 dst_strd,
|
|
@ WORD32 ui_neighboravailability)
|
|
@**************Variables Vs Registers*****************************************
|
|
@ r0 => *pu1_src
|
|
@ r1 => *pu1_dst
|
|
@ r2 => src_strd
|
|
@ r3 => dst_strd
|
|
@ r4 => ui_neighboravailability
|
|
|
|
|
|
.global ih264_intra_pred_chroma_8x8_mode_horz_a9q
|
|
|
|
ih264_intra_pred_chroma_8x8_mode_horz_a9q:
|
|
|
|
stmfd sp!, {r14} @store register values to stack
|
|
|
|
vld1.u8 {q0}, [r0]
|
|
mov r2, #6
|
|
|
|
vdup.u16 q1, d1[3]
|
|
vdup.u16 q2, d1[2]
|
|
vst1.8 {q1}, [r1], r3
|
|
|
|
loop_8x8_horz:
|
|
vext.8 q0, q0, q0, #12
|
|
vst1.8 {q2}, [r1], r3
|
|
vdup.u16 q1, d1[3]
|
|
subs r2, #2
|
|
vdup.u16 q2, d1[2]
|
|
vst1.8 {q1}, [r1], r3
|
|
bne loop_8x8_horz
|
|
|
|
vext.8 q0, q0, q0, #12
|
|
vst1.8 {q2}, [r1], r3
|
|
|
|
ldmfd sp!, {pc} @restoring registers from stack
|
|
|
|
|
|
|
|
|
|
@**
|
|
@*******************************************************************************
|
|
@*
|
|
@*ih264_intra_pred_chroma_8x8_mode_vert
|
|
@*
|
|
@* @brief
|
|
@* Perform Intra prediction for chroma_8x8 mode:vertical
|
|
@*
|
|
@* @par Description:
|
|
@*Perform Intra prediction for chroma_8x8 mode:vertical ,described in sec 8.3.4.3
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* UWORD8 pointer to the source containing alternate U and V samples
|
|
@*
|
|
@* @param[out] pu1_dst
|
|
@* UWORD8 pointer to the destination with alternate U and V samples
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] dst_strd
|
|
@* integer destination stride
|
|
@*
|
|
@* @param[in] ui_neighboravailability
|
|
@* availability of neighbouring pixels(Not used in this function)
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
|
|
@ UWORD8 *pu1_dst,
|
|
@ WORD32 src_strd,
|
|
@ WORD32 dst_strd,
|
|
@ WORD32 ui_neighboravailability)
|
|
|
|
@**************Variables Vs Registers*****************************************
|
|
@ r0 => *pu1_src
|
|
@ r1 => *pu1_dst
|
|
@ r2 => src_strd
|
|
@ r3 => dst_strd
|
|
@ r4 => ui_neighboravailability
|
|
|
|
|
|
.global ih264_intra_pred_chroma_8x8_mode_vert_a9q
|
|
|
|
ih264_intra_pred_chroma_8x8_mode_vert_a9q:
|
|
|
|
stmfd sp!, {r4-r12, r14} @store register values to stack
|
|
|
|
add r0, r0, #18
|
|
vld1.8 {q0}, [r0]
|
|
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
vst1.8 {q0}, [r1], r3
|
|
|
|
ldmfd sp!, {r4-r12, pc} @Restoring registers from stack
|
|
|
|
|
|
|
|
|
|
@******************************************************************************
|
|
|
|
|
|
@**
|
|
@*******************************************************************************
|
|
@*
|
|
@*ih264_intra_pred_chroma_8x8_mode_plane
|
|
@*
|
|
@* @brief
|
|
@* Perform Intra prediction for chroma_8x8 mode:PLANE
|
|
@*
|
|
@* @par Description:
|
|
@* Perform Intra prediction for chroma_8x8 mode:PLANE ,described in sec 8.3.4.4
|
|
@*
|
|
@* @param[in] pu1_src
|
|
@* UWORD8 pointer to the source containing alternate U and V samples
|
|
@*
|
|
@* @param[out] pu1_dst
|
|
@* UWORD8 pointer to the destination with alternate U and V samples
|
|
@*
|
|
@* @param[in] src_strd
|
|
@* integer source stride
|
|
@*
|
|
@* @param[in] dst_strd
|
|
@* integer destination stride
|
|
@*
|
|
@* @param[in] ui_neighboravailability
|
|
@* availability of neighbouring pixels
|
|
@*
|
|
@* @returns
|
|
@*
|
|
@* @remarks
|
|
@* None
|
|
@*
|
|
@*******************************************************************************
|
|
@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
|
|
@ UWORD8 *pu1_dst,
|
|
@ WORD32 src_strd,
|
|
@ WORD32 dst_strd,
|
|
@ WORD32 ui_neighboravailability)
|
|
|
|
@**************Variables Vs Registers*****************************************
|
|
@ r0 => *pu1_src
|
|
@ r1 => *pu1_dst
|
|
@ r2 => src_strd
|
|
@ r3 => dst_strd
|
|
@ r4 => ui_neighboravailability
|
|
|
|
.global ih264_intra_pred_chroma_8x8_mode_plane_a9q
|
|
ih264_intra_pred_chroma_8x8_mode_plane_a9q:
|
|
|
|
stmfd sp!, {r4-r10, r12, lr}
|
|
vpush {d8-d15}
|
|
|
|
vld1.32 d0, [r0]
|
|
add r10, r0, #10
|
|
vld1.32 d1, [r10]
|
|
add r10, r10, #6
|
|
vrev64.16 d5, d0
|
|
vld1.32 d2, [r10]!
|
|
add r10, r10, #2
|
|
vrev64.16 d7, d2
|
|
vld1.32 d3, [r10]
|
|
sub r5, r3, #8
|
|
ldr r12, scratch_chroma_intrapred_addr1
|
|
scrlblc1:
|
|
add r12, r12, pc
|
|
vsubl.u8 q5, d5, d1
|
|
vld1.64 {q4}, [r12] @ Load multiplication factors 1 to 8 into D3
|
|
vsubl.u8 q6, d3, d7
|
|
vmul.s16 q7, q5, q4
|
|
vmul.s16 q8, q6, q4
|
|
vuzp.16 q7, q8
|
|
|
|
vpadd.s16 d14, d14
|
|
vpadd.s16 d15, d15
|
|
vpadd.s16 d16, d16
|
|
vpadd.s16 d17, d17
|
|
vpadd.s16 d14, d14
|
|
vpadd.s16 d15, d15
|
|
vpadd.s16 d16, d16
|
|
vpadd.s16 d17, d17
|
|
|
|
mov r6, #34
|
|
vdup.16 q9, r6
|
|
|
|
vmull.s16 q11, d14, d18
|
|
vmull.s16 q12, d15, d18
|
|
vmull.s16 q13, d16, d18
|
|
vmull.s16 q14, d17, d18
|
|
|
|
vrshrn.s32 d10, q11, #6
|
|
vrshrn.s32 d12, q12, #6
|
|
vrshrn.s32 d13, q13, #6
|
|
vrshrn.s32 d14, q14, #6
|
|
|
|
|
|
ldrb r6, [r0], #1
|
|
add r10, r0, #31
|
|
ldrb r8, [r0], #1
|
|
ldrb r7, [r10], #1
|
|
ldrb r9, [r10], #1
|
|
|
|
add r6, r6, r7
|
|
add r8, r8, r9
|
|
lsl r6, r6, #4
|
|
lsl r8, r8, #4
|
|
|
|
vdup.16 q0, r6
|
|
vdup.16 q1, r8
|
|
vdup.16 q2, d12[0]
|
|
vdup.16 q3, d10[0]
|
|
|
|
vdup.16 q12, d14[0]
|
|
vdup.16 q13, d13[0]
|
|
vzip.16 q2, q12
|
|
vzip.16 q3, q13
|
|
vzip.16 q0, q1
|
|
|
|
ldr r12, scratch_intrapred_chroma_plane_addr1
|
|
scrlblc2:
|
|
add r12, r12, pc
|
|
vld1.64 {q4}, [r12]
|
|
vmov.16 q5, q4
|
|
vmov q11, q4
|
|
vzip.16 q4, q5
|
|
|
|
vmul.s16 q6, q2, q4
|
|
vmul.s16 q8, q2, q5
|
|
vadd.s16 q6, q0, q6
|
|
vadd.s16 q8, q0, q8
|
|
|
|
|
|
vdup.16 q10, d22[0]
|
|
vmul.s16 q2, q3, q10
|
|
vdup.16 q15, d22[1]
|
|
vmul.s16 q9, q3, q10
|
|
vmul.s16 q7, q3, q15
|
|
vmul.s16 q4, q3, q15
|
|
vadd.s16 q12, q6, q2
|
|
vadd.s16 q0, q8, q9
|
|
vadd.s16 q1, q6, q7
|
|
vqrshrun.s16 d28, q12, #5
|
|
vadd.s16 q13, q8, q4
|
|
vqrshrun.s16 d29, q0, #5
|
|
vdup.16 q10, d22[2]
|
|
vst1.8 {q14}, [r1], r3
|
|
vqrshrun.s16 d28, q1, #5
|
|
vqrshrun.s16 d29, q13, #5
|
|
vmul.s16 q2, q3, q10
|
|
vmul.s16 q9, q3, q10
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q12, q6, q2
|
|
vadd.s16 q0, q8, q9
|
|
vdup.16 q15, d22[3]
|
|
vqrshrun.s16 d28, q12, #5
|
|
vqrshrun.s16 d29, q0, #5
|
|
vmul.s16 q7, q3, q15
|
|
vmul.s16 q4, q3, q15
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q1, q6, q7
|
|
vadd.s16 q13, q8, q4
|
|
vdup.16 q10, d23[0]
|
|
vqrshrun.s16 d28, q1, #5
|
|
vqrshrun.s16 d29, q13, #5
|
|
vmul.s16 q2, q3, q10
|
|
vmul.s16 q9, q3, q10
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q12, q6, q2
|
|
vadd.s16 q0, q8, q9
|
|
vdup.16 q15, d23[1]
|
|
vqrshrun.s16 d28, q12, #5
|
|
vqrshrun.s16 d29, q0, #5
|
|
vmul.s16 q7, q3, q15
|
|
vmul.s16 q4, q3, q15
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q1, q6, q7
|
|
vadd.s16 q13, q8, q4
|
|
vdup.16 q10, d23[2]
|
|
vqrshrun.s16 d28, q1, #5
|
|
vqrshrun.s16 d29, q13, #5
|
|
vmul.s16 q2, q3, q10
|
|
vmul.s16 q9, q3, q10
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q12, q6, q2
|
|
vadd.s16 q0, q8, q9
|
|
vdup.16 q15, d23[3]
|
|
vqrshrun.s16 d28, q12, #5
|
|
vqrshrun.s16 d29, q0, #5
|
|
vmul.s16 q7, q3, q15
|
|
vmul.s16 q4, q3, q15
|
|
vst1.8 {q14}, [r1], r3
|
|
vadd.s16 q1, q6, q7
|
|
vadd.s16 q13, q8, q4
|
|
vqrshrun.s16 d28, q1, #5
|
|
vqrshrun.s16 d29, q13, #5
|
|
vst1.8 {q14}, [r1], r3
|
|
|
|
|
|
|
|
end_func_plane:
|
|
|
|
vpop {d8-d15}
|
|
ldmfd sp!, {r4-r10, r12, pc}
|
|
|
|
|
|
|
|
|