snes9xgx/gu_psasm.S

695 lines
16 KiB
ArmAsm
Raw Normal View History

#include <asm.h>
// Symbolic names for the paired-single floating-point registers used below.
// Each register carries two floats, so a name such as A00_A01 denotes the
// pair (A[0][0], A[0][1]).

// Element pairs of the first source matrix (A) in ps_guMtxConcat.
#define A00_A01 fr0
#define A02_A03 fr1
#define A10_A11 fr2
#define A12_A13 fr3
#define A20_A21 fr4
#define A22_A23 fr5
// Element pairs of the second source matrix (B) in ps_guMtxConcat.
#define B00_B01 fr6
#define B02_B03 fr7
#define B10_B11 fr8
#define B12_B13 fr9
#define B20_B21 fr10
#define B22_B23 fr11
// Result pairs for rows 0 and 1. ps_guMtxConcat spills fr14 and fr15 to the
// stack before using them and reloads them before returning.
#define D00_D01 fr12
#define D02_D03 fr13
#define D10_D11 fr14
#define D12_D13 fr15
// Result pairs for row 2 share fr2 and fr0 with A10_A11 and A00_A01, so they
// may only be written once those two A pairs are no longer needed.
#define D20_D21 fr2
#define D22_D23 fr0
// Receives the pair loaded from the Unit01 constant in ps_guMtxConcat.
#define UNIT01 fr31
// NOTE(review): RET_REG is not referenced in the visible part of this file.
#define RET_REG fr1
// Vector operand names: *_XY holds the (x, y) pair, *_Z the z component.
// V1/V2 are inputs and D1 the result in ps_guVecAdd and ps_guVecSub.
#define V1_XY fr2
#define V1_Z fr3
#define V2_XY fr4
#define V2_Z fr5
#define D1_XY fr6
#define D1_Z fr7
// NOTE(review): D2_* and W* are not referenced in the visible part of this file.
#define D2_XY fr8
#define D2_Z fr9
#define W1_XY fr10
#define W1_Z fr11
#define W2_XY fr12
#define W2_Z fr13
.globl ps_guMtxConcat
// ps_guMtxConcat: mtxAB = mtxA * mtxB for 3x4 float matrices, with the
// translation column of mtxA added into the last column of the product.
//   r3 = mtxA, r4 = mtxB, r5 = mtxAB
//   Scratch: r6, fr0-fr13. fr14, fr15 and fr31 are saved with stfd in a
//   64-byte stack frame and reloaded with lfd before returning.
// Every load from mtxA and mtxB is issued before the first store to mtxAB, so
// the destination may be the same matrix as either source.
// Loads, arithmetic and stores are interleaved on purpose; row 2 of the
// result reuses fr2 and fr0 after the A pairs held there are dead.
ps_guMtxConcat:
// Open the stack frame and start loading while the callee-saved FPRs spill.
stwu r1,-64(r1)
psq_l A00_A01,0(r3),0,0
stfd fr14,8(r1)
psq_l B00_B01,0(r4),0,0
// r6 = address of Unit01 (high half here, low half a few lines below).
lis r6,Unit01@ha
psq_l B02_B03,8(r4),0,0
stfd fr15,16(r1)
addi 6,6,Unit01@l
stfd fr31,40(r1)
psq_l B10_B11,16(r4),0,0
// Rows 0 and 1: start with B row 0 scaled by a00 and a10.
ps_muls0 D00_D01,B00_B01,A00_A01
psq_l A10_A11,16(r3),0,0
ps_muls0 D02_D03,B02_B03,A00_A01
psq_l UNIT01,0(r6),0,0
ps_muls0 D10_D11,B00_B01,A10_A11
psq_l B12_B13,24(r4),0,0
ps_muls0 D12_D13,B02_B03,A10_A11
psq_l A02_A03,8(r3),0,0
// Add B row 1 scaled by a01 and a11.
ps_madds1 D00_D01,B10_B11,A00_A01,D00_D01
psq_l A12_A13,24(r3),0,0
ps_madds1 D10_D11,B10_B11,A10_A11,D10_D11
psq_l B20_B21,32(r4),0,0
ps_madds1 D02_D03,B12_B13,A00_A01,D02_D03
psq_l B22_B23,40(r4),0,0
ps_madds1 D12_D13,B12_B13,A10_A11,D12_D13
psq_l A20_A21,32(r3),0,0
psq_l A22_A23,40(r3),0,0
// Add B row 2 scaled by a02 and a12. All source loads are complete here.
ps_madds0 D00_D01,B20_B21,A02_A03,D00_D01
ps_madds0 D02_D03,B22_B23,A02_A03,D02_D03
ps_madds0 D10_D11,B20_B21,A12_A13,D10_D11
ps_madds0 D12_D13,B22_B23,A12_A13,D12_D13
psq_st D00_D01,0(r5),0,0
// Row 2 begins; D20_D21 and D22_D23 overwrite fr2 and fr0.
ps_muls0 D20_D21,B00_B01,A20_A21
// UNIT01 is (0.0, 1.0), so this adds a03 to the second lane only.
ps_madds1 D02_D03,UNIT01,A02_A03,D02_D03
ps_muls0 D22_D23,B02_B03,A20_A21
psq_st D10_D11,16(r5),0,0
// Same translation fold-in for row 1 (a13).
ps_madds1 D12_D13,UNIT01,A12_A13,D12_D13
psq_st D02_D03,8(r5),0,0
ps_madds1 D20_D21,B10_B11,A20_A21,D20_D21
ps_madds1 D22_D23,B12_B13,A20_A21,D22_D23
ps_madds0 D20_D21,B20_B21,A22_A23,D20_D21
// fr14 (D10_D11) was stored above, so it can be restored now.
lfd fr14,8(r1)
psq_st D12_D13,24(r5),0,0
ps_madds0 D22_D23,B22_B23,A22_A23,D22_D23
psq_st D20_D21,32(r5),0,0
// Translation fold-in for row 2 (a23).
ps_madds1 D22_D23,UNIT01,A22_A23,D22_D23
lfd fr15,16(r1)
psq_st D22_D23,40(r5),0,0
// Restore fr31 after its last use, then drop the frame.
lfd fr31,40(r1)
addi r1,r1,64
blr
//-----------------------------------------------------------------------
// ps_guMtxIdentity: fill a 3x4 float matrix with the identity transform.
//   In:    r3 = mtx (48 bytes, written)
//   Clobb: r9, fr0-fr3
// The (0, 1) pair is built from the second lane of fr1, so the routine
// relies on lfs leaving 1.0 in both lanes of that register.
//-----------------------------------------------------------------------
	.globl	ps_guMtxIdentity
ps_guMtxIdentity:
	lis		r9,Unit01@ha
	addi		r9,r9,Unit01@l		// r9 = &Unit01
	lfs		fr0,0(r9)		// fr0 = 0.0
	lfs		fr1,4(r9)		// fr1 = 1.0

	ps_merge10	fr3,fr1,fr0		// fr3 = (1, 0)
	ps_merge01	fr2,fr0,fr1		// fr2 = (0, 1)

	psq_st		fr3,0(r3),0,0		// m[0][0], m[0][1] = 1, 0
	psq_st		fr0,8(r3),0,0		// m[0][2], m[0][3] = 0, 0
	psq_st		fr2,16(r3),0,0		// m[1][0], m[1][1] = 0, 1
	psq_st		fr0,24(r3),0,0		// m[1][2], m[1][3] = 0, 0
	psq_st		fr0,32(r3),0,0		// m[2][0], m[2][1] = 0, 0
	psq_st		fr3,40(r3),0,0		// m[2][2], m[2][3] = 1, 0
	blr
//-----------------------------------------------------------------------
// ps_guMtxCopy: copy a 3x4 float matrix (48 bytes), two floats at a time.
//   In:    r3 = src, r4 = dst
//   Clobb: fr0-fr5
// Pairs are copied in ascending address order, each one stored right
// after it is loaded.
//-----------------------------------------------------------------------
	.globl	ps_guMtxCopy
ps_guMtxCopy:
	psq_l		fr0,0(r3),0,0		// row 0, columns 0-1
	psq_st		fr0,0(r4),0,0

	psq_l		fr1,8(r3),0,0		// row 0, columns 2-3
	psq_st		fr1,8(r4),0,0

	psq_l		fr2,16(r3),0,0		// row 1, columns 0-1
	psq_st		fr2,16(r4),0,0

	psq_l		fr3,24(r3),0,0		// row 1, columns 2-3
	psq_st		fr3,24(r4),0,0

	psq_l		fr4,32(r3),0,0		// row 2, columns 0-1
	psq_st		fr4,32(r4),0,0

	psq_l		fr5,40(r3),0,0		// row 2, columns 2-3
	psq_st		fr5,40(r4),0,0
	blr
.globl ps_guMtxTranspose
// ps_guMtxTranspose: xpose = transpose of the upper-left 3x3 part of src.
// The fourth column of xpose is written as zeros; the fourth column of src
// is never used in the result.
//   r3 = src, r4 = xpose (3x4 float matrices)
//   Scratch: r9, fr0-fr7
// Each src slot that is needed is loaded before the xpose slot at the same
// offset is stored, so src and xpose may be the same matrix. Do not reorder
// the loads and stores without rechecking that property.
ps_guMtxTranspose:
lis r9,Unit01@ha
addi r9,r9,Unit01@l
// fr0 = 0.0
lfs fr0,0(r9)
// fr1 = (m00, m01)
psq_l fr1,0(r3),0,0
// xpose[2][3] = 0
stfs fr0,44(r4)
// fr2 = (m10, m11)
psq_l fr2,16(r3),0,0
// fr5 = (m00, m10)
ps_merge00 fr5,fr1,fr2
// fr3 lane 0 = m02 (single-element load)
psq_l fr3,8(r3),1,0
// fr6 = (m01, m11)
ps_merge11 fr6,fr1,fr2
// fr4 lane 0 = m12 (single-element load)
psq_l fr4,24(r3),1,0
psq_st fr5,0(r4),0,0
// fr1 = (m20, m21)
psq_l fr1,32(r3),0,0
// fr7 = (m02, m12)
ps_merge00 fr7,fr3,fr4
psq_st fr6,16(r4),0,0
// fr5 = (m20, 0)
ps_merge00 fr5,fr1,fr0
psq_st fr7,32(r4),0,0
// fr6 = (m21, 0)
ps_merge10 fr6,fr1,fr0
psq_st fr5,8(r4),0,0
// m22 stays on the diagonal and is copied straight across.
lfs fr3,40(r3)
psq_st fr6,24(r4),0,0
stfs fr3,40(r4)
blr
.globl ps_guMtxInverse
// ps_guMtxInverse: invert a 3x4 float matrix treated as an affine transform
// (3x3 linear part plus translation column).
//   r3 = src, r4 = inv
//   Returns r3 = 1 on success. Returns r3 = 0, without writing inv, when the
//   determinant of the 3x3 part compares equal to zero.
//   Scratch: fr0-fr13, cr0
// All reads of src happen before the first store to inv.
ps_guMtxInverse:
// Load the 3x3 part: fr0/fr2/fr4 lane 0 = m00/m10/m20,
// fr1 = (m01, m02), fr3 = (m11, m12), fr5 = (m21, m22).
// fr6/fr7/fr8 are built as (m02, m00), (m12, m10), (m22, m20).
psq_l fr0,0(r3),1,0
psq_l fr1,4(r3),0,0
psq_l fr2,16(r3),1,0
ps_merge10 fr6,fr1,fr0
psq_l fr3,20(r3),0,0
psq_l fr4,32(r3),1,0
ps_merge10 fr7,fr3,fr2
psq_l fr5,36(r3),0,0
// Cofactor pairs: fr13, fr12 and fr11 each end up holding two 2x2
// determinants; lane 0 of fr10, fr9 and fr8 holds the remaining three.
ps_mul fr11,fr3,fr6
ps_mul fr13,fr5,fr7
ps_merge10 fr8,fr5,fr4
ps_msub fr11,fr1,fr7,fr11
ps_mul fr12,fr1,fr8
ps_msub fr13,fr3,fr8,fr13
ps_mul fr10,fr3,fr4
ps_msub fr12,fr5,fr6,fr12
ps_mul fr9,fr0,fr5
ps_mul fr8,fr1,fr2
// fr6 = 0, used as the comparison value for the determinant.
ps_sub fr6,fr6,fr6
ps_msub fr10,fr2,fr5,fr10
// Determinant accumulates in lane 0 of fr7:
// m00*fr13 + m10*fr12 + m20*fr11.
ps_mul fr7,fr0,fr13
ps_msub fr9,fr1,fr4,fr9
ps_madd fr7,fr2,fr12,fr7
ps_msub fr8,fr0,fr3,fr8
ps_madd fr7,fr4,fr11,fr7
// Singular matrix: report failure.
ps_cmpo0 cr0,fr7,fr6
bne 0f
li r3,0
blr
// Reciprocal estimate of the determinant, refined with one Newton step:
// fr0 = 2*est - det*est*est.
0: fres fr0,fr7
ps_add fr6,fr0,fr0
ps_mul fr5,fr0,fr0
ps_nmsub fr0,fr7,fr5,fr6
// fr1, fr2, fr3 = translation column of src (m03, m13, m23).
lfs fr1,12(r3)
ps_muls0 fr13,fr13,fr0
lfs fr2,28(r3)
ps_muls0 fr12,fr12,fr0
lfs fr3,44(r3)
ps_muls0 fr11,fr11,fr0
// After scaling: fr13 = (inv00, inv10), fr12 = (inv01, inv11),
// fr11 = (inv02, inv12); lane 0 of fr10, fr9, fr8 = inv20, inv21, inv22.
ps_merge00 fr5,fr13,fr12
ps_muls0 fr10,fr10,fr0
ps_merge11 fr4,fr13,fr12
ps_muls0 fr9,fr9,fr0
psq_st fr5,0(r4),0,0
// fr6 accumulates the negated product of inverse rows 0 and 1 with the
// source translation, giving (inv03, inv13).
ps_mul fr6,fr13,fr1
psq_st fr4,16(r4),0,0
ps_muls0 fr8,fr8,fr0
ps_madd fr6,fr12,fr2,fr6
psq_st fr10,32(r4),1,0
ps_nmadd fr6,fr11,fr3,fr6
psq_st fr9,36(r4),1,0
// fr7 does the same for row 2, giving inv23.
ps_mul fr7,fr10,fr1
ps_merge00 fr5,fr11,fr6
psq_st fr8,40(r4),1,0
ps_merge11 fr4,fr11,fr6
psq_st fr5,8(r4),0,0
ps_madd fr7,fr9,fr2,fr7
psq_st fr4,24(r4),0,0
ps_nmadd fr7,fr8,fr3,fr7
// Success.
li r3,1
psq_st fr7,44(r4),1,0
blr
.globl ps_guMtxInvXpos
// ps_guMtxInvXpos: invx = transpose of the inverse of the upper-left 3x3
// part of src. The fourth column of invx is written as zeros.
//   r3 = src, r4 = invx
//   Returns r3 = 1 on success. Returns r3 = 0, without writing invx, when
//   the 3x3 determinant compares equal to zero.
//   Scratch: fr0-fr13, cr0
// All reads of src happen before the first store to invx.
ps_guMtxInvXpos:
// Load the 3x3 part: fr0/fr2/fr4 lane 0 = m00/m10/m20,
// fr1 = (m01, m02), fr3 = (m11, m12), fr5 = (m21, m22).
psq_l fr0, 0(r3), 1, 0
psq_l fr1, 4(r3), 0, 0
psq_l fr2, 16(r3), 1, 0
ps_merge10 fr6, fr1, fr0
psq_l fr3, 20(r3), 0, 0
psq_l fr4, 32(r3), 1, 0
ps_merge10 fr7, fr3, fr2
psq_l fr5, 36(r3), 0, 0
// Cofactor pairs in fr13, fr12, fr11; single cofactors in lane 0 of
// fr10, fr9, fr8.
ps_mul fr11, fr3, fr6
ps_merge10 fr8, fr5, fr4
ps_mul fr13, fr5, fr7
ps_msub fr11, fr1, fr7, fr11
ps_mul fr12, fr1, fr8
ps_msub fr13, fr3, fr8, fr13
ps_msub fr12, fr5, fr6, fr12
ps_mul fr10, fr3, fr4
ps_mul fr9, fr0, fr5
ps_mul fr8, fr1, fr2
ps_msub fr10, fr2, fr5, fr10
ps_msub fr9, fr1, fr4, fr9
ps_msub fr8, fr0, fr3, fr8
// Determinant in lane 0 of fr7; fr1 is cleared to 0 for the comparison and
// for the zero column written below.
ps_mul fr7, fr0, fr13
ps_sub fr1, fr1, fr1
ps_madd fr7, fr2, fr12, fr7
ps_madd fr7, fr4, fr11, fr7
// Singular matrix: report failure.
ps_cmpo0 cr0, fr7, fr1
bne 0f
addi r3, 0, 0
blr
// Reciprocal estimate of the determinant plus one Newton step
// (fr0 = 2*est - det*est*est), interleaved with the zero-column stores.
0: fres fr0, fr7
psq_st fr1, 12(r4), 1, 0
ps_add fr6, fr0, fr0
ps_mul fr5, fr0, fr0
psq_st fr1, 28(r4), 1, 0
ps_nmsub fr0, fr7, fr5, fr6
psq_st fr1, 44(r4), 1, 0
// Scale the cofactors by 1/det. The pairs are stored as rows here, which is
// what makes the result the transpose of the inverse.
ps_muls0 fr13, fr13, fr0
ps_muls0 fr12, fr12, fr0
ps_muls0 fr11, fr11, fr0
psq_st fr13, 0(r4), 0, 0
psq_st fr12, 16(r4), 0, 0
ps_muls0 fr10, fr10, fr0
ps_muls0 fr9, fr9, fr0
psq_st fr11, 32(r4), 0, 0
psq_st fr10, 8(r4), 1, 0
ps_muls0 fr8, fr8, fr0
// Success (addi with base 0 loads the immediate).
addi r3, 0, 1
psq_st fr9, 24(r4), 1, 0
psq_st fr8, 40(r4), 1, 0
blr
//-----------------------------------------------------------------------
// ps_guMtxScale: build a pure scaling matrix.
//   In:    r3 = mtx (3x4 floats, written), fr1 = xS, fr2 = yS, fr3 = zS
//   Clobb: r9, fr0
// Result: xS, yS, zS on the diagonal and 0.0 in every other element.
// The paired stores of fr0 rely on lfs leaving 0.0 in both lanes.
//-----------------------------------------------------------------------
	.globl	ps_guMtxScale
ps_guMtxScale:
	lis		r9,Unit01@ha
	addi		r9,r9,Unit01@l
	lfs		fr0,0(r9)		// fr0 = 0.0

	// Zero every off-diagonal element first.
	psq_st		fr0,4(r3),0,0		// m[0][1], m[0][2]
	psq_st		fr0,12(r3),0,0		// m[0][3], m[1][0]
	psq_st		fr0,24(r3),0,0		// m[1][2], m[1][3]
	psq_st		fr0,32(r3),0,0		// m[2][0], m[2][1]
	stfs		fr0,44(r3)		// m[2][3]

	// Then drop the scale factors onto the diagonal.
	stfs		fr1,0(r3)		// m[0][0] = xS
	stfs		fr2,20(r3)		// m[1][1] = yS
	stfs		fr3,40(r3)		// m[2][2] = zS
	blr
//-----------------------------------------------------------------------
// ps_guMtxScaleApply: dst = src with row 0 scaled by xS, row 1 by yS and
// row 2 by zS (all four columns of each row).
//   In:    r3 = src, r4 = dst, fr1 = xS, fr2 = yS, fr3 = zS
//   Clobb: fr2, fr4-fr8
// fr2 is recycled as a data register once yS has been used for the last
// time, so the order of the row-1 multiplies and the reload matters.
//-----------------------------------------------------------------------
	.globl	ps_guMtxScaleApply
ps_guMtxScaleApply:
	psq_l		fr4,0(r3),0,0		// row 0, columns 0-1
	psq_l		fr5,8(r3),0,0		// row 0, columns 2-3
	psq_l		fr6,16(r3),0,0		// row 1, columns 0-1
	psq_l		fr7,24(r3),0,0		// row 1, columns 2-3
	psq_l		fr8,32(r3),0,0		// row 2, columns 0-1

	ps_muls0	fr4,fr4,fr1
	ps_muls0	fr5,fr5,fr1
	ps_muls0	fr6,fr6,fr2
	ps_muls0	fr7,fr7,fr2		// last use of yS

	psq_st		fr4,0(r4),0,0
	psq_l		fr2,40(r3),0,0		// row 2, columns 2-3 (reuses fr2)

	ps_muls0	fr8,fr8,fr3
	ps_muls0	fr2,fr2,fr3

	psq_st		fr5,8(r4),0,0
	psq_st		fr6,16(r4),0,0
	psq_st		fr7,24(r4),0,0
	psq_st		fr8,32(r4),0,0
	psq_st		fr2,40(r4),0,0
	blr
//-----------------------------------------------------------------------
// ps_guMtxTrans: build a pure translation matrix.
//   In:    r3 = mtx (3x4 floats, written), fr1 = xT, fr2 = yT, fr3 = zT
//   Clobb: r9, fr4, fr5
// Result: identity in the 3x3 part, (xT, yT, zT) in the fourth column.
// Fix: m[1][0] (offset 16) was never written, so that element kept
// whatever the destination memory held before the call. All twelve
// elements are now stored.
// The paired stores of fr4 rely on lfs leaving 0.0 in both lanes.
//-----------------------------------------------------------------------
	.globl	ps_guMtxTrans
ps_guMtxTrans:
	lis		r9,Unit01@ha
	addi		r9,r9,Unit01@l
	lfs		fr4,0(r9)		// fr4 = 0.0
	lfs		fr5,4(r9)		// fr5 = 1.0

	stfs		fr1,12(r3)		// m[0][3] = xT
	stfs		fr2,28(r3)		// m[1][3] = yT
	psq_st		fr4,4(r3),0,0		// m[0][1], m[0][2] = 0
	psq_st		fr4,32(r3),0,0		// m[2][0], m[2][1] = 0
	stfs		fr4,16(r3)		// m[1][0] = 0 (previously missing)
	stfs		fr5,20(r3)		// m[1][1] = 1
	stfs		fr4,24(r3)		// m[1][2] = 0
	stfs		fr5,40(r3)		// m[2][2] = 1
	stfs		fr3,44(r3)		// m[2][3] = zT
	stfs		fr5,0(r3)		// m[0][0] = 1
	blr
//-----------------------------------------------------------------------
// ps_guMtxTransApply: dst = src with (xT, yT, zT) added to the fourth
// column; the other nine elements are copied unchanged.
//   In:    r3 = src, r4 = dst, fr1 = xT, fr2 = yT, fr3 = zT
//   Clobb: fr4-fr9
// The whole source matrix is loaded before anything is stored.
//-----------------------------------------------------------------------
	.globl	ps_guMtxTransApply
ps_guMtxTransApply:
	psq_l		fr4,0(r3),0,0		// (m00, m01)
	psq_l		fr5,8(r3),0,0		// (m02, m03)
	psq_l		fr6,16(r3),0,0		// (m10, m11)
	psq_l		fr7,24(r3),0,0		// (m12, m13)
	psq_l		fr9,32(r3),0,0		// (m20, m21)
	psq_l		fr8,40(r3),0,0		// (m22, m23)

	// ps_sum1 keeps lane 0 and writes lane 1 = offset + old lane 1.
	ps_sum1		fr5,fr1,fr5,fr5		// m03 += xT
	ps_sum1		fr7,fr2,fr7,fr7		// m13 += yT
	ps_sum1		fr8,fr3,fr8,fr8		// m23 += zT

	psq_st		fr4,0(r4),0,0
	psq_st		fr5,8(r4),0,0
	psq_st		fr6,16(r4),0,0
	psq_st		fr7,24(r4),0,0
	psq_st		fr9,32(r4),0,0
	psq_st		fr8,40(r4),0,0
	blr
//-----------------------------------------------------------------------
// ps_guMtxRotTrig: build a rotation matrix about one principal axis from
// a precomputed sine and cosine.
//   In:    r3 = mt (3x4 floats, written), r4 = axis character,
//          fr1 = sinA, fr2 = cosA
//   Clobb: r4, r9, fr3-fr9, cr0
// Bit 0x20 of the axis is forced on before comparing, so upper and lower
// case letters select the same axis. Any value other than x, y or z
// returns without writing mt.
//-----------------------------------------------------------------------
	.globl	ps_guMtxRotTrig
ps_guMtxRotTrig:
	lis		r9,Unit01@ha
	addi		r9,r9,Unit01@l
	lfs		fr3,0(r9)		// fr3 = 0.0
	lfs		fr4,4(r9)		// fr4 = 1.0
	ori		r4,r4,0x20		// fold the axis letter to lower case
	ps_neg		fr5,fr1			// fr5 = -sinA

	cmplwi		r4,'x'
	beq		.Lgu_rottrig_x
	cmplwi		r4,'y'
	beq		.Lgu_rottrig_y
	cmplwi		r4,'z'
	beq		.Lgu_rottrig_z
	blr					// unknown axis: mt is left as it was

.Lgu_rottrig_x:
	ps_merge00	fr6,fr1,fr2		// (sin, cos)
	ps_merge00	fr7,fr2,fr5		// (cos, -sin)
	psq_st		fr4,0(r3),1,0		// m[0][0] = 1
	psq_st		fr3,4(r3),0,0		// m[0][1], m[0][2] = 0
	psq_st		fr3,12(r3),0,0		// m[0][3], m[1][0] = 0
	psq_st		fr7,20(r3),0,0		// m[1][1], m[1][2] = cos, -sin
	psq_st		fr3,28(r3),0,0		// m[1][3], m[2][0] = 0
	psq_st		fr6,36(r3),0,0		// m[2][1], m[2][2] = sin, cos
	psq_st		fr3,44(r3),1,0		// m[2][3] = 0
	blr

.Lgu_rottrig_y:
	ps_merge00	fr6,fr2,fr3		// (cos, 0)
	ps_merge00	fr7,fr3,fr4		// (0, 1)
	ps_merge00	fr8,fr5,fr3		// (-sin, 0)
	ps_merge00	fr9,fr1,fr3		// (sin, 0)
	psq_st		fr6,0(r3),0,0		// m[0][0], m[0][1] = cos, 0
	psq_st		fr9,8(r3),0,0		// m[0][2], m[0][3] = sin, 0
	psq_st		fr7,16(r3),0,0		// m[1][0], m[1][1] = 0, 1
	psq_st		fr3,24(r3),0,0		// m[1][2], m[1][3] = 0
	psq_st		fr8,32(r3),0,0		// m[2][0], m[2][1] = -sin, 0
	psq_st		fr6,40(r3),0,0		// m[2][2], m[2][3] = cos, 0
	blr

.Lgu_rottrig_z:
	ps_merge00	fr6,fr1,fr2		// (sin, cos)
	ps_merge00	fr8,fr2,fr5		// (cos, -sin)
	ps_merge00	fr7,fr4,fr3		// (1, 0)
	psq_st		fr8,0(r3),0,0		// m[0][0], m[0][1] = cos, -sin
	psq_st		fr3,8(r3),0,0		// m[0][2], m[0][3] = 0
	psq_st		fr6,16(r3),0,0		// m[1][0], m[1][1] = sin, cos
	psq_st		fr3,24(r3),0,0		// m[1][2], m[1][3] = 0
	psq_st		fr3,32(r3),0,0		// m[2][0], m[2][1] = 0
	psq_st		fr7,40(r3),0,0		// m[2][2], m[2][3] = 1, 0
	blr
//-----------------------------------------------------------------------
// ps_guMtxReflect: build the matrix that mirrors points through a plane.
//   In:    r3 = mtx (3x4 floats, written)
//          r4 = vec1, used as a point p on the plane
//          r5 = vec2, used as the plane normal n
//          (roles inferred from the arithmetic; n is presumably expected
//          to be unit length - confirm against callers)
//   Clobb: r9, fr0-fr13
// Result: 3x3 part = I - 2*n*nT, fourth column = 2*(n.p)*n.
// Fix: the +1.0 for the m[0][0] diagonal element was being added to fr2
// (the normal itself) instead of fr9 (the row-0 product). That left
// m[0][0] without its identity term and scaled m[0][3] by (nx + 1)
// instead of nx. Rows 1 and 2 already add 1.0 to their own product
// registers; row 0 now does the same.
//-----------------------------------------------------------------------
	.globl	ps_guMtxReflect
ps_guMtxReflect:
	lis		r9,Unit01@ha
	addi		r9,r9,Unit01@l
	lfs		fr0,4(r9)		// fr0 = 1.0 (needed in both lanes)
	psq_l		fr1,8(r5),1,0		// fr1 = (nz, 1)
	psq_l		fr2,0(r5),0,0		// fr2 = (nx, ny)
	psq_l		fr3,0(r4),0,0		// fr3 = (px, py)
	ps_nmadd	fr5,fr1,fr0,fr1		// fr5 = (-2nz, -2)
	psq_l		fr4,8(r4),1,0		// fr4 = (pz, 1)
	ps_nmadd	fr6,fr2,fr0,fr2		// fr6 = (-2nx, -2ny)
	ps_muls0	fr7,fr2,fr5		// fr7 = (-2nz*nx, -2nz*ny)
	ps_mul		fr8,fr6,fr3		// fr8 = (-2nx*px, -2ny*py)
	ps_muls0	fr9,fr2,fr6		// fr9 = (-2nx*nx, -2nx*ny)
	ps_sum0		fr8,fr8,fr8,fr8		// fr8 lane 0 = -2(nx*px + ny*py)
	ps_muls1	fr10,fr2,fr6		// fr10 = (-2ny*nx, -2ny*ny)
	psq_st		fr7,32(r3),0,0		// m[2][0], m[2][1]
	ps_sum0		fr9,fr9,fr9,fr0		// fr9 lane 0 = 1 - 2nx*nx (fixed)
	ps_nmadd	fr8,fr5,fr4,fr8		// fr8 lane 0 = 2(n.p)
	ps_sum1		fr10,fr0,fr10,fr10	// fr10 lane 1 = 1 - 2ny*ny
	psq_st		fr9,0(r3),0,0		// m[0][0], m[0][1]
	ps_muls0	fr11,fr2,fr8		// fr11 = 2(n.p) * (nx, ny)
	ps_merge00	fr12,fr5,fr8		// fr12 = (-2nz, 2(n.p))
	psq_st		fr10,16(r3),0,0		// m[1][0], m[1][1]
	ps_merge00	fr13,fr7,fr11		// fr13 = (m[0][2], m[0][3])
	ps_muls0	fr12,fr12,fr1		// fr12 = (-2nz*nz, 2(n.p)*nz)
	ps_merge11	fr11,fr7,fr11		// fr11 = (m[1][2], m[1][3])
	psq_st		fr13,8(r3),0,0		// m[0][2], m[0][3]
	ps_sum0		fr12,fr12,fr12,fr0	// fr12 lane 0 = 1 - 2nz*nz
	psq_st		fr11,24(r3),0,0		// m[1][2], m[1][3]
	psq_st		fr12,40(r3),0,0		// m[2][2], m[2][3]
	blr
//-----------------------------------------------------------------------
// ps_guVecAdd: dst = v1 + v2 for three-float vectors.
//   In:    r3 = v1, r4 = v2, r5 = dst
//   Clobb: fr2-fr7 (through the V1_*, V2_* and D1_* aliases)
// The (x, y) pair is finished and stored before z is loaded.
//-----------------------------------------------------------------------
	.globl	ps_guVecAdd
ps_guVecAdd:
	// x and y as one pair
	psq_l		V1_XY,0(r3),0,0
	psq_l		V2_XY,0(r4),0,0
	ps_add		D1_XY,V1_XY,V2_XY
	psq_st		D1_XY,0(r5),0,0

	// z as a single element
	psq_l		V1_Z,8(r3),1,0
	psq_l		V2_Z,8(r4),1,0
	ps_add		D1_Z,V1_Z,V2_Z
	psq_st		D1_Z,8(r5),1,0
	blr
//-----------------------------------------------------------------------
// ps_guVecSub: dst = v1 - v2 for three-float vectors.
//   In:    r3 = v1, r4 = v2, r5 = dst
//   Clobb: fr2-fr7 (through the V1_*, V2_* and D1_* aliases)
// The (x, y) pair is finished and stored before z is loaded.
//-----------------------------------------------------------------------
	.globl	ps_guVecSub
ps_guVecSub:
	// x and y as one pair
	psq_l		V1_XY,0(r3),0,0
	psq_l		V2_XY,0(r4),0,0
	ps_sub		D1_XY,V1_XY,V2_XY
	psq_st		D1_XY,0(r5),0,0

	// z as a single element
	psq_l		V1_Z,8(r3),1,0
	psq_l		V2_Z,8(r4),1,0
	ps_sub		D1_Z,V1_Z,V2_Z
	psq_st		D1_Z,8(r5),1,0
	blr
//-----------------------------------------------------------------------
// ps_guVecScale: dst = src * S for a three-float vector.
//   In:    r3 = src, r4 = dst, fr1 = S (lane 0 is the multiplier)
//   Clobb: fr2-fr4
// Both parts of src are loaded before dst is written.
//-----------------------------------------------------------------------
	.globl	ps_guVecScale
ps_guVecScale:
	psq_l		fr2,0(r3),0,0		// (x, y)
	psq_l		fr3,8(r3),1,0		// z

	ps_muls0	fr4,fr2,fr1		// (x*S, y*S)
	psq_st		fr4,0(r4),0,0

	ps_muls0	fr4,fr3,fr1		// z*S
	psq_st		fr4,8(r4),1,0
	blr
//-----------------------------------------------------------------------
// ps_guVecNormalize: scale a three-float vector to unit length, in place.
//   In:    r3 = v (read and written)
//   Clobb: r9, fr0-fr9
// Uses the frsqrte estimate of 1/sqrt(len2) refined by one Newton-Raphson
// step: est = 0.5 * est * (3 - len2 * est * est).
// There is no guard for a zero-length input.
//-----------------------------------------------------------------------
	.globl	ps_guVecNormalize
ps_guVecNormalize:
	psq_l		fr2,0(r3),0,0		// fr2 = (x, y)
	psq_l		fr3,8(r3),1,0		// fr3 = (z, 1)

	lis		r9,NrmData@ha
	addi		r9,r9,NrmData@l
	lfs		fr0,0(r9)		// fr0 = 0.5
	lfs		fr1,4(r9)		// fr1 = 3.0

	// Squared length into lane 0 of fr6.
	ps_mul		fr4,fr2,fr2		// (x*x, y*y)
	ps_madd		fr5,fr3,fr3,fr4		// lane 0 = z*z + x*x
	ps_sum0		fr6,fr5,fr3,fr4		// lane 0 = z*z + x*x + y*y

	// Reciprocal square root with one refinement step.
	frsqrte		fr7,fr6			// est
	fmuls		fr8,fr7,fr7		// est*est
	fmuls		fr9,fr7,fr0		// 0.5*est
	fnmsubs		fr8,fr8,fr6,fr1		// 3 - len2*est*est
	fmuls		fr7,fr8,fr9		// refined 1/len

	ps_muls0	fr2,fr2,fr7
	psq_st		fr2,0(r3),0,0		// x, y
	ps_muls0	fr3,fr3,fr7
	psq_st		fr3,8(r3),1,0		// z
	blr
//-----------------------------------------------------------------------
// ps_guVecCross: v12 = v1 x v2 (cross product of three-float vectors).
//   In:    r3 = v1 = a, r4 = v2 = b, r5 = v12
//   Clobb: fr0-fr10
// Both inputs are fully loaded before v12 is written. The broadcast
// multiplies rely on lfs leaving the z value in both lanes.
//-----------------------------------------------------------------------
	.globl	ps_guVecCross
ps_guVecCross:
	psq_l		fr0,0(r3),0,0		// fr0 = (ax, ay)
	psq_l		fr1,0(r4),0,0		// fr1 = (bx, by)
	lfs		fr2,8(r3)		// fr2 = az
	lfs		fr3,8(r4)		// fr3 = bz

	ps_merge10	fr6,fr1,fr1		// fr6 = (by, bx)
	ps_mul		fr4,fr1,fr2		// (bx*az, by*az)
	ps_muls0	fr7,fr1,fr0		// (bx*ax, by*ax)
	ps_msub		fr5,fr0,fr3,fr4		// (ax*bz - bx*az, ay*bz - by*az)
	ps_msub		fr8,fr0,fr6,fr7		// lane 1 = ay*bx - by*ax

	ps_merge11	fr9,fr5,fr5		// lane 0 = ay*bz - az*by = result x
	ps_merge01	fr10,fr5,fr8		// negated result (y, z)
	ps_neg		fr10,fr10		// (az*bx - ax*bz, ax*by - ay*bx)

	psq_st		fr9,0(r5),1,0		// x
	psq_st		fr10,4(r5),0,0		// y, z
	blr
//-----------------------------------------------------------------------
// ps_guVecDotProduct: dot product of two three-float vectors.
//   In:    r3 = vec1, r4 = vec2
//   Out:   fr1 lane 0 = x1*x2 + y1*y2 + z1*z2
//   Clobb: fr2-fr5
// The (y, z) pairs are read at offset 4 so that one paired multiply covers
// both trailing components.
//-----------------------------------------------------------------------
	.globl	ps_guVecDotProduct
ps_guVecDotProduct:
	psq_l		fr2,4(r3),0,0		// (y1, z1)
	psq_l		fr3,4(r4),0,0		// (y2, z2)
	psq_l		fr5,0(r3),0,0		// (x1, y1)
	psq_l		fr4,0(r4),0,0		// (x2, y2)

	ps_mul		fr2,fr2,fr3		// (y1*y2, z1*z2)
	ps_madd		fr3,fr5,fr4,fr2		// lane 0 = x1*x2 + y1*y2
	ps_sum0		fr1,fr3,fr2,fr2		// lane 0 += z1*z2
	blr
//-----------------------------------------------------------------------
// ps_guVecMultiply: transform a three-float vector by a 3x4 float matrix,
// including the translation column (the vector is extended with 1.0).
//   In:    r3 = matrix, r4 = source vector, r5 = destination vector
//          (roles taken from how each pointer is accessed below)
//   Clobb: fr0-fr6, fr8-fr12
// Loads and stores stay in their original relative order: the source
// vector is fully read before the first store to the destination.
//-----------------------------------------------------------------------
	.globl	ps_guVecMultiply
ps_guVecMultiply:
	psq_l		fr0,0(r4),0,0		// fr0 = (x, y)
	psq_l		fr2,0(r3),0,0		// (m00, m01)
	psq_l		fr1,8(r4),1,0		// fr1 = (z, 1)
	psq_l		fr3,8(r3),0,0		// (m02, m03)

	// Row 0
	ps_mul		fr4,fr2,fr0		// (m00*x, m01*y)
	ps_madd		fr5,fr3,fr1,fr4		// (m02*z + m00*x, m03 + m01*y)
	ps_sum0		fr6,fr5,fr6,fr5		// lane 0 = sum of both lanes

	psq_l		fr8,16(r3),0,0		// (m10, m11)
	psq_l		fr9,24(r3),0,0		// (m12, m13)
	psq_st		fr6,0(r5),1,0		// dst x

	// Row 1
	ps_mul		fr10,fr8,fr0
	ps_madd		fr11,fr9,fr1,fr10
	ps_sum0		fr12,fr11,fr12,fr11

	psq_l		fr2,32(r3),0,0		// (m20, m21)
	psq_l		fr3,40(r3),0,0		// (m22, m23)
	psq_st		fr12,4(r5),1,0		// dst y

	// Row 2
	ps_mul		fr4,fr2,fr0
	ps_madd		fr5,fr3,fr1,fr4
	ps_sum0		fr6,fr5,fr6,fr5
	psq_st		fr6,8(r5),1,0		// dst z
	blr
//-----------------------------------------------------------------------
// ps_guVecMultiplySR: transform a three-float vector by the 3x3 part of a
// 3x4 float matrix. The translation column takes no part in the stored
// result.
//   In:    r3 = mt, r4 = src, r5 = dst
//   Clobb: fr0-fr13
// The paired loads use quantization register index 0, which the original
// comments describe as GQR0 = 0. Everything is loaded before dst is
// written.
//-----------------------------------------------------------------------
	.globl	ps_guVecMultiplySR
ps_guVecMultiplySR:
	psq_l		fr6,0(r4),0,0		// fr6 = (x, y)
	psq_l		fr7,8(r4),1,0		// fr7 = (z, 1.0)

	psq_l		fr0,0(r3),0,0		// m[0][0], m[0][1]
	psq_l		fr1,8(r3),0,0		// m[0][2], m[0][3]
	psq_l		fr2,16(r3),0,0		// m[1][0], m[1][1]
	psq_l		fr3,24(r3),0,0		// m[1][2], m[1][3]
	psq_l		fr4,32(r3),0,0		// m[2][0], m[2][1]
	psq_l		fr5,40(r3),0,0		// m[2][2], m[2][3]

	ps_mul		fr8,fr0,fr6		// (m00*x, m01*y)
	ps_mul		fr10,fr2,fr6		// (m10*x, m11*y)
	ps_mul		fr12,fr4,fr6		// (m20*x, m21*y)

	ps_sum0		fr8,fr8,fr8,fr8		// lane 0 = m00*x + m01*y
	ps_sum0		fr10,fr10,fr10,fr10	// lane 0 = m10*x + m11*y
	ps_sum0		fr12,fr12,fr12,fr12	// lane 0 = m20*x + m21*y

	ps_madd		fr9,fr1,fr7,fr8		// lane 0 += m02*z
	ps_madd		fr11,fr3,fr7,fr10	// lane 0 += m12*z
	ps_madd		fr13,fr5,fr7,fr12	// lane 0 += m22*z

	psq_st		fr9,0(r5),1,0		// store X
	psq_st		fr11,4(r5),1,0		// store Y
	psq_st		fr13,8(r5),1,0		// store Z
	blr
//-----------------------------------------------------------------------
// ps_guQuatAdd: ab = a + b, component-wise over four floats.
//   In:    r3 = a, r4 = b, r5 = ab
//   Clobb: fr0, fr1
// The first pair is stored before the second pair is loaded.
//-----------------------------------------------------------------------
	.globl	ps_guQuatAdd
ps_guQuatAdd:
	// components 0 and 1
	psq_l		fr0,0(r3),0,0
	psq_l		fr1,0(r4),0,0
	ps_add		fr1,fr0,fr1
	psq_st		fr1,0(r5),0,0

	// components 2 and 3
	psq_l		fr0,8(r3),0,0
	psq_l		fr1,8(r4),0,0
	ps_add		fr1,fr0,fr1
	psq_st		fr1,8(r5),0,0
	blr
//-----------------------------------------------------------------------
// ps_guQuatSub: ab = a - b, component-wise over four floats.
//   In:    r3 = a, r4 = b, r5 = ab
//   Clobb: fr0, fr1
// The first pair is stored before the second pair is loaded.
//-----------------------------------------------------------------------
	.globl	ps_guQuatSub
ps_guQuatSub:
	// components 0 and 1
	psq_l		fr0,0(r3),0,0
	psq_l		fr1,0(r4),0,0
	ps_sub		fr1,fr0,fr1
	psq_st		fr1,0(r5),0,0

	// components 2 and 3
	psq_l		fr0,8(r3),0,0
	psq_l		fr1,8(r4),0,0
	ps_sub		fr1,fr0,fr1
	psq_st		fr1,8(r5),0,0
	blr
//-----------------------------------------------------------------------
// ps_guQuatMultiply: ab = a * b (quaternion product).
//   In:    r3 = a, r4 = b, r5 = ab
//   Clobb: fr0-fr11
// With the four floats read as (x, y, z, w) - assumed layout, confirm
// against the quaternion type - the stored result is
//   x = aw*bx + ax*bw + ay*bz - az*by
//   y = aw*by + ay*bw + az*bx - ax*bz
//   z = aw*bz + az*bw + ax*by - ay*bx
//   w = aw*bw - ax*bx - ay*by - az*bz
// Fix: the two final combines used to read "fr8 + fr8" and "fr9 - fr9",
// so the partial sums accumulated in fr10 and fr11 were thrown away, the
// first pair was wrong and the second pair was always zero. They now
// combine fr8 with fr10 and fr9 with fr11.
// Both inputs are fully loaded before ab is written.
//-----------------------------------------------------------------------
	.globl	ps_guQuatMultiply
ps_guQuatMultiply:
	psq_l		fr0,0(r3),0,0		// fr0 = (ax, ay)
	psq_l		fr1,8(r3),0,0		// fr1 = (az, aw)
	psq_l		fr2,0(r4),0,0		// fr2 = (bx, by)
	ps_neg		fr4,fr0			// fr4 = (-ax, -ay)
	psq_l		fr3,8(r4),0,0		// fr3 = (bz, bw)
	ps_neg		fr5,fr1			// fr5 = (-az, -aw)
	ps_merge01	fr6,fr4,fr0		// fr6 = (-ax, ay)
	ps_muls0	fr8,fr1,fr2		// (az*bx, aw*bx)
	ps_muls0	fr9,fr4,fr2		// (-ax*bx, -ay*bx)
	ps_merge01	fr7,fr5,fr1		// fr7 = (-az, aw)
	ps_muls1	fr11,fr6,fr2		// (-ax*by, ay*by)
	ps_madds0	fr8,fr6,fr3,fr8		// (az*bx - ax*bz, aw*bx + ay*bz)
	ps_muls1	fr10,fr7,fr2		// (-az*by, aw*by)
	ps_madds0	fr9,fr7,fr3,fr9		// (-ax*bx - az*bz, aw*bz - ay*bx)
	ps_madds1	fr11,fr5,fr3,fr11	// (-ax*by - az*bw, ay*by - aw*bw)
	ps_merge10	fr8,fr8,fr8		// swap lanes
	ps_madds1	fr10,fr0,fr3,fr10	// (ax*bw - az*by, ay*bw + aw*by)
	ps_merge10	fr9,fr9,fr9		// swap lanes
	ps_add		fr8,fr8,fr10		// (x, y)
	psq_st		fr8,0(r5),0,0
	ps_sub		fr9,fr9,fr11		// (z, w)
	psq_st		fr9,8(r5),0,0
	blr
//-----------------------------------------------------------------------
// ps_guQuatScale: r = q * scale, component-wise over four floats.
//   In:    r3 = q, r4 = r, fr1 = scale (lane 0 is the multiplier)
//   Clobb: fr4, fr5
// Fix: the export directive named "ps_quQuatScale" (qu instead of gu), so
// the label defined here was never made global and could not be linked
// against. The directive now names the label that is actually defined.
//-----------------------------------------------------------------------
	.globl	ps_guQuatScale
ps_guQuatScale:
	psq_l		fr4,0(r3),0,0		// components 0 and 1
	psq_l		fr5,8(r3),0,0		// components 2 and 3
	ps_muls0	fr4,fr4,fr1
	psq_st		fr4,0(r4),0,0
	ps_muls0	fr5,fr5,fr1
	psq_st		fr5,8(r4),0,0
	blr
//-----------------------------------------------------------------------
// ps_guQuatDotProduct: four-component dot product of two quaternions.
//   In:    r3 = p, r4 = q
//   Out:   fr1 lane 0 = sum of the four component products
//   Clobb: fr2-fr5
//-----------------------------------------------------------------------
	.globl	ps_guQuatDotProduct
ps_guQuatDotProduct:
	psq_l		fr2,0(r3),0,0		// p components 0 and 1
	psq_l		fr4,0(r4),0,0		// q components 0 and 1
	psq_l		fr3,8(r3),0,0		// p components 2 and 3
	psq_l		fr5,8(r4),0,0		// q components 2 and 3

	ps_mul		fr1,fr2,fr4		// (p0*q0, p1*q1)
	ps_madd		fr1,fr3,fr5,fr1		// (p0*q0 + p2*q2, p1*q1 + p3*q3)
	ps_sum0		fr1,fr1,fr1,fr1		// lane 0 = lane 0 + lane 1
	blr
//-----------------------------------------------------------------------
// ps_guQuatNormalize: unit = src scaled to unit length.
//   In:    r3 = src, r4 = unit
//   Clobb: r9, fr0-fr10
// The squared length is compared against QuatEpsilon: when it is smaller,
// the scale factor is forced to 0 and unit is written as all zeros.
// Otherwise the frsqrte estimate is refined by one Newton-Raphson step,
// est = 0.5 * est * (3 - len2 * est * est).
// Fix: the squared length used to be computed as "fr2*fr2 + fr2" from the
// first pair only, with no horizontal add, so the last two components
// (fr1) never contributed and the result was not unit length. It is now
// the sum of the squares of all four components.
//-----------------------------------------------------------------------
	.globl	ps_guQuatNormalize
ps_guQuatNormalize:
	lis		r9,NrmData@ha
	addi		r9,r9,NrmData@l
	lfs		fr9,0(r9)		// fr9 = 0.5
	lfs		fr10,4(r9)		// fr10 = 3.0
	lis		r9,QuatEpsilon@ha
	lfs		fr8,QuatEpsilon@l(r9)	// fr8 = epsilon
	psq_l		fr0,0(r3),0,0		// components 0 and 1
	ps_mul		fr2,fr0,fr0		// (c0*c0, c1*c1)
	psq_l		fr1,8(r3),0,0		// components 2 and 3
	ps_sub		fr5,fr8,fr8		// fr5 = 0
	ps_madd		fr2,fr1,fr1,fr2		// (c2*c2 + c0*c0, c3*c3 + c1*c1)
	ps_sum0		fr2,fr2,fr2,fr2		// lane 0 = squared length
	frsqrte		fr3,fr2			// est
	ps_sub		fr4,fr2,fr8		// len2 - epsilon, sign picks the path
	fmul		fr6,fr3,fr3		// est*est
	fmul		fr7,fr3,fr9		// 0.5*est
	fnmsub		fr6,fr6,fr2,fr10	// 3 - len2*est*est
	fmul		fr3,fr6,fr7		// refined 1/len
	ps_sel		fr3,fr4,fr3,fr5		// below epsilon: scale by 0 instead
	ps_muls0	fr0,fr0,fr3
	ps_muls0	fr1,fr1,fr3
	psq_st		fr0,0(r4),0,0
	psq_st		fr1,8(r4),0,0
	blr
// Constants shared by the routines in this file, 4-byte aligned.
.section .data
.balign 4
// Threshold used by ps_guQuatNormalize: it is subtracted from the value fed
// to frsqrte, and a negative difference selects a scale factor of 0.
QuatEpsilon:
.float 0.00001
// The pair (0.0, 1.0). Offset 0 is read for 0.0, offset 4 for 1.0, and
// ps_guMtxConcat loads both at once.
Unit01:
.float 0.0, 1.0
// 0.5 and 3.0, the constants of the reciprocal-square-root refinement step
// in ps_guVecNormalize and ps_guQuatNormalize.
NrmData:
.float 0.5, 3.0