DSP HLE improvements...

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3499 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
fires.gc 2009-06-20 01:33:13 +00:00
parent 891ff575b0
commit d4a0e1f8d0
2 changed files with 714 additions and 358 deletions

View File

@ -19,7 +19,6 @@
// Zelda: The Windwaker, Mario Sunshine, Mario Kart, Twilight Princess
#include "../Globals.h"
#include "FileUtil.h"
#include "UCodes.h"
#include "UCode_Zelda.h"
#include "../MailHandler.h"
@ -27,184 +26,328 @@
#include "../main.h"
#include "Mixer.h"
// TODO: replace with table from RAM.
const u16 afccoef[16][2] =
#include "WaveFile.h"
class CResampler
{
{0,0}, {0x0800,0}, {0, 0x0800},{0x0400,0x0400},
{0x1000,0xf800},{0x0e00,0xfa00},{0x0c00,0xfc00},{0x1200,0xf600},
{0x1068,0xf738},{0x12c0,0xf704},{0x1400,0xf400},{0x0800,0xf800},
{0x0400,0xfc00},{0xfc00,0x0400},{0xfc00,0}, {0xf800,0}
public:
CResampler(short* samples, int num_stereo_samples, int core_sample_rate)
: m_mode(1)
, m_queueSize(0)
{
int PV1l=0,PV2l=0,PV3l=0,PV4l=0;
int acc=0;
while (num_stereo_samples)
{
acc += core_sample_rate;
while (num_stereo_samples && (acc >= 48000))
{
PV4l=PV3l;
PV3l=PV2l;
PV2l=PV1l;
PV1l=*(samples++); //32bit processing
num_stereo_samples--;
acc-=48000;
}
// defaults to nearest
s32 DataL = PV1l;
if (m_mode == 1) { //linear
DataL = PV1l + ((PV2l - PV1l)*acc)/48000;
}
else if (m_mode == 2) {//cubic
s32 a0l = PV1l - PV2l - PV4l + PV3l;
s32 a1l = PV4l - PV3l - a0l;
s32 a2l = PV1l - PV4l;
s32 a3l = PV2l;
s32 t0l = ((a0l )*acc)/48000;
s32 t1l = ((t0l+a1l)*acc)/48000;
s32 t2l = ((t1l+a2l)*acc)/48000;
s32 t3l = ((t2l+a3l));
DataL = t3l;
}
int l = DataL;
if (l < -32767) l = -32767;
if (l > 32767) l = 32767;
sample_queue.push(l);
m_queueSize += 1;
}
}
FixedSizeQueue<s16, queue_maxlength> sample_queue;
int m_queueSize;
int m_mode;
};
// Decoder from in_cube by hcs/destop/etc. Haven't yet found a valid use for it.
// Looking at in_cube, it seems to be 9 bytes of input = 16 samples of output.
// Different from AX ADPCM which is 8 bytes of input = 14 samples of output.
// input = location of encoded source samples
// out = location of destination buffer (16 bits / sample)
void AFCdecodebuffer(const u8 *input, s16 *out, short *histp, short *hist2p)
{
short nibbles[16];
u8 *dst = (u8 *)out;
short hist = *histp;
short hist2 = *hist2p;
const u8 *src = input;
// 9 bytes input - first byte contain delta scaling and coef index.
const short delta = 1 << (((*src) >> 4) & 0xf);
const short idx = *src & 0xf;
src++;
// denibble the rest of the 8 bytes into 16 values.
for (int i = 0; i < 16; i += 2)
{
int j = *src >> 4;
nibbles[i] = j;
j = *src & 0xF;
nibbles[i + 1] = j;
src++;
}
// make the nibbles signed.
for (int i = 0; i < 16; i++)
{
if (nibbles[i] >= 8)
nibbles[i] = nibbles[i] - 16;
}
// Perform ADPCM filtering.
for (int i = 0; i < 16; i++)
{
int sample = (delta * nibbles[i]) << 11;
sample += ((long)hist * afccoef[idx][0]) + ((long)hist2 * afccoef[idx][1]);
sample = sample >> 11;
// Clamp sample.
if (sample > 32767) {
sample = 32767;
}
if (sample < -32768) {
sample = -32768;
}
*(short*)dst = (short)sample;
dst = dst + 2;
hist2 = hist;
hist = (short)sample;
}
// Store state.
*histp = hist;
*hist2p = hist2;
}
CUCode_Zelda::CUCode_Zelda(CMailHandler& _rMailHandler)
: IUCode(_rMailHandler)
, m_bSyncInProgress(false)
, m_SyncIndex(0)
, m_SyncStep(0)
, m_SyncMaxStep(0)
, m_bSyncCmdPending(false)
, m_SyncEndSync(0)
, m_SyncCurStep(0)
, m_SyncCount(0)
, m_SyncMax(0)
, m_numSteps(0)
, m_bListInProgress(false)
, m_step(0)
, m_readOffset(0)
, num_param_blocks(0)
, param_blocks_ptr(0)
, param_blocks2_ptr(0)
, m_NumberOfFramesToRender(0)
, m_CurrentFrameToRender(0)
, m_MaxSyncedPB(0)
, m_MailState(WaitForMail)
{
DEBUG_LOG(DSPHLE, "UCode_Zelda - add boot mails for handshake");
m_rMailHandler.PushMail(DSP_INIT);
g_dspInitialize.pGenerateDSPInterrupt();
m_rMailHandler.PushMail(0xF3551111); // handshake
memset(m_Buffer, 0, sizeof(m_Buffer));
memset(m_SyncValues, 0, sizeof(m_SyncValues));
}
for (int i=0; i<0x10; i++)
m_PBMask[i] = false;
templbuffer = new int[1024 * 1024];
temprbuffer = new int[1024 * 1024];
}
CUCode_Zelda::~CUCode_Zelda()
{
m_rMailHandler.Clear();
delete [] templbuffer;
delete [] temprbuffer;
}
void CUCode_Zelda::UpdatePB(ZPB& _rPB, int *templbuffer, int *temprbuffer, u32 _Size)
{
u16* pTest = (u16*)&_rPB;
if (pTest[0x00] == 0)
return;
if (pTest[0x01] != 0)
return;
if (pTest[0x06] != 0x00)
{
// probably pTest[0x06] ==0 -> AFC (and variants)
}
else
{
switch(_rPB.type) // or Bytes per Sample
{
case 0x05:
case 0x09:
{
//
// initialize "decoder" if the sample is played the first time
//
if (pTest[0x04] != 0)
{
// zelda:
// perhaps init or "has played before"
pTest[0x32] = 0x00;
pTest[0x66] = 0x00; // history1
pTest[0x67] = 0x00; // history2
// samplerate? length? num of samples? i dunno...
pTest[0x3a] = pTest[0x8a];
pTest[0x3b] = pTest[0x8b];
// copy ARAM addr from r to rw area
pTest[0x38] = pTest[0x8c];
pTest[0x39] = pTest[0x8d];
}
if (pTest[0x01] != 0) // early out... i dunno if this can happen because we filter it above
{
return;
}
u32 t1 = pTest[0x39];
u32 t2 = pTest[0x38];
u32 ARAMAddr = (t2<<16) | t1;
// ????????????????????????????????
u32 t3 = pTest[0x3a];
u32 t4 = pTest[0x3b];
u32 NumberOfSamples = (t3<<16) | t4;
NumberOfSamples = (NumberOfSamples + 0xf) >> 4; // i think the lower 4 are the fraction
u32 frac = NumberOfSamples& 0xF;
u8 inBuffer[9];
short outbuf[16];
u32 sampleCount = 0;
#define USE_RESAMPLE 1
#if USE_RESAMPLE == 1
while(NumberOfSamples > 0)
{
for (int i=0; i<9; i++)
{
inBuffer[i] = g_dspInitialize.pARAM_Read_U8(ARAMAddr);
ARAMAddr++;
}
AFCdecodebuffer((char*)inBuffer, outbuf, (short*)&pTest[0x66], (short*)&pTest[0x67]);
CResampler Sampler(outbuf, 16, 48000);
while (Sampler.m_queueSize > 0)
{
int sample = Sampler.sample_queue.front();
Sampler.sample_queue.pop();
Sampler.m_queueSize-=1;
templbuffer[sampleCount] += sample;
temprbuffer[sampleCount] += sample;
sampleCount++;
if (sampleCount > _Size)
break;
}
if (sampleCount > _Size)
break;
NumberOfSamples--;
}
#else
for (int s=0; s<(_Size/16);s++)
{
for (int i=0; i<9; i++)
{
inBuffer[i] = g_dspInitialize.pARAM_Read_U8(ARAMAddr);
ARAMAddr++;
}
AFCdecodebuffer((char*)inBuffer, outbuf, (short*)&pTest[0x66], (short*)&pTest[0x67]);
for (int i=0; i<16; i++)
{
templbuffer[sampleCount] += outbuf[i];
temprbuffer[sampleCount] += outbuf[i];
sampleCount++;
}
NumberOfSamples--;
if (NumberOfSamples<=0)
break;
}
#endif
if (NumberOfSamples == 0)
{
pTest[0x01] = 1; // we are done ??
}
// write back
NumberOfSamples = (NumberOfSamples << 4); // missing fraction
pTest[0x38] = ARAMAddr >> 16;
pTest[0x39] = ARAMAddr & 0xFFFF;
pTest[0x3a] = NumberOfSamples >> 16;
pTest[0x3b] = NumberOfSamples & 0xFFFF;
#if 0
NumberOfSamples = (NumberOfSamples + 0xf) >> 4;
static u8 Buffer[500000];
for (int i =0; i<NumberOfSamples*9; i++)
{
Buffer[i]=g_dspInitialize.pARAM_Read_U8(ARAMAddr+i);
}
// yes, the dumps are really zelda sound ;)
DumpAFC(Buffer, NumberOfSamples*9, 0x3d00);
DumpPB(_rPB);
// exit(1);
#endif
// i think pTest[0x3a] and pTest[0x3b] got an update after you have decoded some samples...
// just decrement them with the number of samples you have played
// and incrrease the ARAM Offset in pTest[0x38], pTest[0x39]
// end of block (Zelda 03b2)
if (pTest[0x06] == 0)
{
pTest[0x04] = 0;
}
}
break;
}
}
}
void CUCode_Zelda::Update(int cycles)
{
// check if we have to sent something
//if (!m_rMailHandler.IsEmpty())
// g_dspInitialize.pGenerateDSPInterrupt();
// if (!m_rMailHandler.IsEmpty())
// g_dspInitialize.pGenerateDSPInterrupt();
}
void CUCode_Zelda::HandleMail(u32 _uMail)
{
// When we used to lose sync, the last mails we get before the audio goes bye-bye
// 0
// 0x00000
// 0
// 0x10000
// 0
// 0x20000
// 0
// 0x30000
// And then silence... Looks like some reverse countdown :)
if (m_bSyncInProgress)
// XK: Sync mails spam the logs
/*
DEBUG_LOG(DSPHLE, "Zelda mail 0x%08X, list in progress? %s, sync in progress? %s", _uMail,
m_bListInProgress ? "Yes" : "No", m_bSyncInProgress ? "Yes" : "No");
*/
switch (m_MailState)
{
if (m_bSyncCmdPending)
case WaitForMail:
{
u32 n = (_uMail >> 16) & 0xF;
m_SyncStep = (n + 1) << 4;
m_SyncValues[n] = _uMail & 0xFFFF;
m_bSyncInProgress = false;
m_SyncCurStep = m_SyncStep;
if (m_SyncCurStep == m_SyncMaxStep)
if (_uMail == 0x00000000)
{
m_SyncCount++;
m_rMailHandler.PushMail(DSP_SYNC);
g_dspInitialize.pGenerateDSPInterrupt();
m_rMailHandler.PushMail(0xF355FF00 | m_SyncCount);
m_SyncCurStep = 0;
if (m_SyncCount == (m_SyncMax + 1))
{
m_rMailHandler.PushMail(DSP_UNKN);
g_dspInitialize.pGenerateDSPInterrupt();
soundStream->GetMixer()->SetHLEReady(true);
DEBUG_LOG(DSPHLE, "Update the SoundThread to be in sync");
soundStream->Update(); //do it in this thread to avoid sync problems
m_bSyncCmdPending = false;
}
m_MailState = ReadingFrameSync;
}
else if (_uMail < 0x255)
{
m_numSteps = _uMail;
m_MailState = ReadingMessage;
}
else
{
m_bSyncInProgress = false;
PanicAlert("Unknown Mail: 0x%08x", _uMail);
}
return;
}
break;
if (_uMail != 0)
case ReadingFrameSync:
{
DEBUG_LOG(DSPHLE, "Zelda mail 0x%08X, list in progress? %s", _uMail,
m_bListInProgress ? "Yes" : "No");
}
// PanicAlert("SyncMail: 0x%08x", _uMail);
if (m_bListInProgress)
int Slot = (_uMail >> 16) & 0x000F;
m_PBMask[Slot] = _uMail & 0xFFFF;
m_MailState = WaitForMail;
// Zelda UC 05db
m_MaxSyncedPB = (Slot+1) << 4;
}
break;
case ReadingMessage:
{
if (m_step < 0 || m_step >= sizeof(m_Buffer) / 4)
if (m_step < 0 || m_step >= sizeof(m_Buffer)/4)
PanicAlert("m_step out of range");
((u32*)m_Buffer)[m_step] = _uMail;
@ -212,45 +355,115 @@ void CUCode_Zelda::HandleMail(u32 _uMail)
if (m_step >= m_numSteps)
{
DEBUG_LOG(DSPHLE, "Executing %i-step list.", m_numSteps);
ExecuteList();
m_bListInProgress = false;
}
return;
}
if (_uMail == 0)
{
m_bSyncInProgress = true;
}
else if ((_uMail >> 16) == 0)
{
m_bListInProgress = true;
m_numSteps = _uMail;
m_MailState = WaitForMail;
m_step = 0;
}
}
break;
case ReadingSystemMsg:
{
_dbg_assert_msg_(DSPHLE, (_uMail & 0xFFFF0000) == 0xCDD10000, "This is not a system msg", _uMail);
m_MailState = WaitForMail;
}
break;
}
}
void CUCode_Zelda::MixAdd(short* _pBuffer, int _iSize)
{
//TODO(XK): Zelda UCode MixAdd?
if (m_NumberOfFramesToRender > 0)
{
if (m_NumPBs <= m_MaxSyncedPB) // we just render if all PBs are synced...Zelda does it in steps of 0x10 PBs but hey this is HLE
{
if (_iSize > 1024 * 1024)
_iSize = 1024 * 1024;
memset(templbuffer, 0, _iSize * sizeof(int));
memset(temprbuffer, 0, _iSize * sizeof(int));
{
CopyPBsFromRAM();
// render frame...
for (u32 i=0; i<m_NumPBs; i++)
{
// masking of PBs is done in zelda 0272... skip it for the moment
/* int Slot = i >> 4;
int Mask = i & 0x0F;
if (m_PBMask[Slot] & Mask))*/
{
UpdatePB(m_PBs[i], templbuffer, temprbuffer, _iSize);
}
}
CopyPBsToRAM();
m_MaxSyncedPB = 0;
}
if(_pBuffer) {
for (int i = 0; i < _iSize; i++)
{
// Clamp into 16-bit. Maybe we should add a volume compressor here.
int left = templbuffer[i] + _pBuffer[0];
int right = temprbuffer[i] + _pBuffer[1];
if (left < -32767) left = -32767;
if (left > 32767) left = 32767;
if (right < -32767) right = -32767;
if (right > 32767) right = 32767;
*_pBuffer++ = left;
*_pBuffer++ = right;
}
}
// make sure we never read outside the buffer by mistake.
// Before deleting extra reads in ExecuteList, we were getting these
// values.
memset(m_Buffer, 0xcc, sizeof(m_Buffer));
}
else
{
WARN_LOG(DSPHLE, "Zelda uCode: unknown mail %08X", _uMail);
return;
}
m_CurrentFrameToRender++;
// sync, we are ready
m_rMailHandler.PushMail(DSP_SYNC);
g_dspInitialize.pGenerateDSPInterrupt();
m_rMailHandler.PushMail(0xF3550000 | m_CurrentFrameToRender | 0xFF00);
if (m_CurrentFrameToRender == m_NumberOfFramesToRender)
{
/*
afaik zelda hasnt registered a handler to FRAME_END... so skip it
m_rMailHandler.PushMail(DSP_FRAME_END);
g_dspInitialize.pGenerateDSPInterrupt();
m_MailState = ReadingSystemMsg;
*/
m_NumberOfFramesToRender = 0;
m_CurrentFrameToRender = 0;
}
}
}
// zelda debug ..803F6418
void CUCode_Zelda::ExecuteList()
{
// begin with the list
m_readOffset = 0;
// First figure out what command we're dealing with.
u32 CmdMail = Read32();
u32 Command = (CmdMail >> 24) & 0x7f;
u32 Sync = CmdMail >> 16;
u16 ExtraData = CmdMail & 0xFFFF; // not yet used
DEBUG_LOG(DSPHLE, "==============================================================================");
DEBUG_LOG(DSPHLE, "Zelda UCode - execute dlist (cmd: 0x%04x : sync: 0x%04x)", Command, Sync);
@ -260,112 +473,59 @@ void CUCode_Zelda::ExecuteList()
// DsetupTable ... zelda ww jumps to 0x0095
case 0x01:
{
num_param_blocks = ExtraData;
m_NumPBs = (CmdMail & 0xFFFF);
if (m_NumPBs > 0x40)
{
PanicAlert("(m_NumPBs > 0x40) to much PBs");
m_NumPBs = 0x40;
}
u32 tmp[4];
param_blocks_ptr = tmp[0] = Read32();
m_PBAddress = tmp[0] = Read32();
tmp[1] = Read32();
tmp[2] = Read32();
param_blocks2_ptr = tmp[3] = Read32();
tmp[3] = Read32();
m_SyncMaxStep = CmdMail & 0xFFFF;
u16 Buffer[0x280];
for (int i=0; i<0x280;i++)
{
Buffer[i] = Memory_Read_U16(tmp[1] + (i*2));
}
u16* pTmp = (u16*)m_AFCCoefTable;
for (int i=0; i<0x20;i++)
{
pTmp[i] = Memory_Read_U16(tmp[2] + (i*2));
}
DEBUG_LOG(DSPHLE, "DsetupTable");
DEBUG_LOG(DSPHLE, "Num param blocks: %i", num_param_blocks);
DEBUG_LOG(DSPHLE, "Param blocks 1: 0x%08x", tmp[0]);
// This points to some strange data table.
DEBUG_LOG(DSPHLE, "DSPRES_FILTER (size: 0x40): 0x%08x", tmp[1]);
// Zelda WW: This points to a 64-byte array of coefficients, which are EXACTLY the same
// as the AFC ADPCM coef array in decode.c of the in_cube winamp plugin,
// which can play Zelda audio.
// There's also a lot more table-looking data immediately after - maybe alternative
// tables? I wonder where the parameter blocks are?
DEBUG_LOG(DSPHLE, "DSPADPCM_FILTER (size: 0x500): 0x%08x", tmp[2]);
DEBUG_LOG(DSPHLE, "Param blocks 2: 0x%08x", tmp[3]);
DEBUG_LOG(DSPHLE, "???: 0x%08x", tmp[0]);
DEBUG_LOG(DSPHLE, "DSPADPCM_FILTER (size: 0x500): 0x%08x", tmp[1]);
DEBUG_LOG(DSPHLE, "DSPRES_FILTER (size: 0x40): 0x%08x", tmp[2]);
DEBUG_LOG(DSPHLE, "???: 0x%08x", tmp[3]);
}
break;
// SyncFrame ... zelda ww jumps to 0x0243
// SyncFrame doesn't send a 0xDCD10004 SYNC at all ... just the 0xDCD10005 for "frame end"
case 0x02:
{
u32 tmp[2];
u32 tmp[3];
tmp[0] = Read32();
tmp[1] = Read32();
tmp[2] = Read32();
// We're ready to mix
// soundStream->GetMixer()->SetHLEReady(true);
// DEBUG_LOG(DSPHLE, "Update the SoundThread to be in sync");
soundStream->Update(); //do it in this thread to avoid sync problems
m_bSyncCmdPending = true;
m_SyncEndSync = Sync;
m_SyncCount = 0;
m_SyncMax = (CmdMail >> 16) & 0xFF;
m_NumberOfFramesToRender = (CmdMail >> 16) & 0xFF;
m_MaxSyncedPB = 0;
DEBUG_LOG(DSPHLE, "DsyncFrame");
DEBUG_LOG(DSPHLE, "???: 0x%08x", tmp[0]);
DEBUG_LOG(DSPHLE, "???: 0x%08x", tmp[1]);
DEBUG_LOG(DSPHLE, "DSPADPCM_FILTER (size: 0x500): 0x%08x", tmp[2]);
// These alternate between three sets of mixing buffers. They are all three fairly near,
// but not at, the ADMA read addresses.
DEBUG_LOG(DSPHLE, "Left mixing buffer? 0x%08x", tmp[0]);
DEBUG_LOG(DSPHLE, "Right mixing buffer? 0x%08x", tmp[1]);
// Let's log the parameter blocks.
// Copy and byteswap the parameter blocks.
// For some reason, in Z:WW we get no param blocks until in-game,
// while Zelda Four Swords happily sets param blocks as soon as the title screen comes up.
// Looks like it's playing midi music.
DEBUG_LOG(DSPHLE, "Param block at %08x:", param_blocks_ptr);
CopyPBsFromRAM();
for (int i = 0; i < num_param_blocks; i++)
{
const ZPB &pb = zpbs[i];
// The only thing that consistently looks like a pointer in the param blocks.
u32 addr = (pb.addr_high << 16) | pb.addr_low;
if (addr)
{
DEBUG_LOG(DSPHLE, "Param block: ==== %i ( %08x ) ====", i, GetParamBlockAddr(i));
DEBUG_LOG(DSPHLE, "Addr: %08x Type: %i", addr, pb.type);
// Got one! Read from ARAM, dump to file.
// I can't get the below to produce anything resembling sane audio :(
//addr *= 2;
/*
int size = 0x10000;
u8 *temp = new u8[size];
for (int i = 0; i < size; i++) {
temp[i] = g_dspInitialize.pARAM_Read_U8(addr + i);
}
s16 *audio = new s16[size * 4];
int aoff = 0;
short hist1 = 0, hist2 = 0;
for (int i = 0; i < size; i+=9)
{
AFCdecodebuffer(temp + i, audio + aoff, &hist1, &hist2);
aoff += 16;
}
char fname[256];
sprintf(fname, "%08x.bin", addr);
if (File::Exists(fname))
continue;
FILE *f = fopen(fname, "wb");
fwrite(audio, 1, size*4, f);
fclose(f);
sprintf(fname, "%08x_raw.bin", addr);
f = fopen(fname, "wb");
fwrite(temp, 1, size, f);
fclose(f);
*/
}
}
CopyPBsToRAM();
soundStream->GetMixer()->SetHLEReady(true);
}
return;
break;
/*
case 0x03: break; // dunno ... zelda ww jmps to 0x0073
@ -381,9 +541,13 @@ void CUCode_Zelda::ExecuteList()
// DsetDolbyDelay ... zelda ww jumps to 0x00b2
case 0x0d:
{
u32 tmp = Read32();
u32 tmp[2];
tmp[0] = Read32();
tmp[1] = Read32();
DEBUG_LOG(DSPHLE, "DSetDolbyDelay");
DEBUG_LOG(DSPHLE, "DOLBY2_DELAY_BUF (size 0x960): 0x%08x", tmp);
DEBUG_LOG(DSPHLE, "DOLBY2_DELAY_BUF (size 0x960): 0x%08x", tmp[0]);
DEBUG_LOG(DSPHLE, "DSPRES_FILTER (size 0x500): 0x%08x", tmp[1]);
}
break;
@ -418,14 +582,16 @@ void CUCode_Zelda::ExecuteList()
}
void CUCode_Zelda::CopyPBsFromRAM()
{
for (int i = 0; i < num_param_blocks; i++)
for (u32 i = 0; i < m_NumPBs; i++)
{
u32 addr = param_blocks_ptr + i * sizeof(ZPB);
u32 addr = m_PBAddress + i * sizeof(ZPB);
const u16 *src_ptr = (u16 *)g_dspInitialize.pGetMemoryPointer(addr);
u16 *dst_ptr = (u16 *)&zpbs[i];
for (size_t p = 0; p < sizeof(ZPB) / 2; p++)
u16 *dst_ptr = (u16 *)&m_PBs[i];
for (size_t p = 0; p < 0xC0; p++)
{
dst_ptr[p] = Common::swap16(src_ptr[p]);
}
@ -434,25 +600,186 @@ void CUCode_Zelda::CopyPBsFromRAM()
void CUCode_Zelda::CopyPBsToRAM()
{
for (int i = 0; i < num_param_blocks; i++)
for (u32 i = 0; i < m_NumPBs; i++)
{
u32 addr = param_blocks_ptr + i * sizeof(ZPB);
const u16 *src_ptr = (u16 *)&zpbs[i];
u32 addr = m_PBAddress + i * sizeof(ZPB);
const u16 *src_ptr = (u16 *)&m_PBs[i];
u16 *dst_ptr = (u16 *)g_dspInitialize.pGetMemoryPointer(addr);
for (size_t p = 0; p < sizeof(ZPB) / 2; p++)
for (size_t p = 0; p < 0x80; p++) // we write the first 0x80 shorts back only
{
dst_ptr[p] = Common::swap16(src_ptr[p]);
}
}
}
// size is in stereo samples.
void CUCode_Zelda::MixAdd(short* buffer, int size)
// Decoder from in_cube by hcs/destop/etc. Haven't yet found a valid use for it.
// Looking at in_cube, it seems to be 9 bytes of input = 16 samples of output.
// Different from AX ADPCM which is 8 bytes of input = 14 samples of output.
// input = location of encoded source samples
// out = location of destination buffer (16 bits / sample)
// I am sure that there are 5 bytes of input for 16 samples output too... just check the UCode
// if (type == 5) -> 5 input bytes
// if (type == 9) -> 9 input bytes
//
int CUCode_Zelda::AFCdecodebuffer(char *input, signed short *out, short * histp, short * hist2p, int type)
{
// for (int i = 0; i < size; i++)
// {
// buffer[i*2] = rand();
// buffer[i*2+1] = rand();
// }
int sample;
short nibbles[16];
int i,j;
char *src,*dst;
short idx;
short delta;
short hist=*histp;
short hist2=*hist2p;
dst = (char*)out;
src=input;
delta = 1<<(((*src)>>4)&0xf);
idx = (*src)&0xf;
src++;
if (type == 9)
{
for(i = 0; i < 16; i = i + 2) {
j = ( *src & 255) >> 4;
nibbles[i] = j;
j = *src & 255 & 15;
nibbles[i+1] = j;
src++;
}
for(i = 0; i < 16; i = i + 1) {
if(nibbles[i] >= 8)
nibbles[i] = nibbles[i] - 16;
}
}
else
{
// untested !!! i havnt seen such a sample yet :)
for(i = 0; i < 16; i = i + 4)
{
j = (*src >> 0) & 0x02;
nibbles[i] = j;
j = (*src >> 2) & 0x02;
nibbles[i+1] = j;
j = (*src >> 4) & 0x02;
nibbles[i+2] = j;
j = (*src >> 6) & 0x02;
nibbles[i+3] = j;
src++;
}
for(i = 0; i < 16; i = i + 1)
{
if(nibbles[i] >= 2)
nibbles[i] = nibbles[i] - 4;
}
}
for(i = 0; i<16 ; i = i + 1) {
sample = (delta * nibbles[i])<<11;
sample += ((long)hist * m_AFCCoefTable[idx][0]) + ((long)hist2 * m_AFCCoefTable[idx][1]);
sample = sample >> 11;
if(sample > 32767) {
sample = 32767;
}
if(sample < -32768) {
sample = -32768;
}
*(short*)dst = (short)sample;
dst = dst + 2;
hist2 = hist;
hist = (short)sample;
}
*histp=hist;
*hist2p=hist2;
return((int)src);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
//
// --- Debug Helper
//
///////////////////////////////////////////////////////////////////////////////////////////////////
void CUCode_Zelda::DumpPB(const ZPB& _rPB)
{
u16* pTmp = (u16*)&_rPB;
FILE* pF = fopen("e:\\PB.txt", "wt");
if (pF)
{
for (int i=0; i<0xc0;i++)
{
fprintf(pF, "[0x%02x] 0x%04x\n", i, pTmp[i]);
}
fclose(pF);
}
}
void write32le(int in, unsigned char * buf) {
buf[0]=in&0xff;
buf[1]=(in>>8)&0xff;
buf[2]=(in>>16)&0xff;
buf[3]=(in>>24)&0xff;
}
int CUCode_Zelda::DumpAFC(u8* pIn, const int size, const int srate)
{
unsigned char inbuf[9];
short outbuf[16];
FILE * outfile;
int sizeleft;
int outsize,outsizetotal;
short hist=0,hist2=0;
unsigned char wavhead[44] = {
0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45, 0x66, 0x6D, 0x74, 0x20,
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x02, 0x00, 0x10, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00
};
outfile = fopen("d:/supa.wav","wb");
if (!outfile) return 1;
outsize = size/9*16*2;
outsizetotal = outsize+8;
write32le(outsizetotal,wavhead+4);
write32le(outsize,wavhead+40);
write32le(srate,wavhead+24);
write32le(srate*2,wavhead+28);
if (fwrite(wavhead,1,44,outfile)!=44) return 1;
for (sizeleft=size;sizeleft>=9;sizeleft-=9) {
memcpy(inbuf, pIn, 9);
pIn += 9;
AFCdecodebuffer((char*)inbuf,outbuf,&hist,&hist2);
if (fwrite(outbuf,1,16*2,outfile) != 16*2)
return 1;
}
if (fclose(outfile)==EOF) return 1;
return 0;
}

View File

@ -21,16 +21,24 @@
#include "Common.h"
#include "UCodes.h"
// Zelda WW param blocks. These are 0x180 bytes large.
// According to FIRES, the ucodes copies 0xc0 shorts (0x180 bytes) from RAM
// but only writes back 0x80 shorts (0x100 bytes). So the last 0x80 are "read only".
struct ZPB
{
// R/W data =============
// AFC history (2 shorts) must be in here somewhere, plus lots of other state.
u16 rw_unknown[0x80];
u16 unk0;
u16 unk1;
u16 unk2;
u16 unk3;
u16 unk4;
u16 unk5;
u16 unk6;
// Read only data
u16 unk7[0x2C]; // 0x033
u16 SampleData[0x4D];
// Read only data(0x80 to the end)
u16 type; // 0x5, 0x9 = AFC.
u16 r_unknown1;
@ -45,12 +53,11 @@ struct ZPB
};
namespace {
// If this miscompiles, adjust the size of ZPB to 0x180 bytes (0xc0 shorts).
// If this miscompiles, adjust the size of ZPB to 0x180 bytes (0xc0 shorts).
CompileTimeAssert<sizeof(ZPB) == 0x180> ensure_zpb_size_correct;
} // namespace
// Zelda UCode - the big mystery.
class CUCode_Zelda : public IUCode
{
private:
@ -61,59 +68,63 @@ private:
DSP_YIELD = 0xDCD10002,
DSP_DONE = 0xDCD10003,
DSP_SYNC = 0xDCD10004,
DSP_UNKN = 0xDCD10005,
DSP_FRAME_END = 0xDCD10005,
};
// Sync =========================================
bool m_bSyncInProgress;
u32 m_SyncIndex;
u32 m_SyncStep;
u32 m_SyncValues[16];
// Command 0x1: SetupTable
u32 m_SyncMaxStep;
// AFC CoefTable
short m_AFCCoefTable[16][2];
// Command 0x2: SyncFrame
bool m_bSyncCmdPending;
u32 m_SyncEndSync;
u32 m_SyncCurStep;
u32 m_SyncCount;
u32 m_SyncMax;
int m_NumberOfFramesToRender;
int m_CurrentFrameToRender;
// List, buffer management =====================
// List in progress
u32 m_numSteps;
bool m_bListInProgress;
u32 m_step;
u8 m_Buffer[1024];
void ExecuteList();
u32 m_readOffset;
u32 Read32() {
u32 res = *(u32*)&m_Buffer[m_readOffset];
m_readOffset += 4;
if ((m_readOffset >> 2) >= m_numSteps + 1) {
WARN_LOG(DSPHLE, "Read32 out of bounds");
enum EMailState
{
WaitForMail,
ReadingFrameSync,
ReadingMessage,
ReadingSystemMsg
};
EMailState m_MailState;
u16 m_PBMask[0x10];
u32 m_NumPBs;
u32 m_PBAddress;
u32 m_MaxSyncedPB;
ZPB m_PBs[0x40];
u8 Read8()
{
return m_Buffer[m_readOffset++];
}
u16 Read16()
{
u16 res = *(u16*)&m_Buffer[m_readOffset];
m_readOffset += 2;
return res;
}
// Param blocks, mixer state ====================
// HLE state
int num_param_blocks;
u32 param_blocks_ptr;
u32 param_blocks2_ptr;
ZPB zpbs[0x40];
ZPB zpbs2[4];
void CopyPBsFromRAM();
void CopyPBsToRAM();
u32 GetParamBlockAddr(int block_no) const {
return param_blocks_ptr + sizeof(ZPB) * block_no;
u32 Read32()
{
u32 res = *(u32*)&m_Buffer[m_readOffset];
m_readOffset += 4;
return res;
}
public:
CUCode_Zelda(CMailHandler& _rMailHandler);
@ -122,6 +133,24 @@ public:
void HandleMail(u32 _uMail);
void Update(int cycles);
void MixAdd(short* buffer, int size);
void UpdatePB(ZPB& _rPB, int *templbuffer, int *temprbuffer, u32 _Size);
void CopyPBsFromRAM();
void CopyPBsToRAM();
int AFCdecodebuffer(char *input, signed short *out, short * histp, short * hist2p, int type = 9);
int *templbuffer;
int *temprbuffer;
// simple dump ...
void DumpPB(const ZPB& _rPB);
int DumpAFC(u8* pIn, const int size, const int srate);
};
#endif