* symbolizer: follow global variables from known functions to identify the unknown ones

This commit is contained in:
giantpune 2011-12-13 06:07:45 +00:00
parent 90bc4a0090
commit ecea71b49d
5 changed files with 3178 additions and 7 deletions

2
.gitattributes vendored
View File

@ -141,6 +141,8 @@ symbolizer/dol.h -text
symbolizer/elfparser.cpp -text
symbolizer/elfparser.h -text
symbolizer/main.cpp -text
symbolizer/ppc_disasm.c -text
symbolizer/ppc_disasm.h -text
symbolizer/symbolizer.pro -text
thp_player/ffw.png -text
thp_player/gcvid.cpp -text

View File

@ -2,6 +2,7 @@
#include "dol.h"
#include "elfparser.h"
#include "ppc_disasm.h"
#include "../WiiQt/includes.h"
#include "../WiiQt/tools.h"
@ -30,6 +31,16 @@ struct KnownFunction
}
};
// reduce an opcode to the bits that identify a global variable access:
// the rA register field ( PPCAMASK ) and the low 16 bits of the opcode.
// the argument is parenthesized so that expressions such as GLOBALVAR_MASK( a | b ) are masked as a whole
#define GLOBALVAR_MASK( x ) ( (quint32)( ( x ) & ( PPCAMASK | 0xffff ) ) )
// describes one global variable that is accessed relative to rtoc ( r2 ) or r13.
// entries are created by FindGlobalVariables() from R_PPC_EMB_SDA21 references in known functions
struct KnownVariable
{
quint32 sig; // signature shared by every opcode that accesses this variable, once GLOBALVAR_MASK()'d
//! given lwz %r0, -0x5460(%r13), it will store the -0x5460(%r13) part ( base register + displacement )
QString name; // symbol name, taken from the reference that led to this variable
ElfParser::File *file; // pointer to the file object that contains the data ( not owned by this struct )
};
// holds the info about a dol to use while searching for crap in it
Dol dol;
QStringList dolTextHex;// store a copy of the dol as hex for easier searching for patterns
@ -42,15 +53,33 @@ QList< ElfParser::File > libFiles;
QList< KnownData > knownData;
QList< KnownFunction > knownFunctions;
// keep track of variables accessed through r2 and r13
QList< KnownVariable > knownVariables;
// keep a list of the locations that each function's pattern matched to keep from looking them up over and over
QMap< const ElfParser::Function *, QList< quint32 > >patternMatches;
#define DU32( x ) qDebug().nospace() << #x << ": " << hex << (x)
QString NStr( quint32 num, quint8 width = 8 );
// format num as hexadecimal text, left-padded with '0' up to width characters
QString NStr( quint32 num, quint8 width )
{
    const int base = 16;
    const QChar fill( '0' );
    QString text = QString( "%1" ).arg( num, width, base, fill );
    return text;
}
bool IsVariableKnown( quint32 sig, const QList< KnownVariable > &list = knownVariables );
// report whether any variable in list carries the given signature.
// when no list is passed, the global knownVariables list is searched
bool IsVariableKnown( quint32 sig, const QList< KnownVariable > &list )
{
    QList< KnownVariable >::const_iterator cur = list.constBegin();
    const QList< KnownVariable >::const_iterator end = list.constEnd();
    for( ; cur != end; ++cur )
    {
        if( cur->sig == sig )
        {
            return true;
        }
    }
    return false;
}
// read dol into memory
bool LoadDol( const QString &path )
{
@ -942,6 +971,177 @@ void TryToMatchFunctions1()
RemoveOverlaps();
}
void FindGlobalVariables()
{
QList< QPair< const ElfParser::Function *, const KnownData *> > maybeMatches;// keep track of functions to check
//QList< QPair< QPair< const ElfParser::Function *, const KnownData *> *, const SymAlias * > > aliasMatches;// keep track of functions to check via aliases
QMap< const ElfParser::Function *, const ElfParser::File * >fileMap;
QMap< const ElfParser::Function *, quint32 >winners;
QList< KnownVariable > newVariables;
foreach( const KnownFunction &kf1, knownFunctions )// look in all known functions for global variables
{
if( !kf1.file )
{
continue;
}
foreach( const SymRef &ref, kf1.function->References() )
{
if( ref.type != SymRef::R_PPC_EMB_SDA21 )
{
continue;
}
quint32 addr = kf1.addr + ( ref.off & ~3 );
quint32 opcode = GetOpcodeFromAddr( addr );
if( opcode == 0xdeadbeef )
{
DBG << "opcode" << hex << opcode;
continue;
}
quint32 reg = (quint32)PPCGETA( opcode );
if( reg != 2 && reg != 13 )
{
DBG << "reg:" << hex << reg << kf1.function->Name() << ref.name;
continue;
}
quint32 sig = GLOBALVAR_MASK( opcode );
if( IsVariableKnown( sig, newVariables ) || IsVariableKnown( sig ) )
{
continue;
}
KnownVariable nw;
nw.file = (ElfParser::File *)kf1.file;
nw.name = ref.name;
nw.sig = sig;
newVariables << nw;
/*qDebug() << "opcode" << hex << opcode << "addr" << addr;
qDebug() << kf1.function->Name() << ref.name;
qDebug();
quint32 z = GLOBALVAR_MASK( opcode );
opcode = z;
quint32 s = (quint32)PPCGETD( opcode );
quint32 a = (quint32)PPCGETA( opcode );
quint32 d = (quint32)( opcode & 0xffff );
quint32 o = (quint32)( PPCGETIDX( opcode ) - 32 );
DU32( o );
DU32( d );
DU32( s );
DU32( a );*/
//exit( 0 );
}
}
// look at all the variables and see if there is an unknown function in that file that refers to the variable
QList< const ElfParser::Function * > bitches;
foreach( const KnownVariable &var, newVariables )
{
foreach( const ElfParser::Function &fun, var.file->Functions() )
{
if( FunctionIsKnown( &fun ) )
{
continue;
}
foreach( const SymRef &ref, fun.References() )
{
if( ref.type != SymRef::R_PPC_EMB_SDA21 )
{
continue;
}
if( ref.name != var.name )
{
continue;
}
if( !bitches.contains( &fun ) )
{
bitches << &fun;
fileMap[ &fun ] = var.file;
}
}
}
}
// now look at all the references to global variables in each found function and if it refers to a known one,
// check the opcode and see that it is correct
QMapIterator< const ElfParser::Function *, const ElfParser::File * > it( fileMap );
while( it.hasNext() )
{
it.next();
QList< quint32 > addrs = PatternMatches( it.key() );
quint32 winner = 0;
bool functionFailed = false;
foreach( quint32 addr, addrs )
{
bool fail = false;
foreach( const SymRef &ref, it.key()->References() )
{
if( ref.type != SymRef::R_PPC_EMB_SDA21 )
{
continue;
}
quint32 varSig = 0xb00b5;
foreach( const KnownVariable &kv, newVariables )
{
if( kv.file == it.value() && kv.name == ref.name )
{
varSig = kv.sig;
break;
}
}
if( varSig == 0xb00b5 )// we dont know this variable
{
continue;
}
quint32 opAddr = addr + ( ref.off & ~3 );
quint32 opcode = GetOpcodeFromAddr( opAddr );
if( opcode == 0xdeadbeef )
{
DBG << "opcode" << hex << opcode;
continue;
}
if( GLOBALVAR_MASK( opcode ) != varSig )
{
fail = true;
break;
}
}
if( fail )
{
continue;
}
if( winner )// found more than 1 match for this little guy
{
functionFailed = true;
break;
}
winner = addr;
}
if( winner && !functionFailed )
{
winners[ it.key() ] = winner;
}
}
knownVariables << newVariables;
//DBG << "added these bad boys";
QMapIterator< const ElfParser::Function *, quint32 > ret( winners );
while( ret.hasNext() )
{
ret.next();
//qDebug() << hex << ret.value()
// << NStr( ret.key()->Pattern().size() / 2, 4 )
// << ret.key()->Name()
// << fileMap.find( ret.key() ).value()->Name();
AddFunctionToKnownList( ret.key(), fileMap.find( ret.key() ).value(), ret.value() );
}
RemoveOverlaps();
}
void TryToMatchFunctions2( QMap< const ElfParser::Function *, quint32 > &nonMatchingBranches )
{
QStringList wholeDolHex = dolDataHex + dolTextHex;
@ -1290,11 +1490,11 @@ QList< QPair< const ElfParser::Function *, quint32> > TryToMatchFunctions4( QLis
// cleanup the list
CleanupList( maybeMatches );
int s = maybeMatches.size();
qDebug() << "Functions that only have 1 pattern match, contain wildcards, and are larger than 0x" << hex << minLen << "bytes:";
//qDebug() << "Functions that only have 1 pattern match, contain wildcards, and are larger than 0x" << hex << minLen << "bytes:";
for( int i = 0; i < s; i++ )
{
const QPair< const ElfParser::Function *, quint32 > &p = maybeMatches.at( i );
qDebug() << hex << p.second << NStr( p.first->Pattern().size() / 2, 4 ) << p.first->Name();
//qDebug() << hex << p.second << NStr( p.first->Pattern().size() / 2, 4 ) << p.first->Name();
AddFunctionToKnownList( p.first, fileMap.find( p.first ).value(), p.second );
}
@ -1518,8 +1718,10 @@ int main(int argc, char *argv[])
//dolPath = "/home/j/c/hackmiiHaxx/disassembly/mem1-decrypt_60.bin";
//libPath = "/home/j/devkitPRO/libogc/lib/wii";
//dolPath = "/home/j/c/WiiQt/93/symbolizer/00000043_60.dol";
//dolPath = "/home/j/c/WWE12_haxx/main.dol";
//libPath = "/home/j/devkitPRO/libogc/lib/wii";
//libPath += "/os.a";
qDebug() << "Loading dol...";
if( !LoadDol( dolPath ) )
@ -1544,9 +1746,15 @@ int main(int argc, char *argv[])
// find first round of functions
TryToMatchFunctions1();
// add functions by looking at rtoc and r13
FindGlobalVariables();
//exit( 0 );
// find branches from the first round of functions
TryToMatchFunctions2( nonMatchingBranches );
FindGlobalVariables();
// looking for functions that dont branch anywhere or use global variables or anything
TryToMatchFunctions0();
@ -1565,6 +1773,11 @@ int main(int argc, char *argv[])
qDebug() << " - added" << newFunctions.size() << "new functions -";
}
// add functions by looking at rtoc and r13
FindGlobalVariables();
qDebug() << "Total functions found:" << knownFunctions.size();
// find branches from the known functions
@ -1591,9 +1804,12 @@ int main(int argc, char *argv[])
num = knownFunctions.size();
for( int i = 0; i < maxRetries; i++ )
{
qDebug() << " -- Round" << i << '/' << maxRetries << " following branches --";
qDebug() << " -- Round" << i << '/' << maxRetries << " following branches and global variables --";
TryToMatchFunctions2( nonMatchingBranches );
// add functions by looking at rtoc and r13
FindGlobalVariables();
int num2 = knownFunctions.size();
if( num2 == num )
{
@ -1603,8 +1819,9 @@ int main(int argc, char *argv[])
num = num2;
}
qDebug() << "Total data matches: " << knownData.size();
qDebug() << "Total functions found:" << knownFunctions.size();
qDebug() << "Total global variables: " << knownVariables.size();
qDebug() << "Total data matches: " << knownData.size();
qDebug() << "Total functions found: " << knownFunctions.size();
qDebug() << "Generating idc file...";
QString idc = MakeIDC( dolPath, libPath, nonMatchingBranches );

2807
symbolizer/ppc_disasm.c Normal file

File diff suppressed because it is too large Load Diff

143
symbolizer/ppc_disasm.h Normal file
View File

@ -0,0 +1,143 @@
/* $VER: ppc_disasm.h V1.4 (29.08.2001)
*
* Disassembler module for the PowerPC microprocessor family
* Copyright (c) 1998-2001 Frank Wille
*
* ppc_disasm.c is freeware and may be freely redistributed as long as
* no modifications are made and nothing is charged for it.
* Non-commercial usage is allowed without any restrictions.
* EVERY PRODUCT OR PROGRAM DERIVED DIRECTLY FROM MY SOURCE MAY NOT BE
* SOLD COMMERCIALLY WITHOUT PERMISSION FROM THE AUTHOR.
*
*
* v1.4 (29.08.2001) phx
* AltiVec support.
* v0.1 (23.05.1998) phx
* First version, which implements all PowerPC instructions.
* v0.0 (09.05.1998) phx
* File created.
*/
/* Include guard: it now spans the whole header. Previously a stray #endif directly
 * after "#define LITTLEENDIAN" closed it early, which left everything below
 * (including struct DisasmPara_PPC) unprotected against a second inclusion. */
#ifndef PPC_DISASM_H
#define PPC_DISASM_H

/* enables the GEKKO* field macros further below */
#define PPC_GEKKO

/* version/revision */
#define PPCDISASM_VER 1
#define PPCDISASM_REV 4

/* typedefs */
typedef unsigned int ppc_word;
#ifndef NULL
#define NULL (0L)
#endif

/* endianness */
/* LITTLEENDIAN is hard-coded here, so the sanity check below cannot fire
 * as long as this define stays in place. */
#define LITTLEENDIAN
#if !defined(BIGENDIAN) && !defined(LITTLEENDIAN)
#error Define either BIGENDIAN or LITTLEENDIAN!
#define LITTLEENDIAN
#endif

/* general defines */
/* bit masks of the instruction word fields */
#define PPCIDXMASK 0xfc000000
#define PPCIDX2MASK 0x000007fe
#define PPCDMASK 0x03e00000
#define PPCAMASK 0x001f0000
#define PPCBMASK 0x0000f800
#define PPCCMASK 0x000007c0
#define PPCMMASK 0x0000003e
#define PPCCRDMASK 0x03800000
#define PPCCRAMASK 0x001c0000
#define PPCLMASK 0x00600000
#define PPCOE 0x00000400
#define PPCVRC 0x00000400
#define PPCDST 0x02000000
#define PPCSTRM 0x00600000

/* right-shift counts that move each masked field down to bit 0 */
#define PPCIDXSH 26
#define PPCDSH 21
#define PPCASH 16
#define PPCBSH 11
#define PPCCSH 6
#define PPCMSH 1
#define PPCCRDSH 23
#define PPCCRASH 18
#define PPCLSH 21
#define PPCIDX2SH 1

/* field extractors: mask the instruction word x, then shift the field down */
#define PPCGETIDX(x) (((x)&PPCIDXMASK)>>PPCIDXSH)
#define PPCGETD(x) (((x)&PPCDMASK)>>PPCDSH)
#define PPCGETA(x) (((x)&PPCAMASK)>>PPCASH)
#define PPCGETB(x) (((x)&PPCBMASK)>>PPCBSH)
#define PPCGETC(x) (((x)&PPCCMASK)>>PPCCSH)
#define PPCGETM(x) (((x)&PPCMMASK)>>PPCMSH)
#define PPCGETCRD(x) (((x)&PPCCRDMASK)>>PPCCRDSH)
#define PPCGETCRA(x) (((x)&PPCCRAMASK)>>PPCCRASH)
#define PPCGETL(x) (((x)&PPCLMASK)>>PPCLSH)
#define PPCGETIDX2(x) (((x)&PPCIDX2MASK)>>PPCIDX2SH)
#define PPCGETSTRM(x) (((x)&PPCSTRM)>>PPCDSH)

#ifdef PPC_GEKKO
/* masks, shift counts and extractors for the additional fields used when PPC_GEKKO is defined */
#define GEKKOLASMASK 0x0000007e
#define GEKKOWMASK 0x00000400
#define GEKKOIMASK 0x00000380
#define GEKKOW2MASK 0x00008000
#define GEKKOI2MASK 0x00007000
#define GEKKOVMASK 0x00000FFF

#define GEKKOLASSH 1
#define GEKKOWSH 10
#define GEKKOISH 7
#define GEKKOW2SH 15
#define GEKKOI2SH 12
#define GEKKOVSH 0

#define GEKKOGETLASID(x) (((x)&GEKKOLASMASK)>>GEKKOLASSH)
#define GEKKOGETW(x) (((x)&GEKKOWMASK)>>GEKKOWSH)
#define GEKKOGETI(x) (((x)&GEKKOIMASK)>>GEKKOISH)
#define GEKKOGETW2(x) (((x)&GEKKOW2MASK)>>GEKKOW2SH)
#define GEKKOGETI2(x) (((x)&GEKKOI2MASK)>>GEKKOI2SH)
#define GEKKOGETV(x) (((x)&GEKKOVMASK)>>GEKKOVSH)
#endif

/* Disassembler structure, the interface to the application */
struct DisasmPara_PPC {
    ppc_word *instr;        /* pointer to instruction to disassemble */
    ppc_word *iaddr;        /* instr.addr., usually the same as instr */
    char *opcode;           /* buffer for opcode, min. 10 chars. */
    char *operands;         /* operand buffer, min. 24 chars. */
    /* changed by disassembler: */
    unsigned char type;     /* type of instruction, see below */
    unsigned char flags;    /* additional flags */
    unsigned short sreg;    /* register in load/store instructions */
    ppc_word displacement;  /* branch- or load/store displacement */
};

/* values for DisasmPara_PPC.type */
#define PPCINSTR_OTHER 0 /* no additional info for other instr. */
#define PPCINSTR_BRANCH 1 /* branch dest. = PC+displacement */
#define PPCINSTR_LDST 2 /* load/store instruction: displ(sreg) */
#define PPCINSTR_IMM 3 /* 16-bit immediate val. in displacement */

/* bits for DisasmPara_PPC.flags */
#define PPCF_ILLEGAL (1<<0) /* illegal PowerPC instruction */
#define PPCF_UNSIGNED (1<<1) /* unsigned immediate instruction */
#define PPCF_SUPER (1<<2) /* supervisor level instruction */
#define PPCF_64 (1<<3) /* 64-bit only instruction */
#define PPCF_ALTIVEC (1<<4) /* AltiVec instruction */

/* ppc_disasm.o prototypes */
#ifndef PPC_DISASM_C
extern ppc_word *PPC_Disassemble(struct DisasmPara_PPC *);
#endif

#endif /* PPC_DISASM_H */

View File

@ -19,10 +19,12 @@ SOURCES += main.cpp\
../WiiQt/aes.c \
elfparser.cpp \
dol.cpp \
be.cpp
be.cpp \
ppc_disasm.c
HEADERS += ../WiiQt/tools.h \
../WiiQt/aes.h \
elfparser.h \
dol.h \
be.h
be.h \
ppc_disasm.h