From ae6e6ba7d81828ada911cb45892f417cf5c910ce Mon Sep 17 00:00:00 2001 From: giantpune Date: Mon, 12 Dec 2011 06:52:50 +0000 Subject: [PATCH] * add new program 'symbolizer' which has basically nothing to do with any of the rest of this stuff. but it is wii-related and using Qt, so it ends up here --- .gitattributes | 9 + symbolizer/be.cpp | 3 + symbolizer/be.h | 83 ++ symbolizer/dol.cpp | 84 ++ symbolizer/dol.h | 58 ++ symbolizer/elfparser.cpp | 625 +++++++++++++ symbolizer/elfparser.h | 135 +++ symbolizer/main.cpp | 1516 ++++++++++++++++++++++++++++++++ symbolizer/symbolizer.pro | 28 + symbolizer/symbolizer.pro.user | 157 ++++ 10 files changed, 2698 insertions(+) create mode 100644 symbolizer/be.cpp create mode 100644 symbolizer/be.h create mode 100644 symbolizer/dol.cpp create mode 100644 symbolizer/dol.h create mode 100644 symbolizer/elfparser.cpp create mode 100644 symbolizer/elfparser.h create mode 100644 symbolizer/main.cpp create mode 100644 symbolizer/symbolizer.pro create mode 100644 symbolizer/symbolizer.pro.user diff --git a/.gitattributes b/.gitattributes index 9f47f2c..da8e08e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -134,6 +134,15 @@ saveToy/saveloadthread.h -text saveToy/textdialog.cpp -text saveToy/textdialog.h -text saveToy/textdialog.ui -text +symbolizer/be.cpp -text +symbolizer/be.h -text +symbolizer/dol.cpp -text +symbolizer/dol.h -text +symbolizer/elfparser.cpp -text +symbolizer/elfparser.h -text +symbolizer/main.cpp -text +symbolizer/symbolizer.pro -text +symbolizer/symbolizer.pro.user -text thp_player/ffw.png -text thp_player/gcvid.cpp -text thp_player/gcvid.h -text diff --git a/symbolizer/be.cpp b/symbolizer/be.cpp new file mode 100644 index 0000000..d7fbfb2 --- /dev/null +++ b/symbolizer/be.cpp @@ -0,0 +1,3 @@ +#include "be.h" + + diff --git a/symbolizer/be.h b/symbolizer/be.h new file mode 100644 index 0000000..ec336e1 --- /dev/null +++ b/symbolizer/be.h @@ -0,0 +1,83 @@ +#ifndef BE_H +#define BE_H + +#include 
"../WiiQt/includes.h" + +class be64 +{ + quint64 value; // in big endian; + +public: + explicit be64( quint64 v ): value( qFromBigEndian( v ) ) {} + be64(): value( 0 ) {} + + quint64 operator*() + { + return to_quint64(); + } + + quint64 to_quint64() const + { + return qToBigEndian( value ); + } + + be64 &operator = ( const quint64 &v ) + { + value = qFromBigEndian( v ); + return *this; + } +}; + +class be32 +{ + quint32 value; // in big endian; + +public: + explicit be32( quint32 v ): value( qFromBigEndian( v ) ) {} + be32(): value( 0 ) {} + + quint32 operator*() + { + return to_quint32(); + } + + quint32 to_quint32() const + { + return qToBigEndian( value ); + } + + be32 &operator = ( const quint32 &v ) + { + value = qFromBigEndian( v ); + return *this; + } +}; + +class be16 +{ + quint16 value; // in big endian; + +public: + explicit be16( quint16 v ): value( qFromBigEndian( v ) ) {} + be16(): value( 0 ) {} + + quint16 operator*() + { + return to_quint16(); + } + + quint16 to_quint16() const + { + return qToBigEndian( value ); + } + + be16 &operator = ( const quint16 &v ) + { + value = qFromBigEndian( v ); + return *this; + } +}; + + + +#endif // BE_H diff --git a/symbolizer/dol.cpp b/symbolizer/dol.cpp new file mode 100644 index 0000000..d436072 --- /dev/null +++ b/symbolizer/dol.cpp @@ -0,0 +1,84 @@ +#include "dol.h" + +Dol::Dol( const QByteArray &dol ) : dh( NULL ) +{ + if( (quint32)dol.size() > sizeof( Dolheader ) ) + { + Parse( dol ); + } +} + +bool Dol::Parse( const QByteArray &dol ) +{ + dh = NULL; + text.clear(); + data.clear(); + + if( (quint32)dol.size() < sizeof( Dolheader ) ) + { + WRN << "dol.size() < sizeof( Dolheader )"; + return false; + } + headerBuf = dol.left( sizeof( Dolheader ) ); + dh = reinterpret_cast< Dolheader * >( headerBuf.data() ); + if( !dh ) + { + WRN << "!dh"; + return false; + } + //DBG << hex << (*(dh->entrypoint)); + + for( quint32 i = 0; i < 7; i ++ ) + { + quint32 fileOff = (*(dh->offsetText[ i ] ) ); + quint32 len = 
(*(dh->sizeText[ i ] ) ); + quint32 addr = (*(dh->addressText[ i ] ) ); + if( !fileOff || !len || !addr ) // skip entries with a zero offset, size or address + { + continue; + } + if( (quint64)fileOff + len > (quint64)dol.size() ) // compare in 64 bits so the sum cannot wrap around + { + WRN << "text section is out of range:" << i << hex << fileOff << len; + continue; + } + DolSection sec; + sec.addr = addr; + sec.data = dol.mid( fileOff, len ); + + text << sec; + } + + for( quint32 i = 0; i < 11; i ++ ) + { + quint32 fileOff = (*(dh->offsetData[ i ] ) ); + quint32 len = (*(dh->sizeData[ i ] ) ); + quint32 addr = (*(dh->addressData[ i ] ) ); + if( !fileOff || !len || !addr ) // skip entries with a zero offset, size or address + { + continue; + } + if( (quint64)fileOff + len > (quint64)dol.size() ) // compare in 64 bits so the sum cannot wrap around + { + WRN << "data section is out of range:" << i << hex << fileOff << len; + continue; + } + DolSection sec; + sec.addr = addr; + sec.data = dol.mid( fileOff, len ); + + data << sec; + } + + return true; +} + +Dol Dol::FakeDol( const QByteArray &mem, quint32 loadAddress ) +{ + Dol ret; + DolSection sec; + sec.addr = loadAddress; + sec.data = mem; + ret.text << sec; + return ret; +} diff --git a/symbolizer/dol.h b/symbolizer/dol.h new file mode 100644 index 0000000..3ee4aaa --- /dev/null +++ b/symbolizer/dol.h @@ -0,0 +1,58 @@ +#ifndef DOL_H +#define DOL_H + +#include "../WiiQt/includes.h" +#include "../WiiQt/tools.h" +#include "be.h" + +struct Dolheader +{ + be32 offsetText[ 7 ]; // 0 // 0000 + be32 offsetData[ 11 ]; // 28 // 001c + be32 addressText[ 7 ]; // 72 // 0048 + be32 addressData[ 11 ]; // 100 // 0064 + be32 sizeText[ 7 ]; // 144 // 0090 + be32 sizeData[ 11 ]; // 172 // 00ac + be32 addressBSS; // 216 // 00d8 + be32 sizeBSS; // 220 // 00dc + be32 entrypoint; // 224 // 00e0 +}; + +struct DolSection +{ + quint32 addr; + QByteArray data; + DolSection() : addr( 0 ) + { + } +}; + +class Dol +{ +public: + Dol( const QByteArray &dol = QByteArray() ); + + const Dolheader *Header() const { return dh; } + + const QList< DolSection > &TextSections() const { return text; } + const QList< DolSection > &DataSections() const { return data; } + + 
bool Parse( const QByteArray &dol ); + + // create a fake dol from a memory buffer + //! Header() will return NULL, and it will create only 1 text section with the load address and no data sections + //! added just for convenience + static Dol FakeDol( const QByteArray &mem, quint32 loadAddress = 0x80000000 ); +private: + Dolheader *dh; + QList< DolSection >text; + QList< DolSection >data; + + QByteArray headerBuf; + + + + +}; + +#endif // DOL_H diff --git a/symbolizer/elfparser.cpp b/symbolizer/elfparser.cpp new file mode 100644 index 0000000..f1591ba --- /dev/null +++ b/symbolizer/elfparser.cpp @@ -0,0 +1,625 @@ + +#include "../WiiQt/tools.h" +#include "elfparser.h" + +ElfParser::ElfParser( const QString &stuff ) : error( false ) +{ + ParseText( stuff ); +} + +bool ElfParser::ParseText( const QString &str ) +{ + QString fileName; + QStringList fileLines; + QMap< QString, QStringList >rawFiles; + QMap< QString, QStringList >rawSections; + QMap< QString, QStringList >rawSymbolTable; + QStringList lines = str.split( '\n', QString::KeepEmptyParts ); + quint32 lineCnt = lines.size(); + for( quint32 i = 0; i < lineCnt; i++ ) + { + const QString &line = lines.at( i ); + + + // start of a new file + if( line.contains( ": file format" ) ) + { + // add existing file to the list + if( !fileName.isEmpty() && fileLines.size() ) + { + rawFiles[ fileName ] = fileLines; + } + fileLines.clear(); + fileName.clear(); + fileName = line.left( line.indexOf( ": file format" ) ); + //qDebug() << "starting file" << fileName; + + + // read symbol table + for( ; i < lineCnt; i++ ) + { + if( lines.at( i ).startsWith( "SYMBOL TABLE:" ) ) + { + //qDebug() << "lines.at( i )" << lines.at( i ); + break; + } + } + + + QStringList symbolListLines; + for( ; i < lineCnt && !lines.at( i ).isEmpty(); i++ ) + { + symbolListLines << lines.at( i ); + } + rawSymbolTable[ fileName ] = symbolListLines; + + + // read hex dump + for( ; i < lineCnt; i++ ) + { + if( lines.at( i ).startsWith( "Contents of 
section " ) ) + { + //qDebug() << "lines.at( i )" << lines.at( i ); + break; + } + } + + QStringList secList; + for( ; i < lineCnt && !lines.at( i ).isEmpty(); i++ ) + { + secList << lines.at( i ); + } + //qDebug() << "section" << fileName << secList.size(); + rawSections[ fileName ] = secList; + + for( ; i < lineCnt - 1; i++ ) + { + if( lines.at( i + 1 ).startsWith( "Disassembly of section" ) ) + { + break; + } + if( lines.at( i + 1 ).contains( ": file format" ) )// happens if the .o doesnt contain any rode + { + break; + } + } + continue; + } + if( line.startsWith( "Disassembly of section" ) ) + { + continue; + } + fileLines << line; + } + // add the last file in there + if( !fileName.isEmpty() && fileLines.size() ) + { + rawFiles[ fileName ] = fileLines; + } + + + QMapIterator< QString, QStringList > it( rawFiles ); + while( it.hasNext() ) + { + it.next(); + + //qDebug() << "File:" << it.key() << it.value().size(); + File file( it.key() ); + if( !ParseFileText( it.value(), rawSections.find( it.key() ).value(), rawSymbolTable.find( it.key() ).value(), file ) ) + { + error = true; + return false; + } + files << file; + } + + foreach( const File &f, files ) + { + //qDebug() << f.Name(); + /*foreach( const Function &fun, f.Functions() ) + { + //qDebug() << " " << fun.Name(); + foreach( const SymRef &ref, fun.References() ) + { + //qDebug() << " " << hex << ref.off << ref.name; + } + }*/ + /*foreach( const SymAlias &alias, f.Aliases() ) + { + qDebug() << " " << alias.name << alias.containerName; + }*/ + } + //exit( 0 ); + + + return true; +} + +QList< SymAlias > ElfParser::ParseSymbolTable( const QStringList &lines ) +{ + QList< SymAlias >ret; + foreach( const QString &line, lines ) + { + if( line.size() < 19 )// too small + { + continue; + } + int tab = line.indexOf( '\t' ); + if( tab < 17 || line.size() < tab + 11 ) + { + continue; + } + bool ok; + SymAlias ref; + ref.containerName = line.mid( 17, tab - 17 ); + + // filter out certain sections + if( 
ref.containerName.startsWith( '*' ) + || ref.containerName.startsWith( ".text" ) + || ref.containerName.startsWith( ".debug" ) + || ref.containerName.startsWith( ".comment" ) + || ref.containerName.startsWith( ".gnu" ) + || ref.containerName.startsWith( ".init" ) ) + { + continue; + } + ref.offset = line.left( 8 ).toUInt( &ok, 16 ); + if( !ok ) + { + continue; + } + ref.size = line.mid( tab + 1, 8 ).toUInt( &ok, 16 ); + if( !ok ) + { + continue; + } + //qDebug() << line.mid( tab + 1, 8 ); + if( !ref.offset && !ref.size ) + { + continue; + } + ref.name = line.mid( tab + 10 ); + //qDebug() << hex << QString( "%1" ).arg( ref.offset, 8, 16, QChar( QChar( '0' ) ) ) + // << ref.containerName + // << QString( "%1" ).arg( ref.size, 8, 16, QChar( QChar( '0' ) ) ) + // << ref.name; + ret << ref; + } + return ret; +} + +QMap< QString, QByteArray > ElfParser::ParseSectionText( const QStringList &list ) +{ + QMap< QString, QByteArray >ret; + QMap< QString, QByteArray >ret2; + QByteArray ba; + QString name; + for( quint32 i = 0; i < (quint32)list.size(); i++ ) + { + const QString &line = list.at( i ); + if( line.startsWith( "Contents of section " ) ) + { + if( !name.isEmpty() && ba.size() ) + { + ret[ name ] = ba; + } + ba.clear(); + name = line.mid( 20 ); + name.resize( name.size() - 1 ); + //DBG << name; + continue; + } + QString hexS = line.mid( 6, 35 ); + QByteArray hexA = hexS.toLatin1(); + hexA = QByteArray::fromHex( hexA ); + ba += hexA; + } + if( !name.isEmpty() && ba.size() ) + { + ret[ name ] = ba; + } + + // remove unwanted sections + QMapIterator< QString, QByteArray > it( ret ); + while( it.hasNext() ) + { + it.next(); + if( !it.key().contains( ".text" ) + && !it.key().startsWith( ".init" ) + && !it.key().startsWith( ".ctors" ) + && !it.key().startsWith( ".dtors" ) + && !it.key().startsWith( ".debug" ) + && !it.key().startsWith( ".comment" ) + && !it.key().startsWith( "extab" )) + { + ret2[ it.key() ] = it.value(); + } + } + + // debug + /*QMapIterator< QString, 
QByteArray > it2( ret2 ); + while( it2.hasNext() ) + { + it2.next(); + qDebug() << it2.key(); + hexdump( it2.value() ); + }*/ + + return ret2; +} + +bool ElfParser::ParseFileText( const QStringList &strs, const QStringList §ionStrs, const QStringList &symbolStrs, ElfParser::File &file ) +{ + quint32 cnt = strs.size(); + + quint32 fOff = 0; + quint32 fStart = 0; + + QString name; + QString pattern; + QList< SymRef > refs; + + //DBG << file.Name() << sectionStrs.size() << symbolStrs.size() << strs.size(); + QMap< QString, QByteArray >sections = ParseSectionText( sectionStrs ); + QList< SymAlias > aliases = ParseSymbolTable( symbolStrs ); + + + //DBG << file.Name() << sections.size() << aliases.size(); + + for( quint32 i = 0; i < cnt; i++ ) + { + const QString &str = strs.at( i ); + /*if( name == "WII_Initialize" ) + { + qDebug() << str; + }*/ + + // start a new funciton + if( IsFunctionStart( str, &fStart ) ) + { + // add this function to the list + if( !name.isEmpty() && fOff ) + { + Function fun( name ); + fun.references = refs; + fun.pattern = pattern; + fun.file = &file; + file.functions << fun; + //qDebug() << "pattern:" << pattern; + } + //qDebug() << GetFunctionName( str ); + name = GetFunctionName( str ); + //DBG << name; + if( fOff != (quint32)pattern.size() / 2 ) + { + qDebug() << "size bad"; + exit( 0 ); + } + fOff = 0; + pattern.clear(); + refs.clear(); + + sections.remove( name );// remove functions from the section list + continue; + } + if( name.isEmpty() ) + { + continue; + } + if( IsBlank( str ) ) + { + //qDebug() << str << "is blank"; + continue; + } + if( IsSymbolLine( str ) ) + { + //qDebug() << str << "IsSymbolLine"; + continue; + } + QString hex; + QString oper; + QString symbol; + quint32 refOff = 0xdeadbeef; + + if( !ParseOpLine( str, hex, oper ) ) + { + qDebug() << str << strs.at( i - 1 ); + return false; + } + /*if( name == "WII_Initialize" ) + { + qDebug() << "hex" << hex; + }*/ + + if( ( i < cnt - 1 ) && IsSymbolLine( strs.at( i + 1 ) ) ) 
+ { + SymRef::Type refType; + symbol = GetNonOperRef( strs.at( i + 1 ), &refOff, &refType ); + + if( refOff < fStart ) + { + WRN << "refOff < fStart" << str; + return false; + } + SymRef ref; + quint32 deRef; + ref.name = DeReferenceSymbol( symbol, &deRef ); + ref.symOff = deRef; + + switch( refType ) + { + case SymRef::R_PPC_ADDR16_HI: + case SymRef::R_PPC_ADDR16_LO: + { + hex[ 4 ] = '.'; + hex[ 5 ] = '.'; + hex[ 6 ] = '.'; + hex[ 7 ] = '.'; + } + break; + case SymRef::R_PPC_REL24: + case SymRef::R_PPC_EMB_SDA21: + { + hex[ 1 ] = '.'; + hex[ 2 ] = '.'; + hex[ 3 ] = '.'; + hex[ 4 ] = '.'; + hex[ 5 ] = '.'; + hex[ 6 ] = '.'; + hex[ 7 ] = '.'; + } + break; + case SymRef::R_PPC_SDAREL16: + { + hex = "........"; + } + break; + default: + WRN << "unhandled reference type"; + return false; + break; + } + + ref.type = refType; + ref.off = refOff - fStart; + refs << ref; + if( ref.off & 0xff000000 ) + { + qDebug() << "ref.off is busted 1" << name << str; + + qDebug() << ::hex << refOff << fStart; + exit( 0 ); + } + } + + else if( OpNeedsWildCard( oper ) ) + { + //DBG << "bl called without symbol reference\n" << str; + hex = "........"; + if( symbol.isEmpty() ) + { + symbol = GetOpersymRef( str ); + } + SymRef ref; + ref.name = symbol; + ref.off = (quint32)(pattern.size()); + ref.type = SymRef::R_PPC_REL24; + refs << ref; + + if( ref.off & 0xff000000 ) + { + DBG << "ref.off is busted 2" << name << str; + exit( 0 ); + } + + } + pattern += hex.toUpper(); + /*if( name == "WII_Initialize" ) + { + qDebug() << "hex" << pattern; + }*/ + fOff += 4; + } + if( !name.isEmpty() ) + { + Function fun( name ); + fun.references = refs; + fun.pattern = pattern; + fun.file = &file; + file.functions << fun; + } + file.sections = sections; + file.aliases = aliases; + return true; +} + +bool ElfParser::IsFunctionStart( const QString &str, quint32 *start ) +{ + bool ok; + quint32 s; + if( str.size() < 12 ) + { + return false; + } + if( str.startsWith( ' ' ) ) + { + return false; + } + if( 
!str.endsWith( ">:" ) ) + { + return false; + } + s = str.left( 8 ).toUInt( &ok, 16 ); + if( !ok ) + { + return false; + } + int o = str.indexOf( '<' ); + if( o < 9 ) + { + return false; + } + if( start ) + { + *start = s; + } + return true; +} + +QString ElfParser::GetFunctionName( const QString &str ) +{ + QString ret = str; + ret.remove( 0, ret.indexOf( '<' ) + 1 ); + ret.resize( ret.size() - 2 ); + return ret; +} + +bool ElfParser::IsSymbolLine( const QString & str ) +{ + if( str.startsWith( "\t\t\t" ) && str.indexOf( '\t', 3 ) > 4 ) + { + return true; + } + return false; +} + +bool ElfParser::IsBlank( const QString & str ) +{ + QString sim = str.simplified(); + return sim.isEmpty() || sim == "..."; +} + +bool ElfParser::ParseOpLine( const QString &str, QString &hex, QString &oper ) +{ + // 1c74: 41 82 01 54 beq- + int tab = str.indexOf( '\t', 3 ); + if( tab < 0 || str.size() < tab + 15 || str.at( tab + 3 ) != ' ' || str.at( tab + 6 ) != ' ' || str.at( tab + 9 ) != ' ' || str.at( tab + 12 ) != ' ' ) + { + qDebug() << str << "is not an opline"; + qDebug() << hex << oper; + return false; + } + // " 0: 94 21 ff f0 stwu r1,-16(r1)" + hex = str.mid( tab + 1, 11 ); + hex.remove( ' ' ); + + oper = str.mid( tab + 14 ); + int i = oper.indexOf( ' ' ); + if( i > 0 ) + { + oper.resize( i ); + } + //qDebug() << str << '\n' << hex << oper; + //exit( 0 ); + + return true; +} + +bool ElfParser::OpNeedsWildCard( const QString &str ) +{ + if( str == "bl" ) + { + return true; + } + return false; +} + +QString ElfParser::GetNonOperRef( const QString &str, quint32 *off, SymRef::Type *type ) +{ + int i = str.lastIndexOf( '\t' ); + if( i < 0 ) + { + return QString(); + } + if( off )// get offset + { + bool ok; + QString n = str.mid( 3 ); + { + n.resize( n.indexOf( ':' ) ); + } + *off = n.toUInt( &ok, 16 ); + if( !ok ) + { + *off = 0xdeadbeef; + DBG << "error converting\n" << str << '\n' << n; + exit( 0 ); + } + } + if( type ) + { + if( str.contains( "R_PPC_REL24" ) ) + { + *type = 
SymRef::R_PPC_REL24; + } + else if( str.contains( "R_PPC_ADDR16_LO" ) ) + { + *type = SymRef::R_PPC_ADDR16_LO; + } + else if( str.contains( "R_PPC_EMB_SDA21" ) ) + { + *type = SymRef::R_PPC_EMB_SDA21; + } + else if( str.contains( "R_PPC_ADDR16_HA" ) || str.contains( "R_PPC_ADDR16_HI" ) ) + { + *type = SymRef::R_PPC_ADDR16_HI; + } + else if( str.contains( "R_PPC_SDAREL16" ) ) + { + *type = SymRef::R_PPC_SDAREL16; + } + else + { + *type = SymRef::R_PPC_WTF; + DBG << "*type = SymRef::R_PPC_WTF" << str; + exit( 0 ); + } + } + return str.mid( i + 1 ); +} + +QString ElfParser::GetOpersymRef( const QString &str ) +{ + QString ret; + int o = str.indexOf( '<' ); + if( o < 0 ) + { + return QString(); + } + ret = str.mid( o + 1 ); + if( !ret.endsWith( '>' ) ) + { + return QString(); + } + ret.resize( ret.size() - 1 ); + return ret; +} + +QString ElfParser::DeReferenceSymbol( const QString &reference, quint32 *offset ) +{ + // .rodata.str1.1+0x5 + QString ret; + if( offset ) + { + *offset = 0; + } + int o = reference.indexOf( "+0x" ); + if( o < 0 ) + { + + return reference; + } + ret = reference.left( o ); + bool ok; + quint32 d = reference.mid( o + 3 ).toUInt( &ok, 16 ); + if( !ok ) + { + return reference; + } + if( offset ) + { + *offset = d; + } + return ret; + +} diff --git a/symbolizer/elfparser.h b/symbolizer/elfparser.h new file mode 100644 index 0000000..4dae048 --- /dev/null +++ b/symbolizer/elfparser.h @@ -0,0 +1,135 @@ +#ifndef ELFPARSER_H +#define ELFPARSER_H + +#include "../WiiQt/includes.h" + +// class to parse the output of "objdump -xds someLib.a" +//! specifically wii PPC libs + + +struct SymRef +{ + enum Type + { + R_PPC_ADDR16_HI, // reference to the upper 16 bits of the variable. ie "lis" + R_PPC_ADDR16_LO, // refers to the lower 16 bits. ie "addi" + R_PPC_REL24, // refers by branching. ie "bl" + R_PPC_SDAREL16, // no clue wtf this one does + R_PPC_EMB_SDA21, // referenced by lwz/stw r13 or r2. 
these are converted at link time + R_PPC_WTF // something went wrong + }; + + quint32 off; // offset within the function where the reference occurs + quint32 symOff; // offset from the referenced symbol. ie stringTable + 0x10 + Type type; + QString name; // name of the symbol referenced + SymRef() : off( 0 ), symOff( 0 ), type( R_PPC_WTF ) + { + } +}; + +// ninty's compiler seems to lump a bunch of data together in 1 section and then offer names to it +// gcc seems to give each piece of data its own section +struct SymAlias +{ + QString name; + QString containerName; + quint32 offset; + quint32 size; + SymAlias() : offset( 0 ), size( 0 ) + { + } +}; + +class ElfParser +{ +public: + class File; + class Function + { + public: + Function( const QString &n = QString() ) : name( n ) + { + } + const QString &Name() const { return name; } + const QString &Pattern() const { return pattern; } + const QList< SymRef > &References() const { return references; } + const File *PFile() const { return file; } + private: + friend class ElfParser; + QString name; + QString pattern; + File *file; + QList< SymRef > references; + }; + + class File + { + public: + File( const QString &n = QString() ) : name( n ) + { + } + const QString &Name() const { return name; } + const QList< Function > &Functions() const { return functions; } + + // doesnt contain the ".text" section, since that is already parsed and turned into functions + //! 
this is for data and rodata and stuff + const QMap< QString, QByteArray > &Sections() const { return sections; } + const QList< SymAlias > &Aliases() const { return aliases; } + private: + friend class ElfParser; + QString name; + QList< Function > functions; + QMap< QString, QByteArray >sections; + QList< SymAlias >aliases; + }; + + ElfParser( const QString &stuff ); + + //const QList< File >Files() const { return files; } + bool Error() const { return error; } + + const QList< File > Files() const { return files; } +private: + bool error; + QList< File >files; + + // takes the entire dump and separetes it into files + bool ParseText( const QString &str ); + + // takes each file and separetes it into functions + bool ParseFileText( const QStringList &strs, const QStringList §ionStrs, const QStringList &symbolStrs, File &file ); + + QRegExp funcStart; + bool IsFunctionStart( const QString &str, quint32 *start = NULL ); + + // extract function name from the line that starts it + QString GetFunctionName( const QString &str ); + + // check if operation is one that should be wildcarded + bool OpNeedsWildCard( const QString & str ); + + // check if this is a symbol line or opcode + bool IsSymbolLine( const QString & str ); + + bool IsBlank( const QString & str ); + + bool ParseOpLine( const QString &str, QString &hex, QString &oper ); + + // returns stuff in <***> at the end of a line + QString GetOpersymRef( const QString &str ); + + // returns the string referenced by an extra line and the type of reference + QString GetNonOperRef( const QString &str, quint32 *off = NULL, SymRef::Type *type = NULL ); + + // parses the hex dump text of sections + QMap< QString, QByteArray >ParseSectionText( const QStringList &list ); + + QString DeReferenceSymbol( const QString &reference, quint32 *offset = NULL ); + + // parse symbol table + QList< SymAlias >ParseSymbolTable( const QStringList &lines ); + +}; + +#endif // ELFPARSER_H diff --git a/symbolizer/main.cpp 
b/symbolizer/main.cpp new file mode 100644 index 0000000..9a48b3c --- /dev/null +++ b/symbolizer/main.cpp @@ -0,0 +1,1516 @@ +#include + +#include "dol.h" +#include "elfparser.h" +#include "../WiiQt/includes.h" +#include "../WiiQt/tools.h" + +// holds info about a data section that is assumed correct +struct KnownData +{ + quint32 addr; // memory address + quint32 len; // size + QString name; // symbol name + ElfParser::File *file; // pointer to the file object that contains the data + KnownData() : addr( 0 ), len( 0 ), file( NULL ) + { + } +}; + +// holds info about a function that is assumed correct +struct KnownFunction +{ + const ElfParser::Function *function; + const ElfParser::File *file; + const quint32 addr; + const QString name;// name is only used for functinos that dont have symbols + KnownFunction( const ElfParser::Function *f = NULL, const ElfParser::File *fi = NULL, quint32 a = 0, const QString &n = QString() ) + : function( f ), file( fi ), addr( a ), name( n ) + { + } +}; + +// holds the info about a dol to use while searching for crap in it +Dol dol; +QStringList dolTextHex;// store a copy of the dol as hex for easier searching for patterns +QStringList dolDataHex; + +// this is a list containing all the parsed libraries +QList< ElfParser::File > libFiles; + +// keep track of stuff that matched exactly +QList< KnownData > knownData; +QList< KnownFunction > knownFunctions; + +// keep a list of the locations that each function's pattern matched to keep from looking them up over and over +QMap< const ElfParser::Function *, QList< quint32 > >patternMatches; + +QString NStr( quint32 num, quint8 width = 8 ); +QString NStr( quint32 num, quint8 width ) +{ + return QString( "%1" ).arg( num, width, 16, QChar( '0' ) ); +} + +// read dol into memory +bool LoadDol( const QString &path ) +{ + QByteArray ba = ReadFile( path ); + if( ba.isEmpty() ) + { + return false; + } + if( path.endsWith( ".dol", Qt::CaseInsensitive ) ) + { + if( !dol.Parse( ba ) ) + { + 
return false; + } + } + else + { + //ba.resize( 0x807B3E88 - 0x80004000 ); + dol = Dol::FakeDol( ba, 0x80004000 ); + } + foreach( const DolSection &sec, dol.TextSections() ) + { + QString hexS = sec.data.toHex().toUpper(); + dolTextHex << hexS; + } + foreach( const DolSection &sec, dol.DataSections() ) + { + QString hexS = sec.data.toHex().toUpper(); + dolDataHex << hexS; + } + return true; +} + +// run "./objdump -xds" on a file, parse its output and append the parsed files to libFiles; returns false on any failure +bool ObjDumpLib( const QString &path ) +{ + QProcess p; + QStringList cmd = QStringList() << "-xds" << path; + + p.start( "./objdump", cmd ); + if( !p.waitForStarted() ) + { + DBG << "!p.waitForStarted()"; + return false; + } + + if( !p.waitForFinished() ) // called without an argument, so QProcess applies its default timeout + { + DBG << "!p.waitForFinished()"; + return false; + } + QByteArray output = p.readAll(); + if( p.exitStatus() != QProcess::NormalExit || p.exitCode() != 0 ) // exitCode() is only meaningful after a normal exit + { + DBG << "p.exitStatus() != QProcess::NormalExit || p.exitCode() != 0"; + return false; + } + + QString s( output ); + ElfParser parser( s ); + if( parser.Error() ) + { + WRN << "parser failed"; + return false; + } + + libFiles << parser.Files(); + return true; +} + +// decide if the path is a file or a folder containing a bunch of files; for a folder, every "*.a" file in it is loaded +bool LoadLibs( const QString &path ) +{ + QFileInfo fi( path ); + if( !fi.exists() ) + { + DBG << "file"; + return false; + } + if( fi.isFile() ) + { + if( !ObjDumpLib( path ) ) + { + return false; + } + } + else + { + QDir dir( path ); + QFileInfoList fil = dir.entryInfoList( QStringList() << "*.a", QDir::Files ); + foreach( const QFileInfo &f, fil ) + { + // skip debug libs (base name ends in 'D') + if( f.completeBaseName().endsWith( 'D' ) ) + { + continue; + } + if( !ObjDumpLib( f.absoluteFilePath() ) ) + { + return false; + } + } + } + return true; +} + +// search for a bytearray in another one, but aligned to 4 +int AlignedBASearch( const QByteArray &needle, const QByteArray &haystack, qint64 start = 0 ); +int AlignedBASearch( const QByteArray &needle, const QByteArray &haystack, qint64 start ) +{ + + qint64 hSize = 
haystack.size(); + qint64 nSize = needle.size(); + + if( start % 4 || start > hSize ) // only 4-byte aligned start offsets inside haystack are accepted + { + return -1; + } + qint64 end = hSize - nSize; // last offset at which needle still fits + for( ; start <= end; start += 4 ) // visit every 4-aligned offset, including the last one where needle still fits + { + qint64 j; + for( j = 0; j < nSize; j++ ) + { + if( needle.at( j ) != haystack.at( start + j ) ) + { + break; + } + } + if( j == nSize ) + { + return start; + } + } + return -1; +} + +void RemoveOverlaps() +{ + QList< const KnownData * > removeData; + QList< const KnownFunction * > removeFunctions; + + foreach( const KnownFunction &kf1, knownFunctions ) + { + if( !kf1.function ) + { + continue; + } + quint32 start = kf1.addr; + quint32 end = start + ( kf1.function->Pattern().size() / 2 ); + foreach( const KnownFunction &kf2, knownFunctions ) + { + if( !kf2.function ) + { + continue; + } + if( kf1.function == kf2.function ) + { + continue; + } + quint32 start2 = kf2.addr; + if( start2 >= start && start2 < end ) + { + removeFunctions << &kf1 << &kf2; + } + } + foreach( const KnownData & kd, knownData ) + { + quint32 start2 = kd.addr; + if( start2 >= start && start2 < end ) + { + removeFunctions << &kf1; + removeData << &kd; + } + } + } + // this check works, but when allowing tiny data patterns into the list, it allows some false positives and throws errors + foreach( const KnownData & kd, knownData ) + { + quint32 start = kd.addr; + quint32 end = start + kd.len; + foreach( const KnownFunction &kf2, knownFunctions ) + { + if( !kf2.function ) + { + continue; + } + quint32 start2 = kf2.addr; + if( start2 >= start && start2 < end ) + { + removeFunctions << &kf2; + removeData << &kd; + } + } + foreach( const KnownData & kd2, knownData ) + { + if( kd.name == kd2.name ) + { + continue; + } + quint32 start2 = kd2.addr; + if( start2 >= start && start2 < end ) + { + removeData << &kd << &kd2; + } + } + } + + // build new lists and exclude the overlapped stuff + QList< KnownData > knownData2; + QList< KnownFunction > knownFunctions2; + + foreach( const KnownFunction &kf, knownFunctions ) + { + if( 
!removeFunctions.contains( &kf ) ) + { + knownFunctions2 << kf; + } + } + foreach( const KnownData & kd, knownData ) + { + if( !removeData.contains( &kd ) ) + { + knownData2 << kd; + } + } + + knownData = knownData2; + knownFunctions = knownFunctions2; +} + +void AddFunctionToKnownList( const ElfParser::Function *function, const ElfParser::File *file, quint32 addr ) +{ + foreach( const KnownFunction &kf, knownFunctions ) + { + if( kf.function == function ) + { + if( kf.addr != addr ) + { + DBG << "tried to add" << function->Name() << "to known functions at" << hex << addr << "but it already exists at" << kf.addr; + // TODO, probably need to remove the existing function from the list + } + return; + } + } + knownFunctions << KnownFunction( function, file, addr ); +} + +void AddFunctionToKnownList( const QString &name, quint32 addr ) +{ + foreach( const KnownFunction &kf, knownFunctions ) + { + if( kf.name == name ) + { + if( kf.addr != addr ) + { + DBG << "tried to add" << name << "to known functions at" << hex << addr << "but it already exists at" << kf.addr; + // TODO, probably need to remove the existing function from the list + } + return; + } + } + knownFunctions << KnownFunction( NULL, NULL, addr, name ); +} + +int PatternSearch( const QString &needle, const QString &haystack, qint64 start = 0 ); +int PatternSearch( const QString &needle, const QString &haystack, qint64 start ) +{ + qint64 hSize = haystack.size(); + qint64 nSize = needle.size(); + + if( start % 8 || start > hSize ) // only start offsets that are a multiple of 8 characters are accepted + { + return -1; + } + qint64 end = hSize - nSize; // last offset at which needle still fits + for( ; start <= end; start += 8 ) // <= so a match that ends exactly at the end of haystack is still found + { + qint64 j; + for( j = 0; j < nSize; j++ ) + { + QChar c = needle.at( j ); + if( c == '.' 
) + { + continue; + } + if( c != haystack.at( start + j ) ) + { + //DBG << "index" << hex << (quint32)j; + break; + } + } + if( j == nSize ) + { + return start; + } + } + return -1; +} + +const QList< quint32 > &PatternMatches( const ElfParser::Function *fun ) +{ + QMap< const ElfParser::Function *, QList< quint32 > >::iterator it = patternMatches.find( fun ); + if( it != patternMatches.end() ) + { + return it.value(); + } + + QStringList wholeDolHex = dolTextHex + dolDataHex; + QList< DolSection > wholeDol = dol.TextSections() + dol.DataSections(); + QList< quint32 > addrs; + + // find all pattern matches + for( int i = 0; i < wholeDol.size(); i++ ) + { + const QString &line = wholeDolHex.at( i ); + qint64 off2 = PatternSearch( fun->Pattern(), line ); + while( off2 >= 0 ) + { + // convert to actual address + quint32 maybeAddr = ( (quint32)off2 / 2 ) + wholeDol.at( i ).addr; + + // add it to the list + addrs << maybeAddr; + off2 = PatternSearch( fun->Pattern(), line, off2 + 8 ); + } + } + patternMatches[ fun ] = addrs; + + return patternMatches.find( fun ).value(); +} + +bool PattenMatches( const QString &needle, const QString &haystack, qint64 offset ) +{ + qint64 hSize = haystack.size(); + qint64 nSize = needle.size(); + + if( nSize > hSize || offset > hSize ) + { + return false; + } + qint64 j; + for( j = 0; j < nSize; j++ ) + { + QChar c = needle.at( j ); + if( c == '.' 
) + { + continue; + } + if( c != haystack.at( j + offset ) ) + { + return false; + } + } + return true; +} + +quint32 GetOpcodeFromAddr( quint32 addr ) +{ + QList< DolSection > wholeDol = dol.TextSections() + dol.DataSections(); + foreach( const DolSection &sec, wholeDol ) + { + quint32 end = sec.addr + sec.data.size(); + if( addr >= sec.addr && addr < end ) + { + quint32 offset = addr - sec.addr; + return qToBigEndian( *(quint32*)( sec.data.data() + offset ) ); + } + } + return 0xdeadbeef; +} + +quint32 ResolveBranch( quint32 addr, quint32 opcode ) +{ + quint32 ret = opcode & 0x3fffffc; + if( ret >= 0x2000000 ) + { + return addr - ( 0x4000000 - ret ); + } + return addr + ret; +} + +bool AddressIsInDol( quint32 addr, quint32 *section = NULL ); +bool AddressIsInDol( quint32 addr, quint32 *section ) +{ + QList< DolSection > wholeDol = dol.DataSections() + dol.TextSections(); + for( quint32 i = 0; i < (quint32)wholeDol.size(); i++ ) + { + const DolSection &sec = wholeDol.at( i ); + quint32 end = sec.addr + sec.data.size(); + if( addr >= sec.addr && addr < end ) + { + if( section ) + { + *section = i; + } + return true; + } + } + return false; +} + +bool ListContains( const QList< QPair< const ElfParser::Function *, quint32 > > &list, const ElfParser::Function *fun, quint32 addr ) +{ + int s = list.size(); + for( int i = 0; i < s; i++ ) + { + const QPair< const ElfParser::Function *, quint32 > &p = list.at( i ); + if( p.first == fun && p.second == addr ) + { + return true; + } + } + return false; +} + +void CleanupList( QList< QPair< const ElfParser::Function *, quint32 > > &list ) +{ + QList< const ElfParser::Function * >dupFunctions; + QList< const ElfParser::Function * >foundFunctions; + + QList< quint32 >dupAddrs; + QList< quint32 >foundAddrs; + + QList< QPair< const ElfParser::Function *, quint32 > > ret; + + int s = list.size(); + + // search for stuff to remove + for( int i = 0; i < s; i++ ) + { + const QPair< const ElfParser::Function *, quint32 > &p = list.at( 
i ); + if( foundFunctions.contains( p.first ) ) + { + dupFunctions << p.first; + } + foundFunctions << p.first; + + if( foundAddrs.contains( p.second ) ) + { + dupAddrs << p.second; + } + foundAddrs << p.second; + } + + // build a new list + for( int i = 0; i < s; i++ ) + { + const QPair< const ElfParser::Function *, quint32 > &p = list.at( i ); + if( dupFunctions.contains( p.first ) || dupAddrs.contains( p.second ) ) + { + continue; + } + ret << p; + } + list = ret; +} + +void CleanupList( QList< QPair< QString, quint32 > > &list ) +{ + QStringList dupFunctions; + QStringList foundFunctions; + + QList< quint32 >dupAddrs; + QList< quint32 >foundAddrs; + + QList< QPair< QString, quint32 > > ret; + + int s = list.size(); + + // search for stuff to remove + for( int i = 0; i < s; i++ ) + { + const QPair< QString, quint32 > &p = list.at( i ); + if( foundFunctions.contains( p.first ) ) + { + dupFunctions << p.first; + } + foundFunctions << p.first; + + if( foundAddrs.contains( p.second ) ) + { + dupAddrs << p.second; + } + foundAddrs << p.second; + } + + // build a new list + for( int i = 0; i < s; i++ ) + { + const QPair< QString, quint32 > &p = list.at( i ); + if( dupFunctions.contains( p.first ) || dupAddrs.contains( p.second ) ) + { + continue; + } + ret << p; + } + list = ret; +} + +bool FunctionIsKnown( const ElfParser::Function *f ) +{ + foreach( const KnownFunction &kf1, knownFunctions ) + { + if( kf1.function == f ) + { + return true; + } + } + return false; +} + +bool FunctionIsKnown( const QString &str, quint32 *addrOut = NULL ); +bool FunctionIsKnown( const QString &str, quint32 *addrOut ) +{ + foreach( const KnownFunction &kf1, knownFunctions ) + { + if( !kf1.function ) + { + if( kf1.name == str ) + { + if( addrOut ) + { + *addrOut = kf1.addr; + } + return true; + } + } + else if( kf1.function->Name() == str ) + { + if( addrOut ) + { + *addrOut = kf1.addr; + } + return true; + } + } + return false; +} + + +// look for non-function data and try to match it 
+// A data section from a lib file is accepted when it is found exactly once
+// in the whole dol and no other lib section claims the same address.
+// Accepted sections are appended to the global knownData list.
+void TryToMatchData()
+{
+    QList< quint32 > matchedAddrs;
+    QList< quint32 > reusedAddrs;
+    QList< KnownData > maybeMatches;
+
+    QList< DolSection > wholeDol = dol.DataSections() + dol.TextSections();
+
+    foreach( const ElfParser::File &f, libFiles )// search each file
+    {
+        //qDebug() << "f.Name()" << f.Name() << f.Sections().size();
+        QMapIterator< QString, QByteArray >it( f.Sections() );// search each data section in each file
+        while( it.hasNext() )
+        {
+            it.next();
+
+            qint64 off = -1;
+            quint32 addr = 0;
+            bool fail = false;
+            foreach( const DolSection &sec, wholeDol )// search in each section of the dol
+            {
+                //qDebug() << "expected" << sec.data.indexOf( it.value() );
+                qint64 off2 = AlignedBASearch( it.value(), sec.data );
+                if( off2 < 0 )
+                {
+                    continue;
+                }
+                //qDebug() << "matched" << it.key();
+                qint64 off3 = AlignedBASearch( it.value(), sec.data, off2 + 4 );
+                if( off3 > 0 )
+                {
+                    // NOTE(review): a double hit inside one dol section only skips that
+                    // section; a single hit in another section is still accepted - confirm
+                    //qDebug() << "matched more than once in 1 section" << it.key();
+                    continue;
+                }
+                if( off >= 0 )
+                {
+                    //qDebug() << "matched more than once in 2 section" << it.key();
+                    fail = true;
+                    break;
+                }
+                off = off2;
+                addr = sec.addr + off;
+            }
+            if( fail || off < 0 )// the data section was matched more than 1 time or wasnt matched at all
+            {
+                //qDebug() << "didnt match" << fail << off;
+                continue;
+            }
+            if( matchedAddrs.contains( addr ) )// something else already matched this location
+            {
+                //qDebug() << "matched but reused addr";
+                reusedAddrs << addr;
+                continue;
+            }
+
+            // add this to the list of posibilities
+            matchedAddrs << addr;
+            KnownData kd;
+            kd.addr = addr;
+            kd.name = it.key();
+            kd.len = it.value().size();
+            kd.file = (ElfParser::File*)&f;
+            maybeMatches << kd;
+        }
+    }
+
+    // now go back and pick out the ones that are really matches
+    //qDebug() << "Matched data sections:";
+    foreach( const KnownData &kd, maybeMatches )
+    {
+        if( reusedAddrs.contains( kd.addr ) )
+        {
+            continue;
+        }
+        knownData << kd;
+        /*qDebug() << hex << kd.addr << kd.len << kd.name << "from" << kd.file->Name();
+        // print aliases
+        foreach( const SymAlias &alias, kd.file->Aliases() )
+        {
+            if( alias.containerName == kd.name )
+            {
+                qDebug() << hex << " " << ( kd.addr + alias.offset ) << alias.size << alias.name;
+            }
+        }*/
+    }
+    RemoveOverlaps();
+}
+
+// Match functions that have no references at all: such a function is accepted
+// when its pattern occurs exactly once in the dol and no other function of
+// this pass landed on the same address.
+void TryToMatchFunctions0()
+{
+    QList< quint32 > usedAddrs;
+    QList< quint32 > dupAddrs;
+
+    QList< QPair< const ElfParser::Function *, quint32> > maybeMatches;// keep track of functions to check
+
+    QMap< const ElfParser::Function *, const ElfParser::File * >fileMap;
+
+    foreach( const ElfParser::File &f, libFiles )// search each file
+    {
+        foreach( const ElfParser::Function &fun, f.Functions() )// search each function in each file
+        {
+            if( fun.References().size() )
+            {
+                continue;
+            }
+            quint32 addr;
+            const QList< quint32 > &addrs = PatternMatches( &fun );
+            if( addrs.size() != 1 )
+            {
+                continue;
+            }
+            addr = addrs.at( 0 );
+            if( usedAddrs.contains( addr ) )
+            {
+                dupAddrs << addr;
+                continue;
+            }
+            usedAddrs << addr;
+            maybeMatches << QPair< const ElfParser::Function *, quint32>( &fun, addr );
+            fileMap[ &fun ] = &f;
+        }
+    }
+
+    qDebug() << " -- Matched by searching for patterns with no wildcards --";
+    int ss = maybeMatches.size();
+    for( int i = 0; i < ss; i++ )
+    {
+        const QPair< const ElfParser::Function *, quint32>&p = maybeMatches.at( i );
+        if( dupAddrs.contains( p.second ) )
+        {
+            //qDebug() << "tossing out" << p.first->Name() << "because addr" << hex << p.second << "is reused";
+            continue;
+        }
+        qDebug() << hex << p.second << NStr( p.first->Pattern().size() / 2, 4 ) << p.first->Name();
+        AddFunctionToKnownList( p.first, fileMap.find( p.first ).value(), p.second );
+    }
+    RemoveOverlaps();
+}
+
+// Match functions through the known data they reference.
+// Pass 1 collects every function that references a known data section of its
+// own file, directly or through an alias.  Pass 2 tries each pattern match of
+// those functions and checks the relocated instructions against the address of
+// the known data.
+void TryToMatchFunctions1()
+{
+    QList< QPair< const ElfParser::Function *, const KnownData *> > maybeMatches;// keep track of functions to check
+    QList< QPair< QPair< const ElfParser::Function *, const KnownData *> *, const SymAlias * > > aliasMatches;// keep track of functions to check via aliases
+
+    QMap< const ElfParser::Function *, const ElfParser::File * >fileMap;
+
+    // build a list of all the functions that reference the known data
+    foreach( const ElfParser::File &f, libFiles )// search each file
+    {
+        //qDebug() << "file" << f.Name();
+        foreach( const ElfParser::Function &fun, f.Functions() )// search each function in each file
+        {
+            bool doneWithFunction = false;
+            QStringList doneRefs;// keep a list of refs we already tried to match up against
+
+            foreach( const SymRef &ref, fun.References() )// look at each reference from each function
+            {
+                if( doneWithFunction )
+                {
+                    break;
+                }
+                //bool aliased = false;
+                if( doneRefs.contains( ref.name ) )
+                {
+                    continue;
+                }
+                doneRefs << ref.name;
+                switch( ref.type )
+                {
+                case SymRef::R_PPC_EMB_SDA21:// not sure how to handle these 2 types, so skip them for now
+                case SymRef::R_PPC_SDAREL16:
+                case SymRef::R_PPC_WTF:
+                    {
+                        continue;
+                    }
+                    break;
+                default:
+                    break;
+                }
+
+                //qDebug() << " " << ref.name;
+                foreach( const KnownData &kd, knownData )// see if this function references a known symbol
+                {
+                    if( kd.file != (ElfParser::File*)&f )
+                    {
+                        continue;
+                    }
+                    //qDebug() << " " << kd.name;
+                    if( kd.name == ref.name )
+                    {
+                        qDebug() << "function:" << fun.Name() << "references" << kd.name << kd.file->Name();
+                        //qDebug() << ref.name;
+                        maybeMatches << QPair< const ElfParser::Function *, const KnownData *>( &fun, &kd );
+                        doneWithFunction = true;
+                        break;
+                    }
+
+                    foreach( const SymAlias &alias, f.Aliases() )
+                    {
+                        bool ok = false;
+                        if( alias.containerName == kd.name && alias.name == ref.name )
+                        {
+                            qDebug() << "function:" << fun.Name() << "references" << kd.name << "through alias" << alias.name << " in" << kd.file->Name();
+                            //qDebug() << "container str" << alias.containerName;
+                            QPair< const ElfParser::Function *, const KnownData *> np( &fun, &kd );
+                            maybeMatches << np;
+                            // NOTE(review): &np points at a local that dies at the end of this
+                            // scope.  Only the alias half of the pair is read below, so the
+                            // stale pointer is never dereferenced in this function.
+                            QPair< QPair< const ElfParser::Function *, const KnownData *> *, const SymAlias * > ap( &np, &alias );
+                            aliasMatches << ap;
+                            doneWithFunction = true;
+                            break;
+                        }
+                        if( ok )
+                        {
+                            break;
+                        }
+                    }
+                }
+                if( doneWithFunction )
+                {
+                    fileMap[ &fun ] = &f;
+                }
+            }
+        }
+    }
+
+    // now search the dol and see if the possible matches will fit
+    int listSize = maybeMatches.size();
+    QList< QPair< const ElfParser::Function *, quint32 > > probablyMatches;
+    for( int j = 0; j < listSize; j++ )
+    {
+        const QPair< const ElfParser::Function *, const KnownData *> &it = maybeMatches.at( j );
+
+        // look for a possible match for this function
+        const QList< quint32 > &addrs = PatternMatches( it.first );
+        foreach( quint32 addr, addrs )
+        {
+            //qDebug() << "using address" << hex << addr << "for" << it.first->Name();
+            foreach( const SymRef &ref, it.first->References() )
+            {
+                switch( ref.type )
+                {
+                case SymRef::R_PPC_EMB_SDA21:// not sure how to handle these 2 types, so skip them for now
+                case SymRef::R_PPC_SDAREL16:
+                case SymRef::R_PPC_WTF:
+                    {
+                        //qDebug() << "skipped due to ref type" << ref.type;
+                        continue;
+                    }
+                    break;
+                default:
+                    break;
+                }
+
+                bool fail = false;
+
+                quint32 refOff = ref.off;
+                quint32 aliasDiff = 0;
+
+                if( ref.name != it.second->name )// wrong reference
+                {
+                    //qDebug() << " ref.name" << ref.name;
+                    //qDebug() << it.second->name << "wasn't found. checking aliases";
+                    // NOTE(review): this takes the first recorded alias that lives in the
+                    // same container; it does not compare the alias name with ref.name
+                    bool ok = false;
+                    int s = aliasMatches.size();
+                    for( int ss = 0; ss < s; ss++ )
+                    {
+                        const QPair< QPair< const ElfParser::Function *, const KnownData *> *, const SymAlias * > &aliasMatch = aliasMatches.at( ss );
+                        if( it.second->name == aliasMatch.second->containerName )
+                        {
+                            //qDebug() << "search using alias" << aliasMatch.second->name;
+                            aliasDiff = aliasMatch.second->offset;
+                            ok = true;
+                            break;
+                        }
+                    }
+                    if( !ok )
+                    {
+                        continue;
+                    }
+                }
+
+                quint32 codeOff = ( addr + refOff ) & ~3;
+                quint32 opcode = GetOpcodeFromAddr( codeOff );
+                //qDebug() << "possible match" << it.first->Name();
+
+                switch( ref.type )
+                {
+                case SymRef::R_PPC_ADDR16_HI:// upper 16 bits
+                    {
+                        if( ( opcode & 0xffff ) != ( ( ( it.second->addr + ref.symOff + aliasDiff ) & 0xffff0000 ) >> 16 ) )
+                        {
+                            fail = true;
+                            //qDebug() << "bad high" << hex << opcode << refOff << ref.name << ref.symOff;
+                            //qDebug() << hex << "expected" << (quint32)( ( ( it.second->addr + ref.symOff + aliasDiff ) & 0xffff0000 ) >> 16 );
+                            //DumpRefs( *( it.first ) );
+                        }
+                    }
+                    break;
+                case SymRef::R_PPC_ADDR16_LO:// lower 16 bits
+                    {
+                        if( ( opcode & 0xffff ) != ( (it.second->addr + ref.symOff + aliasDiff ) & 0xffff ) )
+                        {
+                            fail = true;
+                            //qDebug() << "bad low" << hex << opcode << refOff << ref.name << ref.symOff;
+                            //qDebug() << hex << "expected" << (quint32)( ( (it.second->addr + ref.symOff + aliasDiff ) & 0xffff ) );
+                            //DumpRefs( *( it.first ) );
+                        }
+                    }
+                    break;
+                case SymRef::R_PPC_REL24:// branch
+                    {
+                        quint32 res = ResolveBranch( addr + ( refOff & ~3 ), opcode );
+                        if( !AddressIsInDol( res ) )// just make sure the branch is inside the dol for now. no functions are actually known
+                        {
+                            fail = true;
+                            //qDebug() << "bad dranch" << hex << res << opcode << it.second->addr << ref.name;
+                        }
+                    }
+                    break;
+                default:
+                    continue;
+                    break;
+                }
+                //qDebug() << "fakematch" << hex << addr << it.first->Name();
+
+                // if we found a possible match and we dont already have this one
+                if( !fail && !ListContains( probablyMatches, it.first, addr ) )
+                {
+                    probablyMatches << QPair< const ElfParser::Function *, quint32 > ( it.first, addr );
+                }
+            }
+        }
+    }
+
+
+
+    // cleanup the list
+    CleanupList( probablyMatches );
+    int s = probablyMatches.size();
+    //qDebug() << " -- Functions matched by data references --";
+    for( int i = 0; i < s; i++ )
+    {
+        const QPair< const ElfParser::Function *, quint32 > &p = probablyMatches.at( i );
+        //qDebug() << hex << p.second << NStr( p.first->Pattern().size() / 2, 4 ) << p.first->Name();
+        AddFunctionToKnownList( p.first, fileMap.find( p.first ).value(), p.second );
+    }
+    RemoveOverlaps();
+}
+
+// Follow the branches of every known function that has symbols.
+// A branch target whose name has symbols in the libs is accepted when that
+// function's pattern matches at the target; otherwise it is recorded in
+// <nonMatchingBranches>.  Targets without symbols are accepted by name alone.
+void TryToMatchFunctions2( QMap< const ElfParser::Function *, quint32 > &nonMatchingBranches )
+{
+    // both lists are built data-first, the same order AddressIsInDol() uses for its index
+    QStringList wholeDolHex = dolDataHex + dolTextHex;
+    QList< DolSection > wholeDol = dol.DataSections() + dol.TextSections();
+    QMap< const ElfParser::Function *, const ElfParser::File * >fileMap;
+    QList< QPair< const ElfParser::Function *, quint32 > > probablyMatches;// these have symbols
+
+
+    QList< QPair< QString, quint32 > > probablyMatches2;// these have no symbols
+
+    foreach( const KnownFunction &kf, knownFunctions )
+    {
+        if( !kf.function )// wont have these for functions we dont have symbols for
+        {
+            continue;
+        }
+        const ElfParser::Function * fun = kf.function;
+        QStringList doneRefs;
+        bool alreadyKnown = false;
+        foreach( const SymRef &ref, fun->References() )// look at each reference from each function
+        {
+            switch( ref.type )
+            {
+            case SymRef::R_PPC_REL24:// we only care about branches right now
+                break;
+            default:
+                continue;
+                break;
+            }
+            if( doneRefs.contains( ref.name ) )// dont check branches to the same function from within the same calling function
+            {
+                continue;
+            }
+            // the inner kf shadows the outer one only inside this loop
+            foreach( const KnownFunction &kf, knownFunctions )// dont bother checking branches if we already know the function it is branching to
+            {
+                if( !kf.function )// wont have these for functions we dont have symbols for
+                {
+                    if( ref.name == kf.name )
+                    {
+                        alreadyKnown = true;
+                        break;
+                    }
+                    continue;
+                }
+                if( kf.function->Name() == ref.name )
+                {
+                    alreadyKnown = true;
+                    break;
+                }
+            }
+            if( alreadyKnown )
+            {
+                // NOTE(review): this abandons all remaining references of the calling
+                // function, not only the one that is already known - confirm intended
+                break;
+            }
+            doneRefs << ref.name;
+
+            quint32 addr = kf.addr + ref.off;
+            quint32 opcode = GetOpcodeFromAddr( addr );
+            if( opcode == 0xdeadbeef )
+            {
+                DBG << "error getting opcode from" << hex << addr << fun->Name();
+                break;
+            }
+            quint32 res = ResolveBranch( addr , opcode );
+            quint32 dolIdx;
+
+            if( !AddressIsInDol( res, &dolIdx ) )// make sure the branch is inside the dol
+            {
+                break;
+            }
+            //qDebug() << hex << res << ref.name << "from" << kf.addr << fun->Name() << addr << opcode;
+
+            bool branchHasSymbols = false;
+
+            foreach( const ElfParser::File &f, libFiles )
+            {
+                //qDebug() << "f.Name():" << f.Name();
+                foreach( const ElfParser::Function &fun2, f.Functions() )
+                {
+                    //qDebug() << " fun.Name():" << fun.Name() << ref.name;
+                    if( fun2.Name() == ref.name )
+                    {
+                        branchHasSymbols = true;
+                        // 2 hex characters per byte
+                        qint64 textOffset = res - wholeDol.at( dolIdx ).addr;
+                        textOffset *= 2;
+                        if( PattenMatches( fun2.Pattern(), wholeDolHex.at( dolIdx ), textOffset ) )
+                        {
+                            probablyMatches << QPair< const ElfParser::Function *, quint32 >( &fun2, res );
+                            fileMap[ &fun2 ] = &f;
+                        }
+                        else
+                        {
+                            //qDebug() << "expected" << fun2.Name() << "at" << hex << res << "but pattern didnt match";
+                            //qDebug() << "being called from" << fun->Name() << "at" << hex << addr;
+                            nonMatchingBranches[ &fun2 ] = res;
+                        }
+                        break;
+                    }
+                }
+                if( branchHasSymbols )
+                {
+                    break;
+                }
+            }
+            if( !branchHasSymbols )// we don't have any symbols for this function. but just assume it is right for now
+            {
+                probablyMatches2 << QPair< QString, quint32 >( ref.name, res );
+            }
+        }
+
+    }
+    // cleanup the lists
+    CleanupList( probablyMatches );
+    int s = probablyMatches.size();
+    for( int i = 0; i < s; i++ )
+    {
+        const QPair< const ElfParser::Function *, quint32 > &p = probablyMatches.at( i );
+        //qDebug() << hex << p.second << p.first->Name();
+        AddFunctionToKnownList( p.first, fileMap.find( p.first ).value(), p.second );
+    }
+
+    // drop names and addresses that showed up more than once
+    CleanupList( probablyMatches2 );
+
+    //qDebug() << " -- Functions matched by branches from known functions --";
+    s = probablyMatches2.size();
+    for( int i = 0; i < s; i++ )
+    {
+        const QPair< QString, quint32 > &p = probablyMatches2.at( i );
+        //qDebug() << hex << p.second << p.first;
+        AddFunctionToKnownList( p.first, p.second );
+    }
+    RemoveOverlaps();
+}
+
+// Match unknown functions through their branches to known functions: a pattern
+// match is accepted when every checked branch lands on the known address of the
+// function it names.  Functions that fail, or that land on an address already
+// used in this pass, are appended to <ignoreFunctions>.
+// Returns the list of functions that were added in this pass.
+QList< QPair< const ElfParser::Function *, quint32> > TryToMatchFunctions3( QList< const ElfParser::Function * > &ignoreFunctions )
+{
+    QMap< const ElfParser::Function *, const ElfParser::File * >fileMap;
+
+    QList< quint32 > usedAddrs;
+    QList< quint32 > dupAddrs;
+
+    QList< QPair< const ElfParser::Function *, quint32> > maybeMatches;// keep track of functions to check
+
+    foreach( const ElfParser::File &f, libFiles )// search each file
+    {
+        foreach( const ElfParser::Function &fun, f.Functions() )// search each function in each file
+        {
+            if( ignoreFunctions.contains( &fun ) || FunctionIsKnown( &fun ) )
+            {
+                continue;
+            }
+
+            // keep a list of addresses of the functions this one branches to
+            QMap< QString, quint32 >refAddrs;
+            QStringList doneRefs;// keep a list of refs we already tried to match up against
+
+            foreach( const SymRef &ref, fun.References() )// look at each reference from each function
+            {
+                if( ref.type != SymRef::R_PPC_REL24 || doneRefs.contains( ref.name ) )
+                {
+                    continue;
+                }
+                doneRefs << ref.name;
+
+                quint32 expectedAddr;
+                if( FunctionIsKnown( ref.name, &expectedAddr ) )
+                {
+                    refAddrs[ ref.name ] = expectedAddr;
+                }
+            }
+
+            // this function doesnt branch to any known ones
+            if( !refAddrs.size() )
+            {
+                continue;
+            }
+
+            // find a pattern match
+            const QList< quint32 > &addrs = PatternMatches( &fun );
+            foreach( quint32 maybeAddr, addrs )
+            {
+                bool fail = false;
+                doneRefs.clear();
+                foreach( const SymRef &ref, fun.References() )// look at each reference from each function
+                {
+                    if( ref.type != SymRef::R_PPC_REL24 || doneRefs.contains( ref.name ) )
+                    {
+                        continue;
+                    }
+                    doneRefs << ref.name;
+
+                    QMap< QString, quint32 >::iterator refAddr = refAddrs.find( ref.name );
+                    if( refAddr == refAddrs.end() )
+                    {
+                        continue;
+                    }
+                    quint32 branchFromAddr = maybeAddr + ref.off;
+                    quint32 opcode = GetOpcodeFromAddr( branchFromAddr );
+                    if( opcode == 0xdeadbeef )
+                    {
+                        DBG << "error getting opcode from" << hex << branchFromAddr << fun.Name() << "ref" << ref.name;
+                        fail = true;
+                        break;
+                    }
+                    quint32 res = ResolveBranch( branchFromAddr , opcode );
+                    if( res != refAddr.value() )
+                    {
+                        fail = true;
+                        break;
+                    }
+                }
+                if( !fail )// all the branches from this function land on the expected other function
+                {
+                    if( usedAddrs.contains( maybeAddr ) )
+                    {
+                        dupAddrs << maybeAddr;
+                        ignoreFunctions << &fun;
+                    }
+                    else
+                    {
+                        maybeMatches << QPair< const ElfParser::Function *, quint32 >( &fun, maybeAddr );
+                        fileMap[ &fun ] = &f;
+                    }
+                    usedAddrs << maybeAddr;
+                }
+                else
+                {
+                    ignoreFunctions << &fun;
+                }
+            }
+        }
+    }
+
+
+    // cleanup the list
+    CleanupList( maybeMatches );
+    int s = maybeMatches.size();
+    //qDebug() << "Functions matched by branch references:";
+    for( int i = 0; i < s; i++ )
+    {
+        const QPair< const ElfParser::Function *, quint32 > &p = maybeMatches.at( i );
+        //qDebug() << hex << p.second << NStr( p.first->Pattern().size() / 2, 4 ) << p.first->Name();
+
+        AddFunctionToKnownList( p.first, fileMap.find( p.first ).value(), p.second );
+    }
+    RemoveOverlaps();
+    return maybeMatches;
+}
+
+#define INDENT_TXT QString( " " )
+
+QString CleanupNameString( const QString &name )// gcc puts the section name at the front of the user-given name
+{
+    if( name.startsWith( ".sbss." ) )
+    {
+        return name.mid( 6 );
+    }
+    if( name.startsWith( ".rodata." ) )
+    {
+        return name.mid( 8 );
+    }
+    if( name.startsWith( ".sdata." ) )
+    {
+        return name.mid( 7 );
+    }
+    if( name.startsWith( ".data." ) )
+    {
+        return name.mid( 6 );
+    }
+    if( name.startsWith( ".text."
) ) + { + return name.mid( 6 ); + } + return name; +} + +QString MakeIDC( const QString &dolPath, const QString &libPath, const QMap< const ElfParser::Function *, quint32 > &nonMatchingBranches ) +{ + QString ret = QString( + "/***********************************************************\n" + "* This file was created automatically with\n" + "* DolPath: \"%1\"\n" + "* LibPath: \"%2\"\n" + "***********************************************************/\n" + "\n" + "#include \n" + "\n" ) + .arg( QFileInfo( dolPath ).absoluteFilePath() ) + .arg( QFileInfo( libPath ).absoluteFilePath() ); + + QString makeCode = + "static CreateFunction( addr, len, name )\n" + "{\n" + + INDENT_TXT + "MakeCode( addr );\n" + + INDENT_TXT + "MakeFunction( addr, len );\n" + + INDENT_TXT + "MakeName( addr, name );\n" + +"}\n\n"; + bool insertedMakeCode = false; + bool haveData = knownData.size() != 0; + if( haveData ) + { + ret += "static DoData()\n{\n"; + foreach( const KnownData &kd, knownData ) + { + QString line; + bool havealias = false; + foreach( const SymAlias &alias, kd.file->Aliases() ) + { + if( alias.containerName == kd.name ) + { + havealias = true; + break; + } + } + //TODO - maybe create data types like strings and words and stuff? 
+ + + line += INDENT_TXT + QString( "MakeComm( 0x%1, \"File : %2\\nSection: %3\\nLen : 0x%4\" );\n" ) + .arg( kd.addr, 8, 16, QChar( '0' ) ).arg( kd.file->Name() ) + .arg( kd.name ).arg( kd.len, 0, 16, QChar( '0' ) ); + if( havealias ) + { + foreach( const SymAlias &alias, kd.file->Aliases() ) + { + if( alias.containerName == kd.name ) + { + line += INDENT_TXT + QString( "MakeName( 0x%1, \"%2\" );\n" ) + .arg( kd.addr + alias.offset, 8, 16, QChar( '0' ) ).arg( alias.name ); + } + } + } + else + { + line += INDENT_TXT + QString( "MakeName( 0x%1, \"%2\" );\n" ) + .arg( kd.addr, 8, 16, QChar( '0' ) ).arg( kd.name ); + } + + ret += line; + } + + ret += "\n}\n\n"; + } + bool haveKnownFunctions = knownFunctions.size() != 0; + if( haveKnownFunctions ) + { + insertedMakeCode = true; + ret += makeCode; + ret += "static DoKnownFunctions()\n{\n"; + foreach( const KnownFunction &kf, knownFunctions ) + { + QString line; + if( kf.function ) + { + line += INDENT_TXT + QString( "CreateFunction( 0x%1, 0x%2, \"%3\" ); " ) + .arg( kf.addr, 8, 16, QChar( '0' ) ).arg( kf.function->Pattern().size() / 2, 4, 16, QChar( '0' ) ) + .arg( CleanupNameString( kf.function->Name() ) ); + if( kf.file->Name() != libPath ) + { + line += QString( "MakeComm( 0x%1, \"File: %2\" );" ) + .arg( kf.addr, 8, 16, QChar( '0' ) ).arg( kf.file->Name() ); + } + + line += '\n'; + + // do something cool here with the r13/rtoc references + foreach( const SymRef &ref, kf.function->References() ) + { + if( ref.type == SymRef::R_PPC_EMB_SDA21 ) + { + line += INDENT_TXT + INDENT_TXT + QString( "MakeComm( 0x%1, \"%2\" );\n" ) + .arg( kf.addr + ( ref.off & ~3 ), 8, 16, QChar( '0' ) ) + .arg( CleanupNameString( ref.name ) ); + } + } + } + else + { + line += INDENT_TXT + QString( "CreateFunction( 0x%1, BADADDR, \"%2\" );\n" ) + .arg( kf.addr, 8, 16, QChar( '0' ) ) + .arg( CleanupNameString( kf.name ) ); + } + ret += line; + } + + ret += "\n}\n"; + } + bool haveMaybeFunctions = nonMatchingBranches.size() != 0; + if( 
haveMaybeFunctions ) + { + if( !insertedMakeCode ) + { + ret += makeCode; + insertedMakeCode = true; + } + ret += "\nstatic DoMaybeFunctions()\n{\n"; + QMapIterator< const ElfParser::Function *, quint32 > it( nonMatchingBranches ); + while( it.hasNext() ) + { + it.next(); + ret += INDENT_TXT + QString( "CreateFunction( 0x%1, 0x%2, \"%3\" );\n" ) + .arg( it.value(), 8, 16, QChar( '0' ) ) + .arg( it.key()->Pattern().size() / 2, 4, 16, QChar( '0' ) ) + .arg( CleanupNameString( it.key()->Name() ) ); + // do something cool here with the r13/rtoc references + foreach( const SymRef &ref, it.key()->References() ) + { + if( ref.type == SymRef::R_PPC_EMB_SDA21 ) + { + ret += INDENT_TXT + INDENT_TXT + QString( "MakeComm( 0x%1, \"%2\" );\n" ) + .arg( it.value() + ( ref.off & ~3 ), 8, 16, QChar( '0' ) ) + .arg( CleanupNameString( ref.name ) ); + } + } + } + ret += "\n}\n"; + } + + // create the main() + ret += "\nstatic main()\n{\n"; + if( haveData ) + { + ret += INDENT_TXT + "DoData();\n"; + } + if( haveKnownFunctions ) + { + ret += INDENT_TXT + "DoKnownFunctions();\n"; + } + if( haveMaybeFunctions ) + { + ret += INDENT_TXT + "DoMaybeFunctions();\n"; + } + ret += "}\n\n"; + + return ret; +} + +void Usage() +{ + qDebug() << "Usage:"; + qDebug() << ""; + qDebug() << "symbolize "; + qDebug() << ""; + qDebug() << " this program requires objdump built for ppc in the same folder"; + + exit( 1 ); +} + +int main(int argc, char *argv[]) +{ + QCoreApplication a(argc, argv); + + if( argc < 4 ) + { + Usage(); + } + + QString dolPath( argv[ 1 ] ); + QString libPath( argv[ 2 ] ); + QString outName( argv[ 3 ] ); + + qDebug() << "Loading dol..."; + if( !LoadDol( dolPath ) ) + { + exit( 0 ); + } + + qDebug() << "Loading libs..."; + if( !LoadLibs( libPath ) ) + { + exit( 0 ); + } + + // this is a list of functions that are branched to, but dont match the patterns read from the libs + QMap< const ElfParser::Function *, quint32 > nonMatchingBranches; + + // find unique data symbols + + qDebug() 
<< "matching data..."; + TryToMatchData(); + + // find first round of functions + TryToMatchFunctions1(); + + // find branches from the first round of functions + TryToMatchFunctions2( nonMatchingBranches ); + + // looking for functions that dont branch anywhere or use global variables or anything + TryToMatchFunctions0(); + + int maxRetries = 10; + QList< const ElfParser::Function * > ignoreFunctions; + for( int i = 0; i < maxRetries; i++ ) + { + qDebug() << " -- Round" << i << '/' << maxRetries << "--"; + // look for functions that branch to known functions + QList< QPair< const ElfParser::Function *, quint32> > newFunctions = TryToMatchFunctions3( ignoreFunctions ); + if( !newFunctions.size() ) + { + qDebug() << "no new functions found" << i; + break; + } + qDebug() << " - added" << newFunctions.size() << "new functions -"; + } + + qDebug() << "Total functions found:" << knownFunctions.size(); + + // find branches from the known functions + int num = knownFunctions.size(); + for( int i = 0; i < maxRetries; i++ ) + { + qDebug() << " -- Round" << i << '/' << maxRetries << " following branches --"; + TryToMatchFunctions2( nonMatchingBranches ); + + int num2 = knownFunctions.size(); + if( num2 == num ) + { + break; + } + qDebug() << " - added" << ( num2 - num ) << "new functions -"; + num = num2; + } + + qDebug() << "Total data matches: " << knownData.size(); + qDebug() << "Total functions found:" << knownFunctions.size(); + + qDebug() << "Generating idc file..."; + QString idc = MakeIDC( dolPath, libPath, nonMatchingBranches ); + //qDebug() << idc; + + WriteFile( outName, idc.toLatin1() ); + + + return 0; +} diff --git a/symbolizer/symbolizer.pro b/symbolizer/symbolizer.pro new file mode 100644 index 0000000..fd6ab44 --- /dev/null +++ b/symbolizer/symbolizer.pro @@ -0,0 +1,28 @@ +#------------------------------------------------- +# +# Project created by QtCreator 2011-12-12T00:35:52 +# +#------------------------------------------------- + +QT += core + +#QT -= gui 
+ +TARGET = symbolizer +CONFIG += console +CONFIG -= app_bundle + +TEMPLATE = app + +SOURCES += main.cpp\ + ../WiiQt/tools.cpp \ + ../WiiQt/aes.c \ + elfparser.cpp \ + dol.cpp \ + be.cpp + +HEADERS += ../WiiQt/tools.h \ + ../WiiQt/aes.h \ + elfparser.h \ + dol.h \ + be.h diff --git a/symbolizer/symbolizer.pro.user b/symbolizer/symbolizer.pro.user new file mode 100644 index 0000000..298c28a --- /dev/null +++ b/symbolizer/symbolizer.pro.user @@ -0,0 +1,157 @@ + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + UTF-8 + false + false + 4 + true + 2 + false + true + false + 0 + 4 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.Target.0 + + Desktop + Desktop + Qt4ProjectManager.Target.DesktopTarget + 0 + 0 + 0 + + ProjectExplorer.ToolChain.Gcc:/usr/bin/g++.x86-linux-generic-elf-64bit. + + + qmake + + QtProjectManager.QMakeBuildStep + false + + false + + + Make + + Qt4ProjectManager.MakeStep + false + -j3 + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + Make + + Qt4ProjectManager.MakeStep + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Qt in PATH Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + /home/j/c/WiiQt/93/symbolizer + 2 + ProjectExplorer.ToolChain.Gcc:/usr/bin/g++.x86-linux-generic-elf-64bit. 
+ false + + 1 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + No deployment + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + true + 25 + + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + symbolizer + + Qt4ProjectManager.Qt4RunConfiguration + 2 + ./mario.dol /home/j/c/NINTENDO_Revolution_SDK_2.1_Wii-SYNDiCATE/RVL_SDK-2_1-060821/RVL_SDK/RVL/lib/ ./testing.idc + symbolizer.pro + false + false + + + 3768 + true + false + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.EnvironmentId + {dd2f24c7-c64b-41bd-8686-5d5bbb908d16} + + + ProjectExplorer.Project.Updater.FileVersion + 9 + +