mirror of
https://github.com/Barubary/dsdecmp.git
synced 2025-02-21 13:47:14 +01:00
C#: added a variation to the original LZ-10 compression algorithm that increases the compression rate, while still ensuring compatibility with the built-in decompression.
This commit is contained in:
parent
2c507a5765
commit
8892f527f5
@ -75,9 +75,10 @@ namespace DSDecmp.Formats
|
||||
/// input data may be read (if there is padding, for example), however never more than
|
||||
/// this number of bytes is read from the input stream.</param>
|
||||
/// <param name="outstream">The stream to write the decompressed data to.</param>
|
||||
/// <returns>The length of the output data.</returns>
|
||||
/// <exception cref="NotEnoughDataException">When the given length of the input data
|
||||
/// is not enough to properly decompress the input.</exception>
|
||||
public abstract void Decompress(Stream instream, long inLength, Stream outstream);
|
||||
public abstract long Decompress(Stream instream, long inLength, Stream outstream);
|
||||
|
||||
/// <summary>
|
||||
/// Compresses the given input file, and writes the compressed data to the given
|
||||
|
@ -90,7 +90,7 @@ namespace DSDecmp.Formats
|
||||
}
|
||||
#endregion
|
||||
|
||||
public override void Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
|
||||
public override long Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
|
||||
{
|
||||
#region Format description
|
||||
// Overlay LZ compression is basically just LZ-0x10 compression.
|
||||
@ -152,6 +152,8 @@ namespace DSDecmp.Formats
|
||||
// make sure the input is positioned at the end of the file
|
||||
instream.Position += 4;
|
||||
|
||||
return inLength - 4;
|
||||
|
||||
#endregion
|
||||
}
|
||||
else
|
||||
@ -268,6 +270,8 @@ namespace DSDecmp.Formats
|
||||
// make sure the input is positioned at the end of the file; the stream is currently
|
||||
// at the compression header.
|
||||
instream.Position += headerSize;
|
||||
|
||||
return decompressedLength + (inLength - headerSize - compressedSize);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
return base.Supports(stream, inLength);
|
||||
}
|
||||
|
||||
public override void Decompress(Stream instream, long inLength, Stream outstream)
|
||||
public override long Decompress(Stream instream, long inLength, Stream outstream)
|
||||
{
|
||||
#region GBATEK format specification
|
||||
/*
|
||||
@ -178,7 +178,13 @@ namespace DSDecmp.Formats.Nitro
|
||||
readBytes += 4 - (readBytes % 4);
|
||||
|
||||
if (readBytes < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
{
|
||||
// the input may be 4-byte aligned.
|
||||
if ((readBytes ^ (readBytes & 3)) + 4 < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
}
|
||||
|
||||
return decompressedSize;
|
||||
}
|
||||
|
||||
public override int Compress(Stream instream, long inLength, Stream outstream)
|
||||
|
@ -11,9 +11,27 @@ namespace DSDecmp.Formats.Nitro
|
||||
/// </summary>
|
||||
public class LZ10 : NitroCFormat
|
||||
{
|
||||
private static bool lookAhead = false;
|
||||
/// <summary>
/// Gets or sets the flag that determines if 'look-ahead'/DP should be used when compressing
/// with the LZ-10 format. The default is false, which is what is used in the original
/// implementation.
/// </summary>
/// <remarks>
/// The flag is static, so changing it affects every LZ10 instance.
/// </remarks>
public static bool LookAhead
{
    // a getter is provided so callers can inspect (and later restore) the current mode.
    get { return lookAhead; }
    set { lookAhead = value; }
}
|
||||
|
||||
/// <summary>
/// Creates a new LZ-10 format instance, passing 0x10 to the NitroCFormat base constructor.
/// </summary>
public LZ10() : base(0x10) { }
|
||||
|
||||
public override void Decompress(Stream instream, long inLength,
|
||||
#region 'Original' Decompression method
|
||||
/// <summary>
|
||||
/// Decompress a stream that is compressed in the LZ-10 format.
|
||||
/// </summary>
|
||||
/// <param name="instream">The compressed stream.</param>
|
||||
/// <param name="inLength">The length of the input stream.</param>
|
||||
/// <param name="outstream">The output stream, where the decompressed data is written to.</param>
|
||||
public override long Decompress(Stream instream, long inLength,
|
||||
Stream outstream)
|
||||
{
|
||||
#region format definition form GBATEK/NDSTEK
|
||||
@ -138,13 +156,21 @@ namespace DSDecmp.Formats.Nitro
|
||||
buffer[bufferOffset] = (byte)next;
|
||||
bufferOffset = (bufferOffset + 1) % bufferLength;
|
||||
}
|
||||
outstream.Flush();
|
||||
}
|
||||
|
||||
if (readBytes < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
{
|
||||
// the input may be 4-byte aligned.
|
||||
if ((readBytes ^ (readBytes & 3)) + 4 < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
}
|
||||
|
||||
return decompressedSize;
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Original Compress method
|
||||
public unsafe override int Compress(Stream instream, long inLength, Stream outstream)
|
||||
{
|
||||
// make sure the decompressed size fits in 3 bytes.
|
||||
@ -152,6 +178,12 @@ namespace DSDecmp.Formats.Nitro
|
||||
// in every game, as it may not be a built-in function.
|
||||
if (inLength > 0xFFFFFF)
|
||||
throw new InputTooLargeException();
|
||||
|
||||
// use the other method if lookahead is enabled
|
||||
if (lookAhead)
|
||||
{
|
||||
return CompressWithLA(instream, inLength, outstream);
|
||||
}
|
||||
|
||||
// save the input data in an array to prevent having to go back and forth in a file
|
||||
byte[] indata = new byte[inLength];
|
||||
@ -177,6 +209,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
int readBytes = 0;
|
||||
while (readBytes < inLength)
|
||||
{
|
||||
#region If 8 blocks are bufferd, write them and reset the buffer
|
||||
// we can only buffer 8 blocks at a time.
|
||||
if (bufferedBlocks == 8)
|
||||
{
|
||||
@ -187,6 +220,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
bufferlength = 1;
|
||||
bufferedBlocks = 0;
|
||||
}
|
||||
#endregion
|
||||
|
||||
// determine if we're dealing with a compressed or raw block.
|
||||
// it is a compressed block when the next 3 or more bytes can be copied from
|
||||
@ -234,10 +268,153 @@ namespace DSDecmp.Formats.Nitro
|
||||
|
||||
return compressedLength;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Dynamic Programming compression method
|
||||
/// <summary>
/// Variation of the original compression method, making use of Dynamic Programming to 'look ahead'
/// and determine the optimal 'length' values for the compressed blocks. Is not 100% optimal,
/// as the flag-bytes are not taken into account.
/// </summary>
/// <param name="instream">The stream to read the uncompressed data from.</param>
/// <param name="inLength">The number of bytes to read from <paramref name="instream"/> and compress.</param>
/// <param name="outstream">The stream the 4-byte header and the compressed blocks are written to.</param>
/// <returns>The number of bytes written to <paramref name="outstream"/>, including the 4-byte header.</returns>
/// <exception cref="StreamTooShortException">When fewer than <paramref name="inLength"/> bytes
/// could be read from <paramref name="instream"/>.</exception>
private unsafe int CompressWithLA(Stream instream, long inLength, Stream outstream)
{
    // save the input data in an array to prevent having to go back and forth in a file.
    // Stream.Read is allowed to return fewer bytes than requested even when the end of the
    // stream has not been reached, so keep reading until the array is full or the stream is empty.
    byte[] indata = new byte[inLength];
    int numReadBytes = 0;
    while (numReadBytes < indata.Length)
    {
        int chunk = instream.Read(indata, numReadBytes, indata.Length - numReadBytes);
        if (chunk <= 0)
            break;
        numReadBytes += chunk;
    }
    if (numReadBytes != indata.Length)
        throw new StreamTooShortException();

    // write the compression header first: the type byte followed by the
    // decompressed length in 3 bytes, least significant byte first.
    outstream.WriteByte(0x10);
    outstream.WriteByte((byte)(inLength & 0xFF));
    outstream.WriteByte((byte)((inLength >> 8) & 0xFF));
    outstream.WriteByte((byte)((inLength >> 16) & 0xFF));

    int compressedLength = 4;

    // pin the array itself instead of &indata[0]: for an empty array this yields a null
    // pointer (which is never dereferenced below) instead of an IndexOutOfRangeException.
    fixed (byte* instart = indata)
    {
        // we do need to buffer the output, as the first byte of every group of (at most) 8 blocks
        // indicates which of those blocks are compressed. A block takes at most 2 bytes, and all
        // look-ahead decisions are made up front, so buffering 8 blocks at a time is sufficient.
        byte[] outbuffer = new byte[8 * 2 + 1];
        outbuffer[0] = 0;
        int bufferlength = 1, bufferedBlocks = 0;
        int readBytes = 0;

        // get the optimal choices for len and disp for every position in the input
        int[] lengths, disps;
        this.GetOptimalCompressionLengths(instart, indata.Length, out lengths, out disps);

        while (readBytes < inLength)
        {
            // we can only buffer 8 blocks at a time.
            if (bufferedBlocks == 8)
            {
                outstream.Write(outbuffer, 0, bufferlength);
                compressedLength += bufferlength;
                // reset the buffer
                outbuffer[0] = 0;
                bufferlength = 1;
                bufferedBlocks = 0;
            }

            if (lengths[readBytes] == 1)
            {
                // raw block: copy the byte as-is; its flag bit stays 0.
                outbuffer[bufferlength++] = indata[readBytes++];
            }
            else
            {
                // mark the next block as compressed (flags are stored most significant bit first)
                outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));

                // first byte: (length - 3) in the high nibble, bits 8-11 of (disp - 1) in the low nibble
                outbuffer[bufferlength] = (byte)(((lengths[readBytes] - 3) << 4) & 0xF0);
                outbuffer[bufferlength] |= (byte)(((disps[readBytes] - 1) >> 8) & 0x0F);
                bufferlength++;
                // second byte: the low 8 bits of (disp - 1)
                outbuffer[bufferlength] = (byte)((disps[readBytes] - 1) & 0xFF);
                bufferlength++;

                readBytes += lengths[readBytes];
            }

            bufferedBlocks++;
        }

        // copy the remaining blocks to the output.
        // the output is not padded to a multiple of 4 bytes.
        if (bufferedBlocks > 0)
        {
            outstream.Write(outbuffer, 0, bufferlength);
            compressedLength += bufferlength;
        }
    }

    return compressedLength;
}
|
||||
#endregion
|
||||
|
||||
#region DP compression helper method; GetOptimalCompressionLengths
|
||||
/// <summary>
/// Gets the optimal compression lengths for each start of a compressed block using Dynamic Programming.
/// The input is processed from the last byte to the first; for every position the longest match
/// in the preceding (at most 0x1000) bytes is looked up with GetOccurrenceLength.
/// </summary>
/// <param name="indata">The data to compress.</param>
/// <param name="inLength">The length of the data to compress.</param>
/// <param name="lengths">The optimal 'length' of the compressed blocks. For each byte in the input data,
/// this value is the optimal 'length' value. If it is 1, the block should not be compressed.</param>
/// <param name="disps">The 'disp' values of the compressed blocks. May be 0, in which case the
/// corresponding length will never be anything other than 1.</param>
/// <exception cref="InvalidOperationException">When the match search reports a 'disp' that
/// reaches back before the start of the input.</exception>
private unsafe void GetOptimalCompressionLengths(byte* indata, int inLength, out int[] lengths, out int[] disps)
{
    lengths = new int[inLength];
    disps = new int[inLength];
    // minLengths[i] is the smallest number of block bytes (flag-bytes not counted) found for
    // encoding the input from position i up to the end.
    int[] minLengths = new int[inLength];

    for (int i = inLength - 1; i >= 0; i--)
    {
        // first get the compression length when the next byte is not compressed:
        // 1 byte for the raw block, plus whatever the remainder of the input costs.
        lengths[i] = 1;
        minLengths[i] = (i + 1 >= inLength) ? 1 : 1 + minLengths[i + 1];

        // then the optimal compressed length
        int oldLength = Math.Min(0x1000, i);
        // get the appropriate disp while at it.
        // be sure to bound the input length with 0x12, as that's the maximum length for LZ-10 compressed blocks.
        int maxLen = GetOccurrenceLength(indata + i, Math.Min(inLength - i, 0x12),
                                         indata + i - oldLength, oldLength, out disps[i]);
        if (disps[i] > i)
            throw new InvalidOperationException("disp is too large");

        // a compressed block costs 2 bytes and covers at least 3 input bytes; try every
        // usable length, as a shorter copy can leave a cheaper remainder than the longest one.
        for (int j = 3; j <= maxLen; j++)
        {
            int newCompLen = (i + j >= inLength) ? 2 : 2 + minLengths[i + j];
            if (newCompLen < minLengths[i])
            {
                lengths[i] = j;
                minLengths[i] = newCompLen;
            }
        }
    }

    // we could optimize this further to also optimize it with regard to the flag-bytes, but that would require 8 times
    // more space and time (one for each position in the block) for only a potentially tiny increase in compression ratio.
}
|
||||
#endregion
|
||||
|
||||
/// <summary>
|
||||
/// Determine the maximum size of a LZ-compressed block starting at newPtr, using the already compressed data
|
||||
/// starting at oldPtr.
|
||||
/// starting at oldPtr. Takes O(inLength * oldLength) = O(n^2) time.
|
||||
/// </summary>
|
||||
/// <param name="newPtr">The start of the data that needs to be compressed.</param>
|
||||
/// <param name="newLength">The number of bytes that still need to be compressed.</param>
|
||||
@ -251,15 +428,16 @@ namespace DSDecmp.Formats.Nitro
|
||||
if (newLength == 0)
|
||||
return 0;
|
||||
int maxLength = 0;
|
||||
//for (int i = 1; i < oldLength; i++)
|
||||
// try every possible 'disp' value (disp = oldLength - i)
|
||||
for (int i = 0; i < oldLength - 1; i++)
|
||||
{
|
||||
// work from the end of the old data to the start, to mimic the original implementation's behaviour
|
||||
//byte* currentOldStart = oldPtr + oldLength - i;
|
||||
// WRONG: original works from start
|
||||
// work from the start of the old data to the end, to mimic the original implementation's behaviour
|
||||
// (and going from start to end or from end to start does not influence the compression ratio anyway)
|
||||
byte* currentOldStart = oldPtr + i;
|
||||
int currentLength = 0;
|
||||
// determine the length we can copy if we go back i bytes
|
||||
// determine the length we can copy if we go back (oldLength - i) bytes
|
||||
// always check the next 'newLength' bytes, and not just the available 'old' bytes,
|
||||
// as the copied data can also originate from what we're currently trying to compress.
|
||||
for (int j = 0; j < newLength; j++)
|
||||
{
|
||||
// stop when the bytes are no longer the same
|
||||
@ -268,6 +446,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
currentLength++;
|
||||
}
|
||||
|
||||
// update the optimal value
|
||||
if (currentLength > maxLength)
|
||||
{
|
||||
maxLength = currentLength;
|
||||
|
@ -13,7 +13,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
{
|
||||
/// <summary>
/// Creates a new LZ-11 format instance, passing 0x11 to the base-class constructor.
/// </summary>
public LZ11() : base(0x11) { }
|
||||
|
||||
public override void Decompress(Stream instream, long inLength, Stream outstream)
|
||||
public override long Decompress(Stream instream, long inLength, Stream outstream)
|
||||
{
|
||||
#region Format definition in NDSTEK style
|
||||
/* Data header (32bit)
|
||||
@ -213,7 +213,13 @@ namespace DSDecmp.Formats.Nitro
|
||||
}
|
||||
|
||||
if (readBytes < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
{
|
||||
// the input may be 4-byte aligned.
|
||||
if ((readBytes ^ (readBytes & 3)) + 4 < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
}
|
||||
|
||||
return decompressedSize;
|
||||
}
|
||||
|
||||
public override int Compress(Stream instream, long inLength, Stream outstream)
|
||||
|
@ -13,7 +13,7 @@ namespace DSDecmp.Formats.Nitro
|
||||
{
|
||||
/// <summary>
/// Creates a new RLE format instance, passing 0x30 to the base-class constructor.
/// </summary>
public RLE() : base(0x30) { }
|
||||
|
||||
public override void Decompress(Stream instream, long inLength, Stream outstream)
|
||||
public override long Decompress(Stream instream, long inLength, Stream outstream)
|
||||
{
|
||||
/*
|
||||
Data header (32bit)
|
||||
@ -119,7 +119,13 @@ namespace DSDecmp.Formats.Nitro
|
||||
}
|
||||
|
||||
if (readBytes < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
{
|
||||
// the input may be 4-byte aligned.
|
||||
if ((readBytes ^ (readBytes & 3)) + 4 < inLength)
|
||||
throw new TooMuchInputException(readBytes, inLength);
|
||||
}
|
||||
|
||||
return decompressedSize;
|
||||
}
|
||||
|
||||
public override int Compress(Stream instream, long inLength, Stream outstream)
|
||||
|
@ -27,7 +27,7 @@ namespace DSDecmp
|
||||
{
|
||||
Console.WriteLine("DSDecmp - Decompressor for compression formats used on the NDS - by Barubary");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Usage:\tDSDecmp (-c FORMAT) (-ge) input (output)");
|
||||
Console.WriteLine("Usage:\tDSDecmp (-c FORMAT (FORMATOPTS)) (-ge) input (output)");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Without the -c modifier, DSDecmp will decompress the input file to the output");
|
||||
Console.WriteLine("file. If the output file is a directory, the output file will be placed in that");
|
||||
@ -60,6 +60,12 @@ namespace DSDecmp
|
||||
Console.WriteLine(" gba* - The built-in compression format that gives the best compression");
|
||||
Console.WriteLine(" ratio, and is also supported by the GBA.");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("The following format options are available:");
|
||||
Console.WriteLine(" lz10, lz11 and lzovl:");
|
||||
Console.WriteLine(" -opt : employs a better compression algorithm to boost the compression");
|
||||
Console.WriteLine(" ratio. Not using this option will result in using the algorithm");
|
||||
Console.WriteLine(" originally used to compress the game files.");
|
||||
Console.WriteLine();
|
||||
Console.WriteLine("Supplying the -ge modifier together with the -c modifier, the extension of the");
|
||||
Console.WriteLine("compressed files will be extended with the 'FORMAT' value that always results");
|
||||
Console.WriteLine("in that particualr format (so 'lz11', 'rle', etc).");
|
||||
|
Loading…
x
Reference in New Issue
Block a user