mirror of
https://github.com/Barubary/dsdecmp.git
synced 2024-11-16 15:49:24 +01:00
C#: Added compression algorithms for the LZ-Overlay format. At the moment, the file will either be fully compressed or not compressed at all. The 'optimal' method also has a higher chance of being sub-optimal due to the nature of overlay files (usually double-compressed files).
This commit is contained in:
parent
b8d11b3e9d
commit
48f189c991
@ -2,6 +2,7 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using System.IO;
|
||||
using DSDecmp.Utils;
|
||||
|
||||
namespace DSDecmp.Formats
|
||||
{
|
||||
@ -13,6 +14,17 @@ namespace DSDecmp.Formats
|
||||
/// </summary>
|
||||
public class LZOvl : CompressionFormat
|
||||
{
|
||||
// Backing field for LookAhead; read by the compression methods to pick the algorithm.
private static bool lookAhead = false;
/// <summary>
/// Gets or sets the flag that determines if 'look-ahead'/DP should be used when compressing
/// with the LZ-Ovl format. The default is false, which is what is used in the original
/// implementation.
/// </summary>
public static bool LookAhead
{
    // the getter lets callers inspect (and later restore) the current setting
    get { return lookAhead; }
    set { lookAhead = value; }
}
|
||||
|
||||
#region Method: Supports(string file)
|
||||
public override bool Supports(string file)
|
||||
{
|
||||
@ -90,6 +102,7 @@ namespace DSDecmp.Formats
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region Decompression method
|
||||
public override long Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
|
||||
{
|
||||
#region Format description
|
||||
@ -274,10 +287,316 @@ namespace DSDecmp.Formats
|
||||
return decompressedLength + (inLength - headerSize - compressedSize);
|
||||
}
|
||||
}
|
||||
#endregion
|
||||
|
||||
/// <summary>
/// Compresses the given input into the LZ-Ovl format. The input is either compressed in full,
/// or stored unmodified when compressing it would not make the file smaller.
/// </summary>
/// <param name="instream">The stream to read the data to compress from.</param>
/// <param name="inLength">The number of bytes to read from the input stream.</param>
/// <param name="outstream">The stream to write the resulting file to.</param>
/// <returns>The number of bytes written to the output stream.</returns>
/// <exception cref="InputTooLargeException">When the input is larger than 0xFFFFFF bytes.</exception>
/// <exception cref="StreamTooShortException">When the input stream ends before
/// <paramref name="inLength"/> bytes could be read.</exception>
public override int Compress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
{
    // don't bother trying to get the optimal not-compressed - compressed ratio for now.
    // Either compress fully or don't compress (as the format cannot handle decompressed
    // sizes that are smaller than the compressed file).

    if (inLength > 0xFFFFFF)
        throw new InputTooLargeException();

    // read the complete input. Stream.Read may return fewer bytes than requested, so keep
    // reading until the buffer is full, and fail when the stream ends prematurely.
    byte[] indata = new byte[inLength];
    int totalRead = 0;
    while (totalRead < indata.Length)
    {
        int read = instream.Read(indata, totalRead, indata.Length - totalRead);
        if (read <= 0)
            throw new StreamTooShortException();
        totalRead += read;
    }

    // reverse the input, so the forward-working compressor can be used
    Array.Reverse(indata);

    MemoryStream inMemStream = new MemoryStream(indata);
    MemoryStream outMemStream = new MemoryStream();
    // an empty input cannot be compressed; it always ends up in the 'not compressed' branch below
    int compressedLength = inLength > 0
        ? this.CompressNormal(inMemStream, inLength, outMemStream)
        : 0;

    // 8 bytes are written after the compressed data (3 + 1 + 4, see below)
    int totalCompFileLength = compressedLength + 8;
    // make the file 4-byte aligned with padding in the header
    if (totalCompFileLength % 4 != 0)
        totalCompFileLength += 4 - totalCompFileLength % 4;

    if (totalCompFileLength < inLength)
    {
        // undo the reversal, so the compressed data is stored back-to-front
        byte[] compData = outMemStream.ToArray();
        Array.Reverse(compData);
        outstream.Write(compData, 0, compressedLength);
        int writtenBytes = compressedLength;
        // there always seem to be some padding FFs. Let's pad to make the file 4-byte aligned
        while ((writtenBytes + 8) % 4 != 0)
        {
            outstream.WriteByte(0xFF);
            writtenBytes++;
        }

        // 3 bytes: length of the compressed data (little-endian)
        outstream.WriteByte((byte)((compressedLength) & 0xFF));
        outstream.WriteByte((byte)((compressedLength >> 8) & 0xFF));
        outstream.WriteByte((byte)((compressedLength >> 16) & 0xFF));

        // 1 byte: everything that follows the compressed data (padding + these 8 bytes)
        int headerLength = totalCompFileLength - compressedLength;
        outstream.WriteByte((byte)headerLength);

        // 4 bytes: difference between the input length and the written file length (little-endian)
        int extraSize = (int)inLength - totalCompFileLength;
        outstream.WriteByte((byte)((extraSize) & 0xFF));
        outstream.WriteByte((byte)((extraSize >> 8) & 0xFF));
        outstream.WriteByte((byte)((extraSize >> 16) & 0xFF));
        outstream.WriteByte((byte)((extraSize >> 24) & 0xFF));

        return totalCompFileLength;
    }
    else
    {
        // compressing does not pay off: restore the original byte order and store the data
        // unmodified, followed by four zero bytes.
        Array.Reverse(indata);
        outstream.Write(indata, 0, (int)inLength);
        outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0);
        return (int)inLength + 4;
    }
}
|
||||
|
||||
#region 'Normal' compression method. Delegates to CompressWithLA when LookAhead is set
|
||||
/// <summary>
/// Compresses the given input stream with the LZ-Ovl compression, but compresses _forward_
/// instead of backwards. Delegates to the look-ahead variant when look-ahead is enabled.
/// </summary>
/// <param name="instream">The input stream to compress.</param>
/// <param name="inLength">The length of the input stream.</param>
/// <param name="outstream">The stream to write to.</param>
/// <returns>The number of bytes written to the output stream.</returns>
private unsafe int CompressNormal(Stream instream, long inLength, Stream outstream)
{
    // make sure the decompressed size fits in 3 bytes.
    // There should be room for four bytes, however I'm not 100% sure if that can be used
    // in every game, as it may not be a built-in function.
    if (inLength > 0xFFFFFF)
        throw new InputTooLargeException();

    // use the other method if lookahead is enabled
    if (lookAhead)
    {
        return CompressWithLA(instream, inLength, outstream);
    }

    // save the input data in an array to prevent having to go back and forth in a file
    byte[] indata = new byte[inLength];
    int numReadBytes = instream.Read(indata, 0, (int)inLength);
    if (numReadBytes != inLength)
        throw new StreamTooShortException();

    // nothing to compress; taking the address of indata[0] below would fail on an empty array
    if (indata.Length == 0)
        return 0;

    int compressedLength = 0;

    fixed (byte* instart = &indata[0])
    {
        // we do need to buffer the output, as the first byte indicates which blocks are compressed.
        // this version does not use a look-ahead, so we do not need to buffer more than 8 blocks at a time.
        // (1 flag byte + 8 blocks of at most 2 bytes each)
        byte[] outbuffer = new byte[8 * 2 + 1];
        outbuffer[0] = 0;
        int bufferlength = 1, bufferedBlocks = 0;
        int readBytes = 0;
        while (readBytes < inLength)
        {
            #region If 8 blocks are buffered, write them and reset the buffer
            // we can only buffer 8 blocks at a time.
            if (bufferedBlocks == 8)
            {
                outstream.Write(outbuffer, 0, bufferlength);
                compressedLength += bufferlength;
                // reset the buffer
                outbuffer[0] = 0;
                bufferlength = 1;
                bufferedBlocks = 0;
            }
            #endregion

            // determine if we're dealing with a compressed or raw block.
            // it is a compressed block when the next 3 or more bytes can be copied from
            // somewhere in the set of already compressed bytes.
            int disp;
            int oldLength = Math.Min(readBytes, 0x1001);
            int length = LZUtil.GetOccurrenceLength(instart + readBytes, (int)Math.Min(inLength - readBytes, 0x12),
                                                    instart + readBytes - oldLength, oldLength, out disp);

            // disp = 1 cannot be stored, and disp = 2 cannot be saved properly (disp - 3 is stored).
            // A too large disp could be used instead for disp = 2, however since I'm not sure if
            // that's actually how that's handled, don't compress instead.
            if (disp == 1 || disp == 2)
            {
                length = 1;
            }

            // length not 3 or more? next byte is raw data
            if (length < 3)
            {
                outbuffer[bufferlength++] = *(instart + (readBytes++));
            }
            else
            {
                // 3 or more bytes can be copied? next (length) bytes will be compressed into 2 bytes
                readBytes += length;

                // mark the next block as compressed
                outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));

                // high nibble: length - 3; low nibble + next byte: the 12 bits of disp - 3
                outbuffer[bufferlength] = (byte)(((length - 3) << 4) & 0xF0);
                outbuffer[bufferlength] |= (byte)(((disp - 3) >> 8) & 0x0F);
                bufferlength++;
                outbuffer[bufferlength] = (byte)((disp - 3) & 0xFF);
                bufferlength++;
            }
            bufferedBlocks++;
        }

        // copy the remaining blocks to the output.
        // the output is not aligned to 4 bytes here.
        if (bufferedBlocks > 0)
        {
            outstream.Write(outbuffer, 0, bufferlength);
            compressedLength += bufferlength;
        }
    }

    return compressedLength;
}
|
||||
#endregion
|
||||
|
||||
|
||||
#region Dynamic Programming compression method
|
||||
/// <summary>
/// Variation of the original compression method, making use of Dynamic Programming to 'look ahead'
/// and determine the optimal 'length' values for the compressed blocks. Is not 100% optimal,
/// as the flag-bytes are not taken into account.
/// </summary>
/// <param name="instream">The input stream to compress.</param>
/// <param name="inLength">The length of the input stream.</param>
/// <param name="outstream">The stream to write to.</param>
/// <returns>The number of bytes written to the output stream.</returns>
private unsafe int CompressWithLA(Stream instream, long inLength, Stream outstream)
{
    // save the input data in an array to prevent having to go back and forth in a file
    byte[] indata = new byte[inLength];
    int numReadBytes = instream.Read(indata, 0, (int)inLength);
    if (numReadBytes != inLength)
        throw new StreamTooShortException();

    // nothing to compress; taking the address of indata[0] below would fail on an empty array
    if (indata.Length == 0)
        return 0;

    int compressedLength = 0;

    fixed (byte* instart = &indata[0])
    {
        // we do need to buffer the output, as the first byte indicates which blocks are compressed.
        // the block lengths are determined up front, so it suffices to buffer the 8 blocks
        // that share a flag byte (1 flag byte + 8 blocks of at most 2 bytes each).
        byte[] outbuffer = new byte[8 * 2 + 1];
        outbuffer[0] = 0;
        int bufferlength = 1, bufferedBlocks = 0;
        int readBytes = 0;

        // get the optimal choices for len and disp
        int[] lengths, disps;
        this.GetOptimalCompressionLengths(instart, indata.Length, out lengths, out disps);
        while (readBytes < inLength)
        {
            // we can only buffer 8 blocks at a time.
            if (bufferedBlocks == 8)
            {
                outstream.Write(outbuffer, 0, bufferlength);
                compressedLength += bufferlength;
                // reset the buffer
                outbuffer[0] = 0;
                bufferlength = 1;
                bufferedBlocks = 0;
            }

            if (lengths[readBytes] == 1)
            {
                // a length of 1 means the next byte is stored as raw data
                outbuffer[bufferlength++] = *(instart + (readBytes++));
            }
            else
            {
                // mark the next block as compressed
                outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));

                // high nibble: length - 3; low nibble + next byte: the 12 bits of disp - 3
                outbuffer[bufferlength] = (byte)(((lengths[readBytes] - 3) << 4) & 0xF0);
                outbuffer[bufferlength] |= (byte)(((disps[readBytes] - 3) >> 8) & 0x0F);
                bufferlength++;
                outbuffer[bufferlength] = (byte)((disps[readBytes] - 3) & 0xFF);
                bufferlength++;

                readBytes += lengths[readBytes];
            }

            bufferedBlocks++;
        }

        // copy the remaining blocks to the output
        if (bufferedBlocks > 0)
        {
            outstream.Write(outbuffer, 0, bufferlength);
            compressedLength += bufferlength;
        }
    }

    return compressedLength;
}
|
||||
#endregion
|
||||
|
||||
#region DP compression helper method; GetOptimalCompressionLengths
|
||||
/// <summary>
/// Gets the optimal compression lengths for each start of a compressed block using Dynamic Programming.
/// Works from the last byte back to the first, so the cost of everything after a block is known
/// when the block itself is considered. Documented by the original author as taking O(n^2) time.
/// </summary>
/// <param name="indata">The data to compress.</param>
/// <param name="inLength">The length of the data to compress.</param>
/// <param name="lengths">The optimal 'length' of the compressed blocks. For each byte in the input data,
/// this value is the optimal 'length' value. If it is 1, the block should not be compressed.</param>
/// <param name="disps">The 'disp' values of the compressed blocks. May be less than 3, in which case the
/// corresponding length will never be anything other than 1.</param>
/// <exception cref="InvalidOperationException">When a found 'disp' value points to before the start
/// of the data.</exception>
private unsafe void GetOptimalCompressionLengths(byte* indata, int inLength, out int[] lengths, out int[] disps)
{
    lengths = new int[inLength];
    disps = new int[inLength];
    // minLengths[i]: the number of block bytes needed to store the data from position i onwards
    int[] minLengths = new int[inLength];

    for (int i = inLength - 1; i >= 0; i--)
    {
        // first get the compression length when the next byte is not compressed
        lengths[i] = 1;
        if (i + 1 >= inLength)
            minLengths[i] = 1;
        else
            minLengths[i] = 1 + minLengths[i + 1];
        // then the optimal compressed length
        int oldLength = Math.Min(0x1001, i);
        // get the appropriate disp while at it. Takes at most O(n) time if oldLength is considered O(n)
        // be sure to bound the input length with 0x12, as that's the maximum length for LZ-Ovl compressed blocks.
        int maxLen = LZUtil.GetOccurrenceLength(indata + i, Math.Min(inLength - i, 0x12),
                                     indata + i - oldLength, oldLength, out disps[i]);
        // sanity check: a block cannot refer to data before the start of the input
        if (disps[i] > i)
            throw new InvalidOperationException("disp is too large");
        // disp < 3 cannot be stored explicitly.
        if (disps[i] < 3)
            maxLen = 1;
        // try every storable length (3 or more); a compressed block takes 2 bytes
        for (int j = 3; j <= maxLen; j++)
        {
            int newCompLen;
            if (i + j >= inLength)
                newCompLen = 2;
            else
                newCompLen = 2 + minLengths[i + j];
            if (newCompLen < minLengths[i])
            {
                lengths[i] = j;
                minLengths[i] = newCompLen;
            }
        }
    }

    // we could optimize this further to also optimize it with regard to the flag-bytes, but that would require 8 times
    // more space and time (one for each position in the block) for only a potentially tiny increase in compression ratio.
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user