diff --git a/CSharp/DSDecmp/Formats/LZOvl.cs b/CSharp/DSDecmp/Formats/LZOvl.cs
index 45c94b5..1a6b574 100644
--- a/CSharp/DSDecmp/Formats/LZOvl.cs
+++ b/CSharp/DSDecmp/Formats/LZOvl.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using System.Text;
 using System.IO;
+using DSDecmp.Utils;
 
 namespace DSDecmp.Formats
 {
@@ -13,6 +14,17 @@ namespace DSDecmp.Formats
     ///
     public class LZOvl : CompressionFormat
     {
+        private static bool lookAhead = false;
+        /// <summary>
+        /// Sets the flag that determines if 'look-ahead'/DP should be used when compressing
+        /// with the LZ-Ovl format. The default is false, which is what is used in the original
+        /// implementation.
+        /// </summary>
+        public static bool LookAhead
+        {
+            set { lookAhead = value; }
+        }
+
         #region Method: Supports(string file)
         public override bool Supports(string file)
         {
@@ -90,6 +102,7 @@ namespace DSDecmp.Formats
         }
         #endregion
 
+        #region Decompression method
         public override long Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
         {
             #region Format description
@@ -274,10 +287,316 @@ namespace DSDecmp.Formats
                 return decompressedLength + (inLength - headerSize - compressedSize);
             }
         }
+        #endregion
 
+        /// <summary>
+        /// Compresses the input with the LZ-Ovl format and writes the result to the output stream.
+        /// The input is either compressed completely or stored uncompressed: when the compressed
+        /// data plus its header is not smaller than the input, the input is written unchanged,
+        /// followed by four zero bytes.
+        /// </summary>
+        /// <param name="instream">The stream to read the data to compress from.</param>
+        /// <param name="inLength">The number of bytes to read from the input; at most 0xFFFFFF.</param>
+        /// <param name="outstream">The stream to write the result to.</param>
+        /// <returns>The number of bytes written to the output stream.</returns>
+        /// <exception cref="InputTooLargeException">inLength is larger than 0xFFFFFF.</exception>
+        /// <exception cref="StreamTooShortException">The input ends before inLength bytes
+        /// could be read.</exception>
         public override int Compress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
         {
-            throw new NotImplementedException();
+            // don't bother trying to get the optimal not-compressed - compressed ratio for now.
+            // Either compress fully or don't compress (as the format cannot handle decompressed
+            // sizes that are smaller than the compressed file).
+
+            if (inLength > 0xFFFFFF)
+                throw new InputTooLargeException();
+
+            // read the complete input. Stream.Read may return fewer bytes than requested, so keep
+            // reading until the buffer is full; a stream that ends early is reported as an error
+            // instead of being compressed as if the missing bytes were zeroes.
+            byte[] indata = new byte[inLength];
+            int totalRead = 0;
+            while (totalRead < indata.Length)
+            {
+                int justRead = instream.Read(indata, totalRead, indata.Length - totalRead);
+                if (justRead <= 0)
+                    throw new StreamTooShortException();
+                totalRead += justRead;
+            }
+
+            // the compressor works forward, so feed it the reversed input and reverse
+            // its output again when writing it.
+            Array.Reverse(indata);
+
+            int compressedLength = 0;
+            byte[] compData = new byte[0];
+            // an empty input has nothing to compress; go straight to the uncompressed branch below.
+            if (inLength > 0)
+            {
+                using (MemoryStream inMemStream = new MemoryStream(indata))
+                using (MemoryStream outMemStream = new MemoryStream())
+                {
+                    compressedLength = this.CompressNormal(inMemStream, inLength, outMemStream);
+                    compData = outMemStream.ToArray();
+                }
+            }
+
+            // the compressed data is followed by 8 header bytes
+            int totalCompFileLength = compressedLength + 8;
+            // make the file 4-byte aligned with padding in the header
+            if (totalCompFileLength % 4 != 0)
+                totalCompFileLength += 4 - totalCompFileLength % 4;
+
+            if (totalCompFileLength < inLength)
+            {
+                Array.Reverse(compData);
+                outstream.Write(compData, 0, compressedLength);
+                int writtenBytes = compressedLength;
+                // there always seem to be some padding FFs. Let's pad to make the file 4-byte aligned
+                while ((writtenBytes + 8) % 4 != 0)
+                {
+                    outstream.WriteByte(0xFF);
+                    writtenBytes++;
+                }
+
+                // 3 bytes, little-endian: the length of the compressed data (without padding and header)
+                outstream.WriteByte((byte)((compressedLength) & 0xFF));
+                outstream.WriteByte((byte)((compressedLength >> 8) & 0xFF));
+                outstream.WriteByte((byte)((compressedLength >> 16) & 0xFF));
+
+                // 1 byte: the header length, which is the padding plus the 8 fixed header bytes
+                int headerLength = totalCompFileLength - compressedLength;
+                outstream.WriteByte((byte)headerLength);
+
+                // 4 bytes, little-endian: how much larger the input is than the compressed file
+                int extraSize = (int)inLength - totalCompFileLength;
+                outstream.WriteByte((byte)((extraSize) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 8) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 16) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 24) & 0xFF));
+
+                return totalCompFileLength;
+            }
+            else
+            {
+                // compressing does not make the file smaller: restore the original byte order
+                // and write the input as-is, followed by four zero bytes.
+                Array.Reverse(indata);
+                outstream.Write(indata, 0, (int)inLength);
+                outstream.WriteByte(0);
+                outstream.WriteByte(0);
+                outstream.WriteByte(0);
+                outstream.WriteByte(0);
+                return (int)inLength + 4;
+            }
         }
+
+        #region 'Normal' compression method.
+        // Delegates to CompressWithLA when LookAhead is set
+        /// <summary>
+        /// Compresses the given input stream with the LZ-Ovl compression, but compresses _forward_
+        /// instead of backwards.
+        /// </summary>
+        /// <param name="instream">The input stream to compress.</param>
+        /// <param name="inLength">The length of the input stream.</param>
+        /// <param name="outstream">The stream to write to.</param>
+        /// <returns>The number of bytes written to the output stream.</returns>
+        /// <exception cref="InputTooLargeException">inLength is larger than 0xFFFFFF.</exception>
+        /// <exception cref="StreamTooShortException">Fewer than inLength bytes could be read
+        /// from the input stream.</exception>
+        private unsafe int CompressNormal(Stream instream, long inLength, Stream outstream)
+        {
+            // make sure the decompressed size fits in 3 bytes.
+            // There should be room for four bytes, however I'm not 100% sure if that can be used
+            // in every game, as it may not be a built-in function.
+            if (inLength > 0xFFFFFF)
+                throw new InputTooLargeException();
+
+            // nothing to compress. This must be handled before pinning the input below, since
+            // taking the address of the first element of an empty array throws.
+            if (inLength == 0)
+                return 0;
+
+            // use the other method if lookahead is enabled
+            if (lookAhead)
+            {
+                return CompressWithLA(instream, inLength, outstream);
+            }
+
+            // save the input data in an array to prevent having to go back and forth in a file
+            byte[] indata = new byte[inLength];
+            int numReadBytes = instream.Read(indata, 0, (int)inLength);
+            if (numReadBytes != inLength)
+                throw new StreamTooShortException();
+
+            int compressedLength = 0;
+
+            fixed (byte* instart = &indata[0])
+            {
+                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
+                // this version does not use a look-ahead, so we do not need to buffer more than 8 blocks at a time.
+                // (1 flag byte + at most 8 blocks of 2 bytes each)
+                byte[] outbuffer = new byte[8 * 2 + 1];
+                outbuffer[0] = 0;
+                int bufferlength = 1, bufferedBlocks = 0;
+                int readBytes = 0;
+                while (readBytes < inLength)
+                {
+                    #region If 8 blocks are buffered, write them and reset the buffer
+                    // we can only buffer 8 blocks at a time.
+                    if (bufferedBlocks == 8)
+                    {
+                        outstream.Write(outbuffer, 0, bufferlength);
+                        compressedLength += bufferlength;
+                        // reset the buffer
+                        outbuffer[0] = 0;
+                        bufferlength = 1;
+                        bufferedBlocks = 0;
+                    }
+                    #endregion
+
+                    // determine if we're dealing with a compressed or raw block.
+                    // it is a compressed block when the next 3 or more bytes can be copied from
+                    // somewhere in the set of already compressed bytes.
+                    // The search looks back at most 0x1001 bytes and ahead at most 0x12 bytes.
+                    int disp;
+                    int oldLength = Math.Min(readBytes, 0x1001);
+                    int length = LZUtil.GetOccurrenceLength(instart + readBytes, (int)Math.Min(inLength - readBytes, 0x12),
+                                                          instart + readBytes - oldLength, oldLength, out disp);
+
+                    // disp is stored as (disp - 3), so disp = 1 and disp = 2 cannot be stored.
+                    // For disp = 2 a too large disp might be usable instead, but as it is not certain
+                    // that this is how the format handles it, do not compress in either case.
+                    if (disp == 1 || disp == 2)
+                    {
+                        length = 1;
+                    }
+
+                    // length not 3 or more? next byte is raw data
+                    if (length < 3)
+                    {
+                        outbuffer[bufferlength++] = *(instart + (readBytes++));
+                    }
+                    else
+                    {
+                        // 3 or more bytes can be copied? next (length) bytes will be compressed into 2 bytes
+                        readBytes += length;
+
+                        // mark the next block as compressed (the first block uses the highest bit)
+                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
+
+                        // first byte: (length - 3) in the high nibble, the top 4 bits of (disp - 3) in the low nibble
+                        outbuffer[bufferlength] = (byte)(((length - 3) << 4) & 0xF0);
+                        outbuffer[bufferlength] |= (byte)(((disp - 3) >> 8) & 0x0F);
+                        bufferlength++;
+                        // second byte: the low 8 bits of (disp - 3)
+                        outbuffer[bufferlength] = (byte)((disp - 3) & 0xFF);
+                        bufferlength++;
+                    }
+                    bufferedBlocks++;
+                }
+
+                // copy the remaining blocks to the output
+                if (bufferedBlocks > 0)
+                {
+                    outstream.Write(outbuffer, 0, bufferlength);
+                    compressedLength += bufferlength;
+                }
+            }
+
+            return compressedLength;
+        }
+        #endregion
+
+
+        #region Dynamic Programming compression method
+        ///
+        /// Variation of the original compression method, making use of Dynamic Programming to 'look ahead'
+        /// and determine the optimal 'length' values for the compressed blocks. Is not 100% optimal,
+        /// as the flag-bytes are not taken into account.
+        ///
+        /// <param name="instream">The stream to read the data to compress from.</param>
+        /// <param name="inLength">The number of bytes to read from the input stream.</param>
+        /// <param name="outstream">The stream to write the compressed data to.</param>
+        /// <returns>The number of bytes written to the output stream.</returns>
+        /// <exception cref="StreamTooShortException">Fewer than inLength bytes could be read
+        /// from the input stream.</exception>
+        private unsafe int CompressWithLA(Stream instream, long inLength, Stream outstream)
+        {
+            // nothing to compress. This must be handled before pinning the input below, since
+            // taking the address of the first element of an empty array throws.
+            if (inLength == 0)
+                return 0;
+
+            // save the input data in an array to prevent having to go back and forth in a file
+            byte[] indata = new byte[inLength];
+            int numReadBytes = instream.Read(indata, 0, (int)inLength);
+            if (numReadBytes != inLength)
+                throw new StreamTooShortException();
+
+            int compressedLength = 0;
+
+            fixed (byte* instart = &indata[0])
+            {
+                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
+                // the look-ahead is computed up front for the whole input, so the output itself
+                // never needs more than 8 buffered blocks (1 flag byte + 8 blocks of at most 2 bytes).
+                byte[] outbuffer = new byte[8 * 2 + 1];
+                outbuffer[0] = 0;
+                int bufferlength = 1, bufferedBlocks = 0;
+                int readBytes = 0;
+
+                // get the optimal choices for len and disp at every position of the input
+                int[] lengths, disps;
+                this.GetOptimalCompressionLengths(instart, indata.Length, out lengths, out disps);
+                while (readBytes < inLength)
+                {
+                    // we can only buffer 8 blocks at a time.
+                    if (bufferedBlocks == 8)
+                    {
+                        outstream.Write(outbuffer, 0, bufferlength);
+                        compressedLength += bufferlength;
+                        // reset the buffer
+                        outbuffer[0] = 0;
+                        bufferlength = 1;
+                        bufferedBlocks = 0;
+                    }
+
+                    // a length of 1 means the byte at this position is stored as raw data
+                    if (lengths[readBytes] == 1)
+                    {
+                        outbuffer[bufferlength++] = *(instart + (readBytes++));
+                    }
+                    else
+                    {
+                        // mark the next block as compressed (the first block uses the highest bit)
+                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
+
+                        // first byte: (length - 3) in the high nibble, the top 4 bits of (disp - 3) in the low nibble
+                        outbuffer[bufferlength] = (byte)(((lengths[readBytes] - 3) << 4) & 0xF0);
+                        outbuffer[bufferlength] |= (byte)(((disps[readBytes] - 3) >> 8) & 0x0F);
+                        bufferlength++;
+                        // second byte: the low 8 bits of (disp - 3)
+                        outbuffer[bufferlength] = (byte)((disps[readBytes] - 3) & 0xFF);
+                        bufferlength++;
+
+                        // skip the bytes covered by this block
+                        readBytes += lengths[readBytes];
+                    }
+
+                    bufferedBlocks++;
+                }
+
+                // copy the remaining blocks to the output
+                if (bufferedBlocks > 0)
+                {
+                    outstream.Write(outbuffer, 0, bufferlength);
+                    compressedLength += bufferlength;
+                }
+            }
+
+            return compressedLength;
+        }
+        #endregion
+
+        #region DP compression helper method;
+        // GetOptimalCompressionLengths
+        /// <summary>
+        /// Gets the optimal compression lengths for each start of a compressed block using Dynamic Programming.
+        /// The positions are processed from the last byte to the first; for every position the cost of
+        /// storing the byte raw is compared with the cost of every usable compressed block length.
+        /// According to the original author this takes O(n^2) time.
+        /// </summary>
+        /// <param name="indata">The data to compress.</param>
+        /// <param name="inLength">The length of the data to compress.</param>
+        /// <param name="lengths">The optimal 'length' of the compressed blocks. For each byte in the input data,
+        /// this value is the optimal 'length' value. If it is 1, the block should not be compressed.</param>
+        /// <param name="disps">The 'disp' values of the compressed blocks. May be less than 3, in which case the
+        /// corresponding length will never be anything other than 1.</param>
+        /// <exception cref="InvalidOperationException">The occurrence search reported a 'disp' that
+        /// points before the start of the data.</exception>
+        private unsafe void GetOptimalCompressionLengths(byte* indata, int inLength, out int[] lengths, out int[] disps)
+        {
+            lengths = new int[inLength];
+            disps = new int[inLength];
+            // minLengths[i] is the smallest number of output bytes (flag bytes not counted)
+            // needed to store everything from position i up to the end of the data.
+            int[] minLengths = new int[inLength];
+
+            for (int i = inLength - 1; i >= 0; i--)
+            {
+                // first get the compression length when the next byte is not compressed
+                lengths[i] = 1;
+                if (i + 1 >= inLength)
+                    minLengths[i] = 1;
+                else
+                    minLengths[i] = 1 + minLengths[i + 1];
+                // then the optimal compressed length
+                int oldLength = Math.Min(0x1001, i);
+                // get the appropriate disp while at it. Takes at most O(n) time if oldLength is considered O(n)
+                // be sure to bound the input length with 0x12, as that's the maximum length for LZ-Ovl compressed blocks.
+                int maxLen = LZUtil.GetOccurrenceLength(indata + i, Math.Min(inLength - i, 0x12),
+                                                 indata + i - oldLength, oldLength, out disps[i]);
+                // sanity check: a block cannot refer to data before the start of the input
+                if (disps[i] > i)
+                    throw new InvalidOperationException("disp is too large");
+                // disp < 3 cannot be stored explicitly.
+                if (disps[i] < 3)
+                    maxLen = 1;
+                // a compressed block takes 2 bytes and covers at least 3 input bytes; try every usable length
+                for (int j = 3; j <= maxLen; j++)
+                {
+                    int newCompLen;
+                    if (i + j >= inLength)
+                        newCompLen = 2;
+                    else
+                        newCompLen = 2 + minLengths[i + j];
+                    if (newCompLen < minLengths[i])
+                    {
+                        lengths[i] = j;
+                        minLengths[i] = newCompLen;
+                    }
+                }
+            }
+
+            // we could optimize this further to also optimize it with regard to the flag-bytes, but that would require 8 times
+            // more space and time (one for each position in the block) for only a potentially tiny increase in compression ratio.
+        }
+        #endregion
     }
 }