C#: Added compression algorithms for the LZ-Overlay format. At the moment, the file will either be fully compressed or not compressed at all. The 'optimal' method also has a higher chance of being sub-optimal due to the nature of overlay files (usually double-compressed files).

2024-11-16 15:49:24 +01:00 · 2011-05-14 18:52:46 +00:00 · 2011-05-14 18:52:46 +00:00 · 48f189c991
commit 48f189c991
parent b8d11b3e9d
1 changed files with 320 additions and 1 deletions
--- a/CSharp/DSDecmp/Formats/LZOvl.cs
+++ b/CSharp/DSDecmp/Formats/LZOvl.cs
@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using System.Text;
 using System.IO;
+using DSDecmp.Utils;

 namespace DSDecmp.Formats
 {
@ -13,6 +14,17 @@ namespace DSDecmp.Formats
    /// </summary>
    public class LZOvl : CompressionFormat
    {
+        // Backing field for LookAhead; read by CompressNormal to pick the compression routine.
+        private static bool lookAhead = false;
+        /// <summary>
+        /// Gets or sets the flag that determines if 'look-ahead'/DP should be used when compressing
+        /// with the LZ-Ovl format. The default is false, which is what is used in the original
+        /// implementation.
+        /// </summary>
+        /// <remarks>The flag is static, so it is shared by every user of this class.</remarks>
+        public static bool LookAhead
+        {
+            get { return lookAhead; }
+            set { lookAhead = value; }
+        }
+
        #region Method: Supports(string file)
        public override bool Supports(string file)
        {
@ -90,6 +102,7 @@ namespace DSDecmp.Formats
        }
        #endregion

+        #region Decompression method
        public override long Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
        {
            #region Format description
@ -274,10 +287,316 @@ namespace DSDecmp.Formats
                return decompressedLength + (inLength - headerSize - compressedSize);
            }
        }
+        #endregion

        public override int Compress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
        {
-            throw new NotImplementedException();
+            // Compresses inLength bytes of instream into outstream and returns the number of bytes written.
+            // The input is reversed, compressed front-to-back by CompressNormal, and the compressed data is
+            // reversed again before it is written, followed by 0xFF padding and the 8-byte header.
+            //
+            // don't bother trying to get the optimal not-compressed - compressed ratio for now.
+            // Either compress fully or don't compress (as the format cannot handle decompressed
+            // sizes that are smaller than the compressed file).
+
+            if (inLength > 0xFFFFFF)
+                throw new InputTooLargeException();
+
+            // read the input and reverse it. A short read would leave the tail of the buffer zero-filled
+            // and silently compress the wrong data, so fail like CompressNormal/CompressWithLA do.
+            byte[] indata = new byte[inLength];
+            int numReadBytes = instream.Read(indata, 0, (int)inLength);
+            if (numReadBytes != inLength)
+                throw new StreamTooShortException();
+            Array.Reverse(indata);
+
+            MemoryStream inMemStream = new MemoryStream(indata);
+            MemoryStream outMemStream = new MemoryStream();
+            int compressedLength = this.CompressNormal(inMemStream, inLength, outMemStream);
+
+            // total size = compressed data + 8 header bytes
+            int totalCompFileLength = compressedLength + 8;
+            // make the file 4-byte aligned with padding in the header
+            if (totalCompFileLength % 4 != 0)
+                totalCompFileLength += 4 - totalCompFileLength % 4;
+
+            if (totalCompFileLength < inLength)
+            {
+                // reverse the compressed data before writing it
+                byte[] compData = outMemStream.ToArray();
+                Array.Reverse(compData);
+                outstream.Write(compData, 0, compressedLength);
+                int writtenBytes = compressedLength;
+                // there always seem to be some padding FFs. Let's pad to make the file 4-byte aligned
+                while ((writtenBytes + 8) % 4 != 0)
+                {
+                    outstream.WriteByte(0xFF);
+                    writtenBytes++;
                }
+
+                // 3 bytes, little-endian: length of the compressed data (without padding and header)
+                outstream.WriteByte((byte)((compressedLength) & 0xFF));
+                outstream.WriteByte((byte)((compressedLength >> 8) & 0xFF));
+                outstream.WriteByte((byte)((compressedLength >> 16) & 0xFF));
+
+                // 1 byte: header length, i.e. the 8 header bytes plus the padding written above
+                int headerLength = totalCompFileLength - compressedLength;
+                outstream.WriteByte((byte)headerLength);
+
+                // 4 bytes, little-endian: how much larger the decompressed data is than the compressed file
+                int extraSize = (int)inLength - totalCompFileLength;
+                outstream.WriteByte((byte)((extraSize) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 8) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 16) & 0xFF));
+                outstream.WriteByte((byte)((extraSize >> 24) & 0xFF));
+
+                return totalCompFileLength;
+            }
+            else
+            {
+                // compression does not pay off: restore the original byte order and store the data
+                // unchanged, followed by four zero bytes.
+                // NOTE(review): presumably the zero bytes mark the file as not compressed - confirm against Decompress.
+                Array.Reverse(indata);
+                outstream.Write(indata, 0, (int)inLength);
+                outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0);
+                return (int)inLength + 4;
+            }
+        }
+
+        #region 'Normal' compression method. Delegates to CompressWithLA when LookAhead is set
+        /// <summary>
+        /// Compresses the given input stream with the LZ-Ovl compression, but compresses _forward_
+        /// instead of backwards. Delegates to CompressWithLA when the look-ahead flag is set.
+        /// </summary>
+        /// <param name="instream">The input stream to compress.</param>
+        /// <param name="inLength">The length of the input stream.</param>
+        /// <param name="outstream">The stream to write to.</param>
+        /// <returns>The number of bytes written to <paramref name="outstream"/>.</returns>
+        /// <exception cref="InputTooLargeException">When inLength is larger than 0xFFFFFF.</exception>
+        /// <exception cref="StreamTooShortException">When fewer than inLength bytes could be read.</exception>
+        private unsafe int CompressNormal(Stream instream, long inLength, Stream outstream)
+        {
+            // make sure the decompressed size fits in 3 bytes.
+            // There should be room for four bytes, however I'm not 100% sure if that can be used
+            // in every game, as it may not be a built-in function.
+            if (inLength > 0xFFFFFF)
+                throw new InputTooLargeException();
+
+            // nothing to compress. This also avoids taking the address of indata[0] in an empty
+            // array further down, which would throw an IndexOutOfRangeException.
+            if (inLength == 0)
+                return 0;
+
+            // use the other method if lookahead is enabled
+            if (lookAhead)
+            {
+                return CompressWithLA(instream, inLength, outstream);
+            }
+
+            // save the input data in an array to prevent having to go back and forth in a file
+            byte[] indata = new byte[inLength];
+            int numReadBytes = instream.Read(indata, 0, (int)inLength);
+            if (numReadBytes != inLength)
+                throw new StreamTooShortException();
+
+            int compressedLength = 0;
+
+            fixed (byte* instart = &indata[0])
+            {
+                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
+                // this version does not use a look-ahead, so we do not need to buffer more than 8 blocks at a time
+                // (one flag byte plus at most 2 bytes per block).
+                byte[] outbuffer = new byte[8 * 2 + 1];
+                outbuffer[0] = 0;
+                int bufferlength = 1, bufferedBlocks = 0;
+                int readBytes = 0;
+                while (readBytes < inLength)
+                {
+                    #region If 8 blocks are buffered, write them and reset the buffer
+                    // we can only buffer 8 blocks at a time.
+                    if (bufferedBlocks == 8)
+                    {
+                        outstream.Write(outbuffer, 0, bufferlength);
+                        compressedLength += bufferlength;
+                        // reset the buffer
+                        outbuffer[0] = 0;
+                        bufferlength = 1;
+                        bufferedBlocks = 0;
+                    }
+                    #endregion
+
+                    // determine if we're dealing with a compressed or raw block.
+                    // it is a compressed block when the next 3 or more bytes can be copied from
+                    // somewhere in the set of already compressed bytes.
+                    int disp;
+                    int oldLength = Math.Min(readBytes, 0x1001);
+                    int length = LZUtil.GetOccurrenceLength(instart + readBytes, (int)Math.Min(inLength - readBytes, 0x12),
+                                                          instart + readBytes - oldLength, oldLength, out disp);
+
+                    // the stored value is (disp - 3), so a disp below 3 cannot be stored.
+                    // disp = 2 might be expressible with a too large disp instead, however since I'm not
+                    // sure if that's actually how that's handled, don't compress instead.
+                    if (disp < 3)
+                    {
+                        length = 1;
+                    }
+
+                    // length not 3 or more? next byte is raw data
+                    if (length < 3)
+                    {
+                        outbuffer[bufferlength++] = *(instart + (readBytes++));
+                    }
+                    else
+                    {
+                        // 3 or more bytes can be copied? next (length) bytes will be compressed into 2 bytes
+                        readBytes += length;
+
+                        // mark the next block as compressed (flags are stored most significant bit first)
+                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
+
+                        // high nibble: length - 3, low nibble + next byte: 12-bit (disp - 3)
+                        outbuffer[bufferlength] = (byte)(((length - 3) << 4) & 0xF0);
+                        outbuffer[bufferlength] |= (byte)(((disp - 3) >> 8) & 0x0F);
+                        bufferlength++;
+                        outbuffer[bufferlength] = (byte)((disp - 3) & 0xFF);
+                        bufferlength++;
+                    }
+                    bufferedBlocks++;
+                }
+
+                // copy the remaining blocks to the output.
+                // no alignment padding is added here; the returned length is the exact compressed size.
+                if (bufferedBlocks > 0)
+                {
+                    outstream.Write(outbuffer, 0, bufferlength);
+                    compressedLength += bufferlength;
+                }
+            }
+
+            return compressedLength;
+        }
+        #endregion
+
+
+        #region Dynamic Programming compression method
+        /// <summary>
+        /// Variation of the original compression method, making use of Dynamic Programming to 'look ahead'
+        /// and determine the optimal 'length' values for the compressed blocks. Is not 100% optimal,
+        /// as the flag-bytes are not taken into account.
+        /// </summary>
+        /// <param name="instream">The input stream to compress.</param>
+        /// <param name="inLength">The length of the input stream.</param>
+        /// <param name="outstream">The stream to write to.</param>
+        /// <returns>The number of bytes written to <paramref name="outstream"/>.</returns>
+        /// <exception cref="StreamTooShortException">When fewer than inLength bytes could be read.</exception>
+        private unsafe int CompressWithLA(Stream instream, long inLength, Stream outstream)
+        {
+            // nothing to compress. This also avoids taking the address of indata[0] in an empty
+            // array further down, which would throw an IndexOutOfRangeException.
+            if (inLength == 0)
+                return 0;
+
+            // save the input data in an array to prevent having to go back and forth in a file
+            byte[] indata = new byte[inLength];
+            int numReadBytes = instream.Read(indata, 0, (int)inLength);
+            if (numReadBytes != inLength)
+                throw new StreamTooShortException();
+
+            int compressedLength = 0;
+
+            fixed (byte* instart = &indata[0])
+            {
+                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
+                // all length/disp choices are computed up front, so the output itself never needs more
+                // than 8 buffered blocks (one flag byte plus at most 2 bytes per block).
+                byte[] outbuffer = new byte[8 * 2 + 1];
+                outbuffer[0] = 0;
+                int bufferlength = 1, bufferedBlocks = 0;
+                int readBytes = 0;
+
+                // get the optimal choices for len and disp
+                int[] lengths, disps;
+                this.GetOptimalCompressionLengths(instart, indata.Length, out lengths, out disps);
+                while (readBytes < inLength)
+                {
+                    // we can only buffer 8 blocks at a time.
+                    if (bufferedBlocks == 8)
+                    {
+                        outstream.Write(outbuffer, 0, bufferlength);
+                        compressedLength += bufferlength;
+                        // reset the buffer
+                        outbuffer[0] = 0;
+                        bufferlength = 1;
+                        bufferedBlocks = 0;
+                    }
+
+
+                    // a length of 1 means 'store the next byte as raw data'
+                    if (lengths[readBytes] == 1)
+                    {
+                        outbuffer[bufferlength++] = *(instart + (readBytes++));
+                    }
+                    else
+                    {
+                        // mark the next block as compressed (flags are stored most significant bit first)
+                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
+
+                        // high nibble: length - 3, low nibble + next byte: 12-bit (disp - 3)
+                        outbuffer[bufferlength] = (byte)(((lengths[readBytes] - 3) << 4) & 0xF0);
+                        outbuffer[bufferlength] |= (byte)(((disps[readBytes] - 3) >> 8) & 0x0F);
+                        bufferlength++;
+                        outbuffer[bufferlength] = (byte)((disps[readBytes] - 3) & 0xFF);
+                        bufferlength++;
+
+                        // advance only after the block is written; lengths/disps are indexed by block start
+                        readBytes += lengths[readBytes];
+                    }
+
+                    bufferedBlocks++;
+                }
+
+                // copy the remaining blocks to the output
+                if (bufferedBlocks > 0)
+                {
+                    outstream.Write(outbuffer, 0, bufferlength);
+                    compressedLength += bufferlength;
+                }
+            }
+
+            return compressedLength;
+        }
+        #endregion
+
+        #region DP compression helper method; GetOptimalCompressionLengths
+        /// <summary>
+        /// Gets the optimal compression lengths for each start of a compressed block using Dynamic Programming.
+        /// This takes O(n^2) time.
+        /// </summary>
+        /// <param name="indata">The data to compress.</param>
+        /// <param name="inLength">The length of the data to compress.</param>
+        /// <param name="lengths">The optimal 'length' of the compressed blocks. For each byte in the input data,
+        /// this value is the optimal 'length' value. If it is 1, the block should not be compressed.</param>
+        /// <param name="disps">The 'disp' values of the compressed blocks. May be less than 3, in which case the
+        /// corresponding length will never be anything other than 1.</param>
+        /// <exception cref="InvalidOperationException">When a found 'disp' points before the start of the data.</exception>
+        private unsafe void GetOptimalCompressionLengths(byte* indata, int inLength, out int[] lengths, out int[] disps)
+        {
+            lengths = new int[inLength];
+            disps = new int[inLength];
+            // minLengths[i] is the smallest number of output bytes (flag bytes not counted) needed for indata[i..]
+            int[] minLengths = new int[inLength];
+
+            // work backwards, so minLengths of every later position is known when position i is handled
+            for (int i = inLength - 1; i >= 0; i--)
+            {
+                // first get the compression length when the next byte is not compressed
+                lengths[i] = 1;
+                if (i + 1 >= inLength)
+                    minLengths[i] = 1;
+                else
+                    minLengths[i] = 1 + minLengths[i + 1];
+                // then the optimal compressed length
+                int oldLength = Math.Min(0x1001, i);
+                // get the appropriate disp while at it. Takes at most O(n) time if oldLength is considered O(n)
+                // be sure to bound the input length with 0x12, as that's the maximum length for LZ-Ovl compressed blocks.
+                int maxLen = LZUtil.GetOccurrenceLength(indata + i, Math.Min(inLength - i, 0x12),
+                                                 indata + i - oldLength, oldLength, out disps[i]);
+                // sanity check on the helper's result: a block cannot refer to data before the start of the input
+                if (disps[i] > i)
+                    throw new InvalidOperationException("disp is too large");
+                // disp < 3 cannot be stored explicitly.
+                if (disps[i] < 3)
+                    maxLen = 1;
+                // a compressed block costs 2 bytes; try every storable length and keep the cheapest total
+                for (int j = 3; j <= maxLen; j++)
+                {
+                    int newCompLen;
+                    if (i + j >= inLength)
+                        newCompLen = 2;
+                    else
+                        newCompLen = 2 + minLengths[i + j];
+                    if (newCompLen < minLengths[i])
+                    {
+                        lengths[i] = j;
+                        minLengths[i] = newCompLen;
+                    }
+                }
+            }
+
+            // we could optimize this further to also optimize it with regard to the flag-bytes, but that would require 8 times
+            // more space and time (one for each position in the block) for only a potentially tiny increase in compression ratio.
+        }
+        #endregion
    }
 }