C#: Added compression algorithms for the LZ-Overlay format. At the moment, the file will either be fully compressed or not compressed at all. The 'optimal' method also has a higher chance of being sub-optimal due to the nature of overlay files (usually double-compressed files).

2024-09-28 17:28:41 +02:00 · 2011-05-14 18:52:46 +00:00 · 2011-05-14 18:52:46 +00:00 · 48f189c991
commit 48f189c991
parent b8d11b3e9d
1 changed files with 320 additions and 1 deletions
--- a/CSharp/DSDecmp/Formats/LZOvl.cs
+++ b/CSharp/DSDecmp/Formats/LZOvl.cs
@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using System.Text;
 using System.IO;
 using DSDecmp.Utils;
 namespace DSDecmp.Formats
 {
@ -13,6 +14,17 @@ namespace DSDecmp.Formats
    /// </summary>
    public class LZOvl : CompressionFormat
    {
        private static bool lookAhead = false;
        /// <summary>
        /// Sets the flag that determines if 'look-ahead'/DP should be used when compressing
        /// with the LZ-Ovl format. The default is false, which is what is used in the original
        /// implementation.
        /// </summary>
        public static bool LookAhead
        {
            set { lookAhead = value; }
        }
        #region Method: Supports(string file)
        public override bool Supports(string file)
        {
@ -90,6 +102,7 @@ namespace DSDecmp.Formats
        }
        #endregion
        #region Decompression method
        public override long Decompress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
        {
            #region Format description
@ -274,10 +287,316 @@ namespace DSDecmp.Formats
                return decompressedLength + (inLength - headerSize - compressedSize);
            }
        }
        #endregion
        public override int Compress(System.IO.Stream instream, long inLength, System.IO.Stream outstream)
        {
-            throw new NotImplementedException();
+            // don't bother trying to get the optimal not-compressed - compressed ratio for now.
            // Either compress fully or don't compress (as the format cannot handle decompressed
            // sizes that are smaller than the compressed file).
            if (inLength > 0xFFFFFF)
                throw new InputTooLargeException();
            // read the input and reverse it
            byte[] indata = new byte[inLength];
            instream.Read(indata, 0, (int)inLength);
            Array.Reverse(indata);
            MemoryStream inMemStream = new MemoryStream(indata);
            MemoryStream outMemStream = new MemoryStream();
            int compressedLength = this.CompressNormal(inMemStream, inLength, outMemStream);
            int totalCompFileLength = compressedLength + 8;
            // make the file 4-byte aligned with padding in the header
            if (totalCompFileLength % 4 != 0)
                totalCompFileLength += 4 - totalCompFileLength % 4;
            if (totalCompFileLength < inLength)
            {
                byte[] compData = outMemStream.ToArray();
                Array.Reverse(compData);
                outstream.Write(compData, 0, compressedLength);
                int writtenBytes = compressedLength;
                // there always seem to be some padding FFs. Let's pad to make the file 4-byte aligned
                while ((writtenBytes + 8) % 4 != 0)
                {
                    outstream.WriteByte(0xFF);
                    writtenBytes++;
                }
                outstream.WriteByte((byte)((compressedLength) & 0xFF));
                outstream.WriteByte((byte)((compressedLength >> 8) & 0xFF));
                outstream.WriteByte((byte)((compressedLength >> 16) & 0xFF));
                int headerLength = totalCompFileLength - compressedLength;
                outstream.WriteByte((byte)headerLength);
                int extraSize = (int)inLength - totalCompFileLength;
                outstream.WriteByte((byte)((extraSize) & 0xFF));
                outstream.WriteByte((byte)((extraSize >> 8) & 0xFF));
                outstream.WriteByte((byte)((extraSize >> 16) & 0xFF));
                outstream.WriteByte((byte)((extraSize >> 24) & 0xFF));
                return totalCompFileLength;
            }
            else
            {
                Array.Reverse(indata);
                outstream.Write(indata, 0, (int)inLength);
                outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0); outstream.WriteByte(0);
                return (int)inLength + 4;
            }
        }
        #region 'Normal' compression method. Delegates to CompressWithLA when LookAhead is set
        /// <summary>
        /// Compresses the given input stream with the LZ-Ovl compression, but compresses _forward_
        /// instad of backwards.
        /// </summary>
        /// <param name="instream">The input stream to compress.</param>
        /// <param name="inLength">The length of the input stream.</param>
        /// <param name="outstream">The stream to write to.</param>
        private unsafe int CompressNormal(Stream instream, long inLength, Stream outstream)
        {
            // make sure the decompressed size fits in 3 bytes.
            // There should be room for four bytes, however I'm not 100% sure if that can be used
            // in every game, as it may not be a built-in function.
            if (inLength > 0xFFFFFF)
                throw new InputTooLargeException();
            // use the other method if lookahead is enabled
            if (lookAhead)
            {
                return CompressWithLA(instream, inLength, outstream);
            }
            // save the input data in an array to prevent having to go back and forth in a file
            byte[] indata = new byte[inLength];
            int numReadBytes = instream.Read(indata, 0, (int)inLength);
            if (numReadBytes != inLength)
                throw new StreamTooShortException();
            int compressedLength = 0;
            fixed (byte* instart = &indata[0])
            {
                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
                // this version does not use a look-ahead, so we do not need to buffer more than 8 blocks at a time.
                byte[] outbuffer = new byte[8 * 2 + 1];
                outbuffer[0] = 0;
                int bufferlength = 1, bufferedBlocks = 0;
                int readBytes = 0;
                while (readBytes < inLength)
                {
                    #region If 8 blocks are bufferd, write them and reset the buffer
                    // we can only buffer 8 blocks at a time.
                    if (bufferedBlocks == 8)
                    {
                        outstream.Write(outbuffer, 0, bufferlength);
                        compressedLength += bufferlength;
                        // reset the buffer
                        outbuffer[0] = 0;
                        bufferlength = 1;
                        bufferedBlocks = 0;
                    }
                    #endregion
                    // determine if we're dealing with a compressed or raw block.
                    // it is a compressed block when the next 3 or more bytes can be copied from
                    // somewhere in the set of already compressed bytes.
                    int disp;
                    int oldLength = Math.Min(readBytes, 0x1001);
                    int length = LZUtil.GetOccurrenceLength(instart + readBytes, (int)Math.Min(inLength - readBytes, 0x12),
                                                          instart + readBytes - oldLength, oldLength, out disp);
                    // disp = 1 cannot be stored.
                    if (disp == 1)
                    {
                        length = 1;
                    }
                    // disp = 2 cannot be saved properly. use a too large disp instead.
                    // however since I'm not sure if that's actually how that's handled, don't compress instead.
                    else if (disp == 2)
                    {
                        length = 1;
                        /*if (readBytes < 0x1001)
                            disp = readBytes + 1;
                        else
                            length = 1;/**/
                    }
                    // length not 3 or more? next byte is raw data
                    if (length < 3)
                    {
                        outbuffer[bufferlength++] = *(instart + (readBytes++));
                    }
                    else
                    {
                        // 3 or more bytes can be copied? next (length) bytes will be compressed into 2 bytes
                        readBytes += length;
                        // mark the next block as compressed
                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
                        outbuffer[bufferlength] = (byte)(((length - 3) << 4) & 0xF0);
                        outbuffer[bufferlength] |= (byte)(((disp - 3) >> 8) & 0x0F);
                        bufferlength++;
                        outbuffer[bufferlength] = (byte)((disp - 3) & 0xFF);
                        bufferlength++;
                    }
                    bufferedBlocks++;
                }
                // copy the remaining blocks to the output
                if (bufferedBlocks > 0)
                {
                    outstream.Write(outbuffer, 0, bufferlength);
                    compressedLength += bufferlength;
                    /*/ make the compressed file 4-byte aligned.
                    while ((compressedLength % 4) != 0)
                    {
                        outstream.WriteByte(0);
                        compressedLength++;
                    }/**/
                }
            }
            return compressedLength;
        }
        #endregion
        #region Dynamic Programming compression method
        /// <summary>
        /// Variation of the original compression method, making use of Dynamic Programming to 'look ahead'
        /// and determine the optimal 'length' values for the compressed blocks. Is not 100% optimal,
        /// as the flag-bytes are not taken into account.
        /// </summary>
        private unsafe int CompressWithLA(Stream instream, long inLength, Stream outstream)
        {
            // save the input data in an array to prevent having to go back and forth in a file
            byte[] indata = new byte[inLength];
            int numReadBytes = instream.Read(indata, 0, (int)inLength);
            if (numReadBytes != inLength)
                throw new StreamTooShortException();
            int compressedLength = 0;
            fixed (byte* instart = &indata[0])
            {
                // we do need to buffer the output, as the first byte indicates which blocks are compressed.
                // this version does not use a look-ahead, so we do not need to buffer more than 8 blocks at a time.
                byte[] outbuffer = new byte[8 * 2 + 1];
                outbuffer[0] = 0;
                int bufferlength = 1, bufferedBlocks = 0;
                int readBytes = 0;
                // get the optimal choices for len and disp
                int[] lengths, disps;
                this.GetOptimalCompressionLengths(instart, indata.Length, out lengths, out disps);
                while (readBytes < inLength)
                {
                    // we can only buffer 8 blocks at a time.
                    if (bufferedBlocks == 8)
                    {
                        outstream.Write(outbuffer, 0, bufferlength);
                        compressedLength += bufferlength;
                        // reset the buffer
                        outbuffer[0] = 0;
                        bufferlength = 1;
                        bufferedBlocks = 0;
                    }
                    if (lengths[readBytes] == 1)
                    {
                        outbuffer[bufferlength++] = *(instart + (readBytes++));
                    }
                    else
                    {
                        // mark the next block as compressed
                        outbuffer[0] |= (byte)(1 << (7 - bufferedBlocks));
                        outbuffer[bufferlength] = (byte)(((lengths[readBytes] - 3) << 4) & 0xF0);
                        outbuffer[bufferlength] |= (byte)(((disps[readBytes] - 3) >> 8) & 0x0F);
                        bufferlength++;
                        outbuffer[bufferlength] = (byte)((disps[readBytes] - 3) & 0xFF);
                        bufferlength++;
                        readBytes += lengths[readBytes];
                    }
                    bufferedBlocks++;
                }
                // copy the remaining blocks to the output
                if (bufferedBlocks > 0)
                {
                    outstream.Write(outbuffer, 0, bufferlength);
                    compressedLength += bufferlength;
                }
            }
            return compressedLength;
        }
        #endregion
        #region DP compression helper method; GetOptimalCompressionLengths
        /// <summary>
        /// Gets the optimal compression lengths for each start of a compressed block using Dynamic Programming.
        /// This takes O(n^2) time.
        /// </summary>
        /// <param name="indata">The data to compress.</param>
        /// <param name="inLength">The length of the data to compress.</param>
        /// <param name="lengths">The optimal 'length' of the compressed blocks. For each byte in the input data,
        /// this value is the optimal 'length' value. If it is 1, the block should not be compressed.</param>
        /// <param name="disps">The 'disp' values of the compressed blocks. May be less than 3, in which case the
        /// corresponding length will never be anything other than 1.</param>
        private unsafe void GetOptimalCompressionLengths(byte* indata, int inLength, out int[] lengths, out int[] disps)
        {
            lengths = new int[inLength];
            disps = new int[inLength];
            int[] minLengths = new int[inLength];
            for (int i = inLength - 1; i >= 0; i--)
            {
                // first get the compression length when the next byte is not compressed
                minLengths[i] = int.MaxValue;
                lengths[i] = 1;
                if (i + 1 >= inLength)
                    minLengths[i] = 1;
                else
                    minLengths[i] = 1 + minLengths[i + 1];
                // then the optimal compressed length
                int oldLength = Math.Min(0x1001, i);
                // get the appropriate disp while at it. Takes at most O(n) time if oldLength is considered O(n)
                // be sure to bound the input length with 0x12, as that's the maximum length for LZ-Ovl compressed blocks.
                int maxLen = LZUtil.GetOccurrenceLength(indata + i, Math.Min(inLength - i, 0x12),
                                                 indata + i - oldLength, oldLength, out disps[i]);
                if (disps[i] > i)
                    throw new Exception("disp is too large");
                // disp < 3 cannot be stored explicitly.
                if (disps[i] < 3)
                    maxLen = 1;
                for (int j = 3; j <= maxLen; j++)
                {
                    int newCompLen;
                    if (i + j >= inLength)
                        newCompLen = 2;
                    else
                        newCompLen = 2 + minLengths[i + j];
                    if (newCompLen < minLengths[i])
                    {
                        lengths[i] = j;
                        minLengths[i] = newCompLen;
                    }
                }
            }
            // we could optimize this further to also optimize it with regard to the flag-bytes, but that would require 8 times
            // more space and time (one for each position in the block) for only a potentially tiny increase in compression ratio.
        }
        #endregion
    }
 }