diff --git a/Source/Core/DiscIO/WIABlob.cpp b/Source/Core/DiscIO/WIABlob.cpp index 6af49b202f..6f64b3ba01 100644 --- a/Source/Core/DiscIO/WIABlob.cpp +++ b/Source/Core/DiscIO/WIABlob.cpp @@ -210,9 +210,10 @@ bool WIARVZFileReader::Initialize(const std::string& path) const u32 number_of_raw_data_entries = Common::swap32(m_header_2.number_of_raw_data_entries); m_raw_data_entries.resize(number_of_raw_data_entries); - Chunk& raw_data_entries = ReadCompressedData(Common::swap64(m_header_2.raw_data_entries_offset), - Common::swap32(m_header_2.raw_data_entries_size), - number_of_raw_data_entries * sizeof(RawDataEntry)); + Chunk& raw_data_entries = + ReadCompressedData(Common::swap64(m_header_2.raw_data_entries_offset), + Common::swap32(m_header_2.raw_data_entries_size), + number_of_raw_data_entries * sizeof(RawDataEntry), m_compression_type); if (!raw_data_entries.ReadAll(&m_raw_data_entries)) return false; @@ -226,9 +227,10 @@ bool WIARVZFileReader::Initialize(const std::string& path) const u32 number_of_group_entries = Common::swap32(m_header_2.number_of_group_entries); m_group_entries.resize(number_of_group_entries); - Chunk& group_entries = ReadCompressedData(Common::swap64(m_header_2.group_entries_offset), - Common::swap32(m_header_2.group_entries_size), - number_of_group_entries * sizeof(GroupEntry)); + Chunk& group_entries = + ReadCompressedData(Common::swap64(m_header_2.group_entries_offset), + Common::swap32(m_header_2.group_entries_size), + number_of_group_entries * sizeof(GroupEntry), m_compression_type); if (!group_entries.ReadAll(&m_group_entries)) return false; @@ -463,7 +465,20 @@ bool WIARVZFileReader::ReadFromGroups(u64* offset, u64* size, u8** out_ptr, chunk_size = std::min(chunk_size, data_size - group_offset_in_data); const u64 bytes_to_read = std::min(chunk_size - offset_in_group, *size); - const u32 group_data_size = Common::swap32(group.data_size); + u32 group_data_size = Common::swap32(group.data_size); + + WIARVZCompressionType compression_type = 
m_compression_type;
+  u32 rvz_packed_size = 0;
+  if constexpr (RVZ)
+  {
+    if ((group_data_size & 0x80000000) == 0)
+      compression_type = WIARVZCompressionType::None;
+
+    group_data_size &= 0x7FFFFFFF;
+
+    rvz_packed_size = Common::swap32(group.rvz_packed_size);
+  }
+
 if (group_data_size == 0)
 {
   std::memset(*out_ptr, 0, bytes_to_read);
@@ -471,8 +486,11 @@
 else
 {
   const u64 group_offset_in_file = static_cast<u64>(Common::swap32(group.data_offset)) << 2;
-  Chunk& chunk = ReadCompressedData(group_offset_in_file, group_data_size, chunk_size,
-                                    exception_lists, RVZ, group_offset_in_data);
+
+  Chunk& chunk =
+      ReadCompressedData(group_offset_in_file, group_data_size, chunk_size, compression_type,
+                         exception_lists, rvz_packed_size, group_offset_in_data);
+
   if (!chunk.Read(offset_in_group, bytes_to_read, *out_ptr))
   {
     m_cached_chunk_offset = std::numeric_limits<u64>::max();  // Invalidate the cache
@@ -501,20 +519,22 @@ bool WIARVZFileReader<RVZ>::ReadFromGroups(u64* offset, u64* size, u8** out_ptr,
 template <bool RVZ>
 typename WIARVZFileReader<RVZ>::Chunk&
 WIARVZFileReader<RVZ>::ReadCompressedData(u64 offset_in_file, u64 compressed_size,
-                                          u64 decompressed_size, u32 exception_lists, bool rvz_pack,
-                                          u64 data_offset)
+                                          u64 decompressed_size,
+                                          WIARVZCompressionType compression_type,
+                                          u32 exception_lists, u32 rvz_packed_size, u64 data_offset)
 {
   if (offset_in_file == m_cached_chunk_offset)
     return m_cached_chunk;
 
   std::unique_ptr<Decompressor> decompressor;
-  switch (m_compression_type)
+  switch (compression_type)
   {
   case WIARVZCompressionType::None:
     decompressor = std::make_unique<NoneDecompressor>();
     break;
   case WIARVZCompressionType::Purge:
-    decompressor = std::make_unique<PurgeDecompressor>(decompressed_size);
+    decompressor = std::make_unique<PurgeDecompressor>(rvz_packed_size == 0 ?
decompressed_size :
+                                                           rvz_packed_size);
     break;
   case WIARVZCompressionType::Bzip2:
     decompressor = std::make_unique<Bzip2Decompressor>();
     break;
@@ -532,11 +552,11 @@ WIARVZFileReader<RVZ>::ReadCompressedData(u64 offset_in_file, u64 compressed_siz
     break;
   }
 
-  const bool compressed_exception_lists = m_compression_type > WIARVZCompressionType::Purge;
+  const bool compressed_exception_lists = compression_type > WIARVZCompressionType::Purge;
 
   m_cached_chunk =
       Chunk(&m_file, offset_in_file, compressed_size, decompressed_size, exception_lists,
-            compressed_exception_lists, rvz_pack, data_offset, std::move(decompressor));
+            compressed_exception_lists, rvz_packed_size, data_offset, std::move(decompressor));
   m_cached_chunk_offset = offset_in_file;
 
   return m_cached_chunk;
 }
@@ -561,10 +581,10 @@ WIARVZFileReader<RVZ>::Chunk::Chunk() = default;
 
 template <bool RVZ>
 WIARVZFileReader<RVZ>::Chunk::Chunk(File::IOFile* file, u64 offset_in_file, u64 compressed_size,
                                     u64 decompressed_size, u32 exception_lists,
-                                    bool compressed_exception_lists, bool rvz_pack, u64 data_offset,
-                                    std::unique_ptr<Decompressor> decompressor)
+                                    bool compressed_exception_lists, u32 rvz_packed_size,
+                                    u64 data_offset, std::unique_ptr<Decompressor> decompressor)
     : m_file(file), m_offset_in_file(offset_in_file), m_exception_lists(exception_lists),
-      m_compressed_exception_lists(compressed_exception_lists), m_rvz_pack(rvz_pack),
+      m_compressed_exception_lists(compressed_exception_lists), m_rvz_packed_size(rvz_packed_size),
       m_data_offset(data_offset), m_decompressor(std::move(decompressor))
 {
   constexpr size_t MAX_SIZE_PER_EXCEPTION_LIST =
@@ -655,7 +675,7 @@ bool WIARVZFileReader<RVZ>::Chunk::Read(u64 offset, u64 size, u8* out_ptr)
       return false;
     }
 
-    if (m_rvz_pack && m_exception_lists == 0)
+    if (m_rvz_packed_size != 0 && m_exception_lists == 0)
     {
       if (!Decompress())
         return false;
@@ -691,10 +711,8 @@ template <bool RVZ>
 bool WIARVZFileReader<RVZ>::Chunk::Decompress()
 {
-  if (m_rvz_pack &&
+  if (m_rvz_packed_size != 0 &&
m_exception_lists == 0) { - m_rvz_pack = false; - const size_t bytes_to_move = m_out.bytes_written - m_out_bytes_used_for_exceptions; DecompressionBuffer in{std::vector(bytes_to_move), bytes_to_move}; @@ -703,7 +721,9 @@ bool WIARVZFileReader::Chunk::Decompress() m_out.bytes_written = m_out_bytes_used_for_exceptions; m_decompressor = std::make_unique(std::move(m_decompressor), std::move(in), - m_data_offset); + m_data_offset, m_rvz_packed_size); + + m_rvz_packed_size = 0; } return m_decompressor->Decompress(m_in, &m_out, &m_in_bytes_read); @@ -1069,8 +1089,8 @@ static bool AllSame(const u8* begin, const u8* end) template static void RVZPack(const u8* in, OutputParametersEntry* out, u64 bytes_per_chunk, size_t chunks, - u64 total_size, u64 data_offset, u64 in_offset, bool allow_junk_reuse, - bool compression, const FileSystem* file_system) + u64 total_size, u64 data_offset, u64 in_offset, bool multipart, + bool allow_junk_reuse, bool compression, const FileSystem* file_system) { using Seed = std::array; struct JunkInfo @@ -1148,6 +1168,11 @@ static void RVZPack(const u8* in, OutputParametersEntry* out, u64 bytes_per_chun const bool store_junk_efficiently = allow_junk_reuse || !entry.reuse_id; + // TODO: It would be possible to support skipping RVZ packing even when the chunk size is larger + // than 2 MiB (multipart == true), but it would be more effort than it's worth since Dolphin's + // converter doesn't expose chunk sizes larger than 2 MiB to the user anyway + bool first_loop_iteration = !multipart; + while (current_offset < end_offset) { u64 next_junk_start = end_offset; @@ -1165,6 +1190,18 @@ static void RVZPack(const u8* in, OutputParametersEntry* out, u64 bytes_per_chun } } + if (first_loop_iteration) + { + if (next_junk_start == end_offset) + { + // Storing this chunk without RVZ packing would be inefficient, so store it without + PushBack(&entry.main_data, in + in_offset + current_offset, in + in_offset + end_offset); + break; + } + + first_loop_iteration 
= false; + } + const u64 non_junk_bytes = next_junk_start - current_offset; if (non_junk_bytes > 0) { @@ -1174,6 +1211,7 @@ static void RVZPack(const u8* in, OutputParametersEntry* out, u64 bytes_per_chun PushBack(&entry.main_data, ptr, ptr + non_junk_bytes); current_offset += non_junk_bytes; + entry.rvz_packed_size += sizeof(u32) + non_junk_bytes; } const u64 junk_bytes = next_junk_end - current_offset; @@ -1183,6 +1221,7 @@ static void RVZPack(const u8* in, OutputParametersEntry* out, u64 bytes_per_chun PushBack(&entry.main_data, *seed); current_offset += junk_bytes; + entry.rvz_packed_size += sizeof(u32) + SEED_SIZE; } } } @@ -1192,7 +1231,8 @@ template static void RVZPack(const u8* in, OutputParametersEntry* out, u64 size, u64 data_offset, bool allow_junk_reuse, bool compression, const FileSystem* file_system) { - RVZPack(in, out, size, 1, size, data_offset, 0, allow_junk_reuse, compression, file_system); + RVZPack(in, out, size, 1, size, data_offset, 0, false, allow_junk_reuse, compression, + file_system); } template @@ -1381,7 +1421,7 @@ WIARVZFileReader::ProcessAndCompress(CompressThreadState* state, CompressPa RVZPack(state->decryption_buffer[0].data(), output_entries.data() + first_chunk, bytes_per_chunk, chunks, total_size, data_offset, write_offset_of_group, - allow_junk_reuse, compression, file_system); + groups > 1, allow_junk_reuse, compression, file_system); } else { @@ -1462,9 +1502,19 @@ WIARVZFileReader::ProcessAndCompress(CompressThreadState* state, CompressPa { entry.exception_lists.clear(); entry.main_data.clear(); + if constexpr (RVZ) + { + entry.rvz_packed_size = 0; + entry.compressed = false; + } continue; } + const auto pad_exception_lists = [&entry]() { + while (entry.exception_lists.size() % 4 != 0) + entry.exception_lists.push_back(0); + }; + if (state->compressor) { if (!state->compressor->Start()) @@ -1480,16 +1530,11 @@ WIARVZFileReader::ProcessAndCompress(CompressThreadState* state, CompressPa { return 
ConversionResultCode::InternalError; } - - entry.exception_lists.clear(); } else { if (!compressed_exception_lists) - { - while (entry.exception_lists.size() % 4 != 0) - entry.exception_lists.push_back(0); - } + pad_exception_lists(); if (state->compressor) { @@ -1510,13 +1555,30 @@ WIARVZFileReader::ProcessAndCompress(CompressThreadState* state, CompressPa return ConversionResultCode::InternalError; } - if (state->compressor) + bool compressed = !!state->compressor; + if constexpr (RVZ) + { + size_t uncompressed_size = entry.main_data.size(); + if (compressed_exception_lists) + uncompressed_size += Common::AlignUp(entry.exception_lists.size(), 4); + + compressed = state->compressor && state->compressor->GetSize() < uncompressed_size; + entry.compressed = compressed; + + if (!compressed) + pad_exception_lists(); + } + + if (compressed) { const u8* data = state->compressor->GetData(); const size_t size = state->compressor->GetSize(); entry.main_data.resize(size); std::copy(data, data + size, entry.main_data.data()); + + if (compressed_exception_lists) + entry.exception_lists.clear(); } } @@ -1540,21 +1602,26 @@ ConversionResultCode WIARVZFileReader::Output(std::vector> 2 > std::numeric_limits::max()) return ConversionResultCode::InternalError; ASSERT((*bytes_written & 3) == 0); group_entry->data_offset = Common::swap32(static_cast(*bytes_written >> 2)); - group_entry->data_size = Common::swap32(static_cast(data_size)); + + u32 data_size = static_cast(entry.exception_lists.size() + entry.main_data.size()); + if constexpr (RVZ) + { + data_size = (data_size & 0x7FFFFFFF) | (static_cast(entry.compressed) << 31); + group_entry->rvz_packed_size = Common::swap32(static_cast(entry.rvz_packed_size)); + } + group_entry->data_size = Common::swap32(data_size); if (!outfile->WriteArray(entry.exception_lists.data(), entry.exception_lists.size())) return ConversionResultCode::WriteFailed; if (!outfile->WriteArray(entry.main_data.data(), entry.main_data.size())) return 
ConversionResultCode::WriteFailed; - *bytes_written += data_size; + *bytes_written += entry.exception_lists.size() + entry.main_data.size(); if (entry.reuse_id) { @@ -1659,10 +1726,18 @@ WIARVZFileReader::Convert(BlobReader* infile, const VolumeDisc* infile_volu // Conservative estimate for how much space will be taken up by headers. // The compression methods None and Purge have very predictable overhead, // and the other methods are able to compress group entries well - const u64 headers_size_upper_bound = - Common::AlignUp(sizeof(WIAHeader1) + sizeof(WIAHeader2) + partition_entries_size + - raw_data_entries_size + group_entries_size + 0x100, - VolumeWii::BLOCK_TOTAL_SIZE); + const u64 headers_size_upper_bound = [&] { + u64 upper_bound = sizeof(WIAHeader1) + sizeof(WIAHeader2) + partition_entries_size + + raw_data_entries_size + 0x100; + + // RVZ's added data in GroupEntry usually compresses well + if (RVZ && compression_type > WIARVZCompressionType::Purge) + upper_bound += group_entries_size / 2; + else + upper_bound += group_entries_size; + + return Common::AlignUp(upper_bound, VolumeWii::BLOCK_TOTAL_SIZE); + }(); std::vector buffer; diff --git a/Source/Core/DiscIO/WIABlob.h b/Source/Core/DiscIO/WIABlob.h index d7e3573e7c..458b05e8e7 100644 --- a/Source/Core/DiscIO/WIABlob.h +++ b/Source/Core/DiscIO/WIABlob.h @@ -138,12 +138,22 @@ private: }; static_assert(sizeof(RawDataEntry) == 0x18, "Wrong size for WIA raw data entry"); - struct GroupEntry + struct WIAGroupEntry { u32 data_offset; // >> 2 u32 data_size; }; - static_assert(sizeof(GroupEntry) == 0x08, "Wrong size for WIA group entry"); + static_assert(sizeof(WIAGroupEntry) == 0x08, "Wrong size for WIA group entry"); + + struct RVZGroupEntry + { + u32 data_offset; // >> 2 + u32 data_size; + u32 rvz_packed_size; + }; + static_assert(sizeof(RVZGroupEntry) == 0x0c, "Wrong size for RVZ group entry"); + + using GroupEntry = std::conditional_t; struct HashExceptionEntry { @@ -172,8 +182,8 @@ private: public: Chunk(); 
Chunk(File::IOFile* file, u64 offset_in_file, u64 compressed_size, u64 decompressed_size, - u32 exception_lists, bool compressed_exception_lists, bool rvz_pack, u64 data_offset, - std::unique_ptr decompressor); + u32 exception_lists, bool compressed_exception_lists, u32 rvz_packed_size, + u64 data_offset, std::unique_ptr decompressor); bool Read(u64 offset, u64 size, u8* out_ptr); @@ -205,7 +215,7 @@ private: size_t m_in_bytes_used_for_exceptions = 0; u32 m_exception_lists = 0; bool m_compressed_exception_lists = false; - bool m_rvz_pack = false; + u32 m_rvz_packed_size = 0; u64 m_data_offset = 0; }; @@ -217,7 +227,8 @@ private: u64 data_offset, u64 data_size, u32 group_index, u32 number_of_groups, u32 exception_lists); Chunk& ReadCompressedData(u64 offset_in_file, u64 compressed_size, u64 decompressed_size, - u32 exception_lists = 0, bool rvz_pack = false, u64 data_offset = 0); + WIARVZCompressionType compression_type, u32 exception_lists = 0, + u32 rvz_packed_size = 0, u64 data_offset = 0); static bool ApplyHashExceptions(const std::vector& exception_list, VolumeWii::HashBlock hash_blocks[VolumeWii::BLOCKS_PER_GROUP]); @@ -273,7 +284,7 @@ private: size_t group_index; }; - struct OutputParametersEntry + struct WIAOutputParametersEntry { std::vector exception_lists; std::vector main_data; @@ -281,6 +292,19 @@ private: std::optional reused_group; }; + struct RVZOutputParametersEntry + { + std::vector exception_lists; + std::vector main_data; + std::optional reuse_id; + std::optional reused_group; + size_t rvz_packed_size = 0; + bool compressed = false; + }; + + using OutputParametersEntry = + std::conditional_t; + struct OutputParameters { std::vector entries; @@ -355,9 +379,9 @@ private: static constexpr u32 WIA_VERSION_WRITE_COMPATIBLE = 0x01000000; static constexpr u32 WIA_VERSION_READ_COMPATIBLE = 0x00080000; - static constexpr u32 RVZ_VERSION = 0x00020000; - static constexpr u32 RVZ_VERSION_WRITE_COMPATIBLE = 0x00020000; - static constexpr u32 
RVZ_VERSION_READ_COMPATIBLE = 0x00020000; + static constexpr u32 RVZ_VERSION = 0x00030000; + static constexpr u32 RVZ_VERSION_WRITE_COMPATIBLE = 0x00030000; + static constexpr u32 RVZ_VERSION_READ_COMPATIBLE = 0x00030000; }; using WIAFileReader = WIARVZFileReader; diff --git a/Source/Core/DiscIO/WIACompression.cpp b/Source/Core/DiscIO/WIACompression.cpp index c15380a343..d6d3acae2b 100644 --- a/Source/Core/DiscIO/WIACompression.cpp +++ b/Source/Core/DiscIO/WIACompression.cpp @@ -292,10 +292,18 @@ bool ZstdDecompressor::Decompress(const DecompressionBuffer& in, DecompressionBu } RVZPackDecompressor::RVZPackDecompressor(std::unique_ptr decompressor, - DecompressionBuffer decompressed, u64 data_offset) + DecompressionBuffer decompressed, u64 data_offset, + u32 rvz_packed_size) : m_decompressor(std::move(decompressor)), m_decompressed(std::move(decompressed)), - m_data_offset(data_offset) + m_data_offset(data_offset), m_rvz_packed_size(rvz_packed_size) { + m_bytes_read = m_decompressed.bytes_written; +} + +bool RVZPackDecompressor::IncrementBytesRead(size_t x) +{ + m_bytes_read += x; + return m_bytes_read <= m_rvz_packed_size; } std::optional RVZPackDecompressor::ReadToDecompressed(const DecompressionBuffer& in, @@ -308,9 +316,14 @@ std::optional RVZPackDecompressor::ReadToDecompressed(const DecompressionB if (m_decompressed.bytes_written < decompressed_bytes_read + bytes_to_read) { + const size_t prev_bytes_written = m_decompressed.bytes_written; + if (!m_decompressor->Decompress(in, &m_decompressed, in_bytes_read)) return false; + if (!IncrementBytesRead(m_decompressed.bytes_written - prev_bytes_written)) + return false; + if (m_decompressed.bytes_written < decompressed_bytes_read + bytes_to_read) return true; } @@ -395,6 +408,10 @@ bool RVZPackDecompressor::Decompress(const DecompressionBuffer& in, Decompressio out->data.resize(old_out_size); bytes_to_write = out->bytes_written - prev_out_bytes_written; + + if (!IncrementBytesRead(bytes_to_write)) + return false; + 
if (bytes_to_write == 0)
         return true;
     }
@@ -417,8 +434,8 @@ bool RVZPackDecompressor::Decompress(const DecompressionBuffer& in, Decompressio
 
 bool RVZPackDecompressor::Done() const
 {
-  return m_size == 0 && m_decompressed.bytes_written == m_decompressed_bytes_read &&
-         m_decompressor->Done();
+  return m_size == 0 && m_rvz_packed_size == m_bytes_read &&
+         m_decompressed.bytes_written == m_decompressed_bytes_read && m_decompressor->Done();
 }
 
 Compressor::~Compressor() = default;
diff --git a/Source/Core/DiscIO/WIACompression.h b/Source/Core/DiscIO/WIACompression.h
index 5015802802..37e8cf3dc3 100644
--- a/Source/Core/DiscIO/WIACompression.h
+++ b/Source/Core/DiscIO/WIACompression.h
@@ -122,7 +122,7 @@ class RVZPackDecompressor final : public Decompressor
 {
 public:
   RVZPackDecompressor(std::unique_ptr<Decompressor> decompressor, DecompressionBuffer decompressed,
-                      u64 data_offset);
+                      u64 data_offset, u32 rvz_packed_size);
 
   bool Decompress(const DecompressionBuffer& in, DecompressionBuffer* out,
                   size_t* in_bytes_read) override;
@@ -130,13 +130,16 @@ public:
   bool Done() const override;
 
 private:
+  bool IncrementBytesRead(size_t x);
   std::optional<bool> ReadToDecompressed(const DecompressionBuffer& in, size_t* in_bytes_read,
                                          size_t decompressed_bytes_read, size_t bytes_to_read);
 
   std::unique_ptr<Decompressor> m_decompressor;
   DecompressionBuffer m_decompressed;
   size_t m_decompressed_bytes_read = 0;
+  size_t m_bytes_read;
   u64 m_data_offset;
+  u32 m_rvz_packed_size;
 
   u32 m_size = 0;
   bool m_junk;
diff --git a/docs/WIA.md b/docs/WIA.md
index 16688d5bdd..aa8120b6d2 100644
--- a/docs/WIA.md
+++ b/docs/WIA.md
@@ -178,11 +178,24 @@ RVZ is a file format which is closely based on WIA. The differences are as follo
 
 * Chunk sizes smaller than 2 MiB are supported. The following applies when using a chunk size smaller than 2 MiB:
     * The chunk size must be at least 32 KiB and must be a power of two. (Just like with WIA, sizes larger than 2 MiB do not have to be a power of two, they just have to be an integer multiple of 2 MiB.)
* For Wii partition data, each chunk contains one `wia_except_list_t` which contains exceptions for that chunk (and no other chunks). Offset 0 refers to the first hash of the current chunk, not the first hash of the full 2 MiB of data. -* An encoding scheme which is described below is used to store pseudorandom padding data losslessly. +* The `wia_group_t` struct has been expanded. See the `rvz_group_t` section below. +* Pseudorandom padding data is stored losslessly using an encoding scheme described in the *RVZ packing* section below. + +## `rvz_group_t` + +Compared to `wia_group_t`, `rvz_group_t` changes the meaning of the most significant bit of `data_size` and adds one additional attribute. + +"Compressed data" below means the data as it is stored in the file. When compression is disabled, this "compressed data" is actually not compressed. + +|Type and name|Description| +|--|--| +|`u32 data_off4`|The offset in the file where the compressed data is stored, divided by 4.| +|`u32 data_size`|The most significant bit is 1 if the data is compressed using the compression method indicated in `wia_disc_t`, and 0 if it is not compressed. The lower 31 bits are the size of the compressed data, including any `wia_except_list_t` structs. The lower 31 bits being 0 is a special case meaning that every byte of the decompressed and unpacked data is `0x00` and the `wia_except_list_t` structs (if there are supposed to be any) contain 0 exceptions.| +|`u32 rvz_packed_size`|The size after decompressing but before decoding the RVZ packing. If this is 0, RVZ packing is not used for this group.| ## RVZ packing -The RVZ packing encoding scheme is applied to all `wia_group_t` data, with any bzip2/LZMA/Zstandard compression being applied on top of it. (In other words, when reading an RVZ file, bzip2/LZMA/Zstandard decompression is done before decoding the RVZ packing.) 
RVZ packed data can be decoded as follows: +The RVZ packing encoding scheme can be applied to `wia_group_t` data, with any bzip2/LZMA/Zstandard compression being applied on top of it. (In other words, when reading an RVZ file, bzip2/LZMA/Zstandard decompression is done before decoding the RVZ packing.) RVZ packed data can be decoded as follows: 1. Read 4 bytes of data and interpret it as a 32-bit unsigned big endian integer. Call this `size`. 2. If the most significant bit of `size` is not set, read `size` bytes and output them unchanged. If the most significant bit of `size` is set, unset the most significant bit of `size`, then read 68 bytes of PRNG seed data and output `size` bytes using the PRNG algorithm described below.