Implement indexed quads support

We previously only supported non-indexed quads. Indexed quads are now supported by converting the quad-list index buffer into a triangle-list index buffer at record time and pushing the result into the megabuffer; that megabuffer allocation is then used as the index buffer in the final draw command.
2025-02-17 07:46:21 +01:00 · 2022-08-04 20:00:36 +01:00 · 2022-08-04 20:00:36 +01:00 · d5174175d1
commit d5174175d1
parent e6741642ba
3 changed files with 71 additions and 13 deletions
--- a/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.cpp
@ -19,4 +19,37 @@ namespace skyline::gpu::interconnect::conversion::quads {
            *(dest++) = i + 0;
        }
    }
+
+    /**
+     * @brief Expands a quad list index buffer into a triangle list index buffer, emitting 6 indices for every 4 consumed
+     * @tparam S The integer type of a single index in both buffers
+     * @param dest Output triangle list indices, must have room for 6 indices per complete quad in the source
+     * @param source Input quad list indices, must not overlap with the destination
+     * @param indexCount The number of indices in the source buffer, any trailing indices that don't form a complete quad are ignored
+     */
+    template<typename S>
+    static void GenerateQuadIndexConversionBufferImpl(S *__restrict__ dest, S *__restrict__ source, u32 indexCount) {
+        // Only complete quads are converted, iterating while `i < indexCount` in steps of 4 would read past the end of the source (and write an extra 6 indices) whenever the count isn't a multiple of 4
+        const size_t quadCount{indexCount / 4};
+
+        #pragma clang loop vectorize(enable) interleave(enable) unroll(enable)
+        for (size_t quad{}; quad < quadCount; quad++, source += 4) {
+            // Given a quad ABCD, we want to generate triangles ABC & CDA
+            // Triangle ABC
+            *(dest++) = *(source + 0);
+            *(dest++) = *(source + 1);
+            *(dest++) = *(source + 2);
+
+            // Triangle CDA
+            *(dest++) = *(source + 2);
+            *(dest++) = *(source + 3);
+            *(dest++) = *(source + 0);
+        }
+    }
+
+    void GenerateIndexedQuadConversionBuffer(u8 *dest, u8 *source, u32 indexCount, vk::IndexType type) {
+        // The byte buffers are reinterpreted as arrays of whichever index width `type` selects, any other index type results in no conversion being performed
+        if (type == vk::IndexType::eUint8EXT) {
+            GenerateQuadIndexConversionBufferImpl(dest, source, indexCount);
+        } else if (type == vk::IndexType::eUint16) {
+            auto *triangleIndices{reinterpret_cast<u16 *>(dest)};
+            auto *quadIndices{reinterpret_cast<u16 *>(source)};
+            GenerateQuadIndexConversionBufferImpl(triangleIndices, quadIndices, indexCount);
+        } else if (type == vk::IndexType::eUint32) {
+            auto *triangleIndices{reinterpret_cast<u32 *>(dest)};
+            auto *quadIndices{reinterpret_cast<u32 *>(source)};
+            GenerateQuadIndexConversionBufferImpl(triangleIndices, quadIndices, indexCount);
+        }
+    }
 }
--- a/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/conversion/quads.h
@ -43,4 +43,10 @@ namespace skyline::gpu::interconnect::conversion::quads {
     * @note The size of the supplied buffer should be at least the size returned by GetRequiredBufferSize()
     */
    void GenerateQuadListConversionBuffer(u32 *dest, u32 vertexCount);
+
+    /**
+     * @brief Create an index buffer that repeats quad vertices from the source buffer to generate a triangle list
+     * @param dest The buffer that the triangle list indices are written into, it is interpreted as an array of indices of the supplied type
+     * @param source The buffer that the quad list indices are read from, it is interpreted as an array of indices of the supplied type
+     * @param indexCount The amount of indices in the source buffer, they are consumed in groups of 4 (a quad ABCD) with each group producing 6 indices (triangles ABC and CDA)
+     * @param type The type of the indices in both buffers, only eUint8EXT, eUint16 and eUint32 are converted while nothing is written for any other type
+     * @note The size of the destination buffer should be at least the size returned by GetRequiredBufferSize()
+     */
+    void GenerateIndexedQuadConversionBuffer(u8 *dest, u8 *source, u32 indexCount, vk::IndexType type);
 }
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@ -1699,6 +1699,19 @@ namespace skyline::gpu::interconnect {
            return {quadListConversionBuffer->GetView(0, size), vk::IndexType::eUint32, conversion::quads::GetIndexCount(count)};
        }

+        /**
+         * @brief Converts the currently bound quad list index buffer into a triangle list that is written into a fresh megabuffer allocation
+         * @param count The amount of indices to read from the bound index buffer
+         * @return The megabuffer allocation which holds the converted indices
+         */
+        MegaBufferAllocator::Allocation GetIndexedQuadConversionBuffer(u32 count) {
+            vk::DeviceSize requiredSize{conversion::quads::GetRequiredBufferSize(count, indexBuffer.type)};
+            auto megaBufferAllocation{executor.AcquireMegaBufferAllocator().Allocate(executor.cycle, requiredSize)};
+
+            // The lock is held until the end of the function so that it covers both retrieving the backing span and reading from it during conversion
+            ContextLock indexBufferLock{executor.tag, indexBuffer.view};
+            auto isFirstUsage{indexBufferLock.IsFirstUsage()};
+            auto quadIndices{indexBuffer.view.GetReadOnlyBackingSpan(isFirstUsage, []() {
+                // TODO: see Read()
+                Logger::Error("Dirty index buffer reads for attached buffers are unimplemented");
+            })};
+
+            conversion::quads::GenerateIndexedQuadConversionBuffer(megaBufferAllocation.region.data(), quadIndices.data(), count, indexBuffer.type);
+            return megaBufferAllocation;
+        }
+
      public:
        void SetVertexBufferStride(u32 index, u32 stride) {
            vertexBuffers[index].bindingDescription.stride = stride;
@ -2972,13 +2985,18 @@ namespace skyline::gpu::interconnect {

            std::shared_ptr<BoundIndexBuffer> boundIndexBuffer{};
            if constexpr (IsIndexed) {
-                if (needsQuadConversion)
-                    throw exception("Indexed quad conversion is not supported");
-
                auto indexBufferView{GetIndexBuffer(count)};
+                boundIndexBuffer = std::allocate_shared<BoundIndexBuffer, LinearAllocator<BoundIndexBuffer>>(executor.allocator);
+                boundIndexBuffer->type = indexBuffer.type;
+
+                if (needsQuadConversion) {
+                    auto allocation{GetIndexedQuadConversionBuffer(count)};
+                    boundIndexBuffer->handle = allocation.buffer;
+                    boundIndexBuffer->offset = allocation.offset;
+                    count = conversion::quads::GetIndexCount(count);
+                } else {
                    executor.AttachBuffer(indexBufferView);

-                boundIndexBuffer = std::allocate_shared<BoundIndexBuffer, LinearAllocator<BoundIndexBuffer>>(executor.allocator);
                    boundIndexBuffer->type = indexBuffer.type;
                    if (auto megaBufferAllocation{indexBufferView.AcquireMegaBuffer(executor.cycle, executor.AcquireMegaBufferAllocator())}) {
                        // If the buffer is megabuffered then since we don't get out data from the underlying buffer, rather the megabuffer which stays consistent throughout a single execution, we can skip registering usage
@ -2990,6 +3008,7 @@ namespace skyline::gpu::interconnect {
                            boundIndexBuffer->offset = view.offset;
                        });
                    }
+                }
            } else if (needsQuadConversion) {
                // Convert the guest-supplied quad list to an indexed triangle list
                auto[bufferView, indexType, indexCount]{GetNonIndexedQuadConversionBuffer(count)};