mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-11 11:29:10 +01:00
Optimize deswizzling implementation and support multiple formats
The deswizzling implementation currently writes linearly and reads non-linearly, which is suboptimal because the MMU cannot read ahead. This commit flips that: it reads linearly and writes non-linearly. This is based on: 324a3624ac/nx/source/display/framebuffer.c (L189)
.
This commit is contained in:
parent
e11d7d9ce0
commit
745cb208a6
@ -12,6 +12,7 @@ namespace skyline::gpu {
|
|||||||
ANativeWindow_acquire(window);
|
ANativeWindow_acquire(window);
|
||||||
resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
|
resolution.width = static_cast<u32>(ANativeWindow_getWidth(window));
|
||||||
resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
|
resolution.height = static_cast<u32>(ANativeWindow_getHeight(window));
|
||||||
|
format = ANativeWindow_getFormat(window);
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU::~GPU() {
|
GPU::~GPU() {
|
||||||
@ -23,38 +24,39 @@ namespace skyline::gpu {
|
|||||||
auto &buffer = bufferQueue.displayQueue.front();
|
auto &buffer = bufferQueue.displayQueue.front();
|
||||||
bufferQueue.displayQueue.pop();
|
bufferQueue.displayQueue.pop();
|
||||||
if (resolution != buffer->resolution || buffer->gbpBuffer.format != format) {
|
if (resolution != buffer->resolution || buffer->gbpBuffer.format != format) {
|
||||||
if (resolution != buffer->resolution && buffer->gbpBuffer.format != format) {
|
ANativeWindow_setBuffersGeometry(window, buffer->resolution.width, buffer->resolution.height, buffer->gbpBuffer.format);
|
||||||
ANativeWindow_setBuffersGeometry(window, buffer->resolution.width, buffer->resolution.height, buffer->gbpBuffer.format);
|
resolution = buffer->resolution;
|
||||||
resolution = buffer->resolution;
|
format = buffer->gbpBuffer.format;
|
||||||
format = buffer->gbpBuffer.format;
|
|
||||||
} else if (resolution != buffer->resolution) {
|
|
||||||
ANativeWindow_setBuffersGeometry(window, buffer->resolution.width, buffer->resolution.height, format);
|
|
||||||
resolution = buffer->resolution;
|
|
||||||
} else if (buffer->gbpBuffer.format != format) {
|
|
||||||
ANativeWindow_setBuffersGeometry(window, resolution.width, resolution.height, buffer->gbpBuffer.format);
|
|
||||||
format = buffer->gbpBuffer.format;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
buffer->UpdateBuffer();
|
buffer->UpdateBuffer();
|
||||||
auto bufferData = buffer->dataBuffer.data();
|
u8 *inBuffer = buffer->dataBuffer.data();
|
||||||
//madvise(bufferData, buffer->gbpBuffer.size, MADV_SEQUENTIAL); (Uncomment this after deswizzling while reading sequentially instead of writing sequentially)
|
madvise(inBuffer, buffer->gbpBuffer.size, MADV_SEQUENTIAL);
|
||||||
ANativeWindow_Buffer windowBuffer;
|
ANativeWindow_Buffer windowBuffer;
|
||||||
ARect rect;
|
ARect rect;
|
||||||
ANativeWindow_lock(window, &windowBuffer, &rect);
|
ANativeWindow_lock(window, &windowBuffer, &rect);
|
||||||
u32 *address = reinterpret_cast<u32 *>(windowBuffer.bits);
|
u8 *outBuffer = reinterpret_cast<u8 *>(windowBuffer.bits);
|
||||||
for (u32 y = 0; y < buffer->resolution.height; y++) {
|
const u32 strideBytes = buffer->gbpBuffer.stride * buffer->bpp;
|
||||||
for (u32 x = 0; x < buffer->resolution.width; x += 4, address += 4) {
|
const u32 blockHeight = 1U << buffer->gbpBuffer.blockHeightLog2;
|
||||||
u32 position = (y & 0x7f) >> 4U;
|
const u32 blockHeightPixels = 8U << buffer->gbpBuffer.blockHeightLog2;
|
||||||
position += (x >> 4U) << 3U;
|
const u32 widthBlocks = strideBytes >> 6U;
|
||||||
position += (y >> 7U) * ((resolution.width >> 4U) << 3U);
|
const u32 heightBlocks = ((resolution.height) + blockHeightPixels - 1) >> (3 + buffer->gbpBuffer.blockHeightLog2);
|
||||||
position *= 1024;
|
for (u32 blockY = 0; blockY < heightBlocks; blockY++) {
|
||||||
position += ((y & 0xf) >> 3U) << 9U;
|
for (u32 blockX = 0; blockX < widthBlocks; blockX++) {
|
||||||
position += ((x & 0xf) >> 3U) << 8U;
|
for (u32 gobY = 0; gobY < blockHeight; gobY++) {
|
||||||
position += ((y & 0x7) >> 1U) << 6U;
|
const u32 x = blockX * constant::GobStride;
|
||||||
position += ((x & 0x7) >> 2U) << 5U;
|
const u32 y = blockY * blockHeightPixels + gobY * constant::GobHeight;
|
||||||
position += (y & 0x1) << 4U;
|
if (y < resolution.height) {
|
||||||
position += (x & 0x3) << 2U;
|
u8 *inBlock = inBuffer;
|
||||||
std::memcpy(address, bufferData + position, sizeof(u32) * 4);
|
u8 *outBlock = outBuffer + (y * strideBytes) + x;
|
||||||
|
for (u32 i = 0; i < 32; i++) {
|
||||||
|
const u32 yT = ((i >> 1) & 0x06) | (i & 0x01); // NOLINT(hicpp-signed-bitwise)
|
||||||
|
const u32 xT = ((i << 3) & 0x10) | ((i << 1) & 0x20); // NOLINT(hicpp-signed-bitwise)
|
||||||
|
std::memcpy(outBlock + (yT * strideBytes) + xT, inBlock, sizeof(u128));
|
||||||
|
inBlock += sizeof(u128);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inBuffer += constant::GobSize;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ANativeWindow_unlockAndPost(window);
|
ANativeWindow_unlockAndPost(window);
|
||||||
|
@ -17,6 +17,17 @@ namespace skyline::gpu {
|
|||||||
if (!nvBuffer)
|
if (!nvBuffer)
|
||||||
throw exception("A QueueBuffer request has an invalid NVMap Handle ({}) and ID ({})", gbpBuffer.nvmapHandle, gbpBuffer.nvmapId);
|
throw exception("A QueueBuffer request has an invalid NVMap Handle ({}) and ID ({})", gbpBuffer.nvmapHandle, gbpBuffer.nvmapId);
|
||||||
}
|
}
|
||||||
|
switch(gbpBuffer.format) {
|
||||||
|
case WINDOW_FORMAT_RGBA_8888:
|
||||||
|
case WINDOW_FORMAT_RGBX_8888:
|
||||||
|
bpp = sizeof(u32);
|
||||||
|
break;
|
||||||
|
case WINDOW_FORMAT_RGB_565:
|
||||||
|
bpp = sizeof(u16);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw exception("Unknown pixel format used for FB");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Buffer::UpdateBuffer() {
|
void Buffer::UpdateBuffer() {
|
||||||
@ -31,19 +42,18 @@ namespace skyline::gpu {
|
|||||||
|
|
||||||
void BufferQueue::RequestBuffer(Parcel &in, Parcel &out) {
|
void BufferQueue::RequestBuffer(Parcel &in, Parcel &out) {
|
||||||
u32 slot = *reinterpret_cast<u32 *>(in.data.data() + constant::TokenLength);
|
u32 slot = *reinterpret_cast<u32 *>(in.data.data() + constant::TokenLength);
|
||||||
auto buffer = queue.at(slot);
|
|
||||||
out.WriteData<u32>(1);
|
out.WriteData<u32>(1);
|
||||||
out.WriteData<u32>(sizeof(GbpBuffer));
|
out.WriteData<u32>(sizeof(GbpBuffer));
|
||||||
out.WriteData<u32>(0);
|
out.WriteData<u32>(0);
|
||||||
out.WriteData(buffer->gbpBuffer);
|
out.WriteData(queue.at(slot)->gbpBuffer);
|
||||||
state.logger->Debug("RequestBuffer: Slot: {}, Size: {}", slot, sizeof(GbpBuffer));
|
state.logger->Debug("RequestBuffer: Slot: {}", slot, sizeof(GbpBuffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BufferQueue::DequeueBuffer(Parcel &in, Parcel &out, u64 address, u64 size) {
|
bool BufferQueue::DequeueBuffer(Parcel &in, Parcel &out, u64 address, u64 size) {
|
||||||
auto *data = reinterpret_cast<DequeueIn *>(in.data.data() + constant::TokenLength);
|
auto *data = reinterpret_cast<DequeueIn *>(in.data.data() + constant::TokenLength);
|
||||||
i64 slot{-1};
|
i64 slot{-1};
|
||||||
for (auto &buffer : queue) {
|
for (auto &buffer : queue) {
|
||||||
if (buffer.second->status == BufferStatus::Free && buffer.second->resolution.width == data->width && buffer.second->resolution.height == data->height && buffer.second->gbpBuffer.format == data->format && buffer.second->gbpBuffer.usage == data->usage) {
|
if (buffer.second->status == BufferStatus::Free && buffer.second->resolution.width == data->width && buffer.second->resolution.height == data->height && buffer.second->gbpBuffer.usage == data->usage) {
|
||||||
slot = buffer.first;
|
slot = buffer.first;
|
||||||
buffer.second->status = BufferStatus::Dequeued;
|
buffer.second->status = BufferStatus::Dequeued;
|
||||||
}
|
}
|
||||||
@ -51,7 +61,7 @@ namespace skyline::gpu {
|
|||||||
if (slot == -1) {
|
if (slot == -1) {
|
||||||
state.thisThread->Sleep();
|
state.thisThread->Sleep();
|
||||||
waitVec.emplace_back(state.thisThread, *data, address, size);
|
waitVec.emplace_back(state.thisThread, *data, address, size);
|
||||||
state.logger->Debug("DequeueBuffer: No Free Buffers");
|
state.logger->Debug("DequeueBuffer: Width: {}, Height: {}, Format: {}, Usage: {}, Timestamps: {}, No Free Buffers", data->width, data->height, data->format, data->usage, data->timestamps);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
DequeueOut output(static_cast<u32>(slot));
|
DequeueOut output(static_cast<u32>(slot));
|
||||||
@ -86,8 +96,7 @@ namespace skyline::gpu {
|
|||||||
.height = buffer->gbpBuffer.height
|
.height = buffer->gbpBuffer.height
|
||||||
};
|
};
|
||||||
out.WriteData(output);
|
out.WriteData(output);
|
||||||
state.logger->Debug("QueueBuffer: Timestamp: {}, Auto Timestamp: {}, Crop: [T: {}, B: {}, L: {}, R: {}], Scaling Mode: {}, Transform: {}, Sticky Transform: {}, Swap Interval: {}, Slot: {}", data->timestamp, data->autoTimestamp, data->crop.top, data->crop.bottom, data->crop.left, data->crop.right, data->scalingMode, data->transform, data->stickyTransform, data->swapInterval,
|
state.logger->Debug("QueueBuffer: Timestamp: {}, Auto Timestamp: {}, Crop: [T: {}, B: {}, L: {}, R: {}], Scaling Mode: {}, Transform: {}, Sticky Transform: {}, Swap Interval: {}, Slot: {}", data->timestamp, data->autoTimestamp, data->crop.top, data->crop.bottom, data->crop.left, data->crop.right, data->scalingMode, data->transform, data->stickyTransform, data->swapInterval, data->slot);
|
||||||
data->slot);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferQueue::CancelBuffer(Parcel &parcel) {
|
void BufferQueue::CancelBuffer(Parcel &parcel) {
|
||||||
@ -96,6 +105,7 @@ namespace skyline::gpu {
|
|||||||
Fence fence[4];
|
Fence fence[4];
|
||||||
} *data = reinterpret_cast<Data *>(parcel.data.data() + constant::TokenLength);
|
} *data = reinterpret_cast<Data *>(parcel.data.data() + constant::TokenLength);
|
||||||
FreeBuffer(data->slot);
|
FreeBuffer(data->slot);
|
||||||
|
state.logger->Debug("CancelBuffer: Slot: {}", data->slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferQueue::SetPreallocatedBuffer(Parcel &parcel) {
|
void BufferQueue::SetPreallocatedBuffer(Parcel &parcel) {
|
||||||
@ -110,8 +120,7 @@ namespace skyline::gpu {
|
|||||||
auto gbpBuffer = reinterpret_cast<GbpBuffer *>(pointer);
|
auto gbpBuffer = reinterpret_cast<GbpBuffer *>(pointer);
|
||||||
queue[data->slot] = std::make_shared<Buffer>(state, data->slot, *gbpBuffer);
|
queue[data->slot] = std::make_shared<Buffer>(state, data->slot, *gbpBuffer);
|
||||||
state.gpu->bufferEvent->Signal();
|
state.gpu->bufferEvent->Signal();
|
||||||
state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Length: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}", data->slot, data->length, gbpBuffer->magic, gbpBuffer->width, gbpBuffer->height, gbpBuffer->stride, gbpBuffer->format, gbpBuffer->usage, gbpBuffer->index, gbpBuffer->nvmapId,
|
state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}, Size: 0x{:X}", data->slot, gbpBuffer->magic, gbpBuffer->width, gbpBuffer->height, gbpBuffer->stride, gbpBuffer->format, gbpBuffer->usage, gbpBuffer->index,gbpBuffer->nvmapId, gbpBuffer->nvmapHandle, gbpBuffer->offset, (1U << gbpBuffer->blockHeightLog2), gbpBuffer->size);
|
||||||
gbpBuffer->nvmapHandle, gbpBuffer->offset, (1U << gbpBuffer->blockHeightLog2));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BufferQueue::FreeBuffer(u32 slotNo) {
|
void BufferQueue::FreeBuffer(u32 slotNo) {
|
||||||
|
@ -90,8 +90,8 @@ namespace skyline::gpu {
|
|||||||
* @brief This represents conditions for the completion of an asynchronous graphics operation
|
* @brief This represents conditions for the completion of an asynchronous graphics operation
|
||||||
*/
|
*/
|
||||||
struct Fence {
|
struct Fence {
|
||||||
u32 syncptId;
|
u32 syncptId; //!< The ID of the syncpoint
|
||||||
u32 syncptValue;
|
u32 syncptValue; //!< The value of the syncpoint
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -101,6 +101,7 @@ namespace skyline::gpu {
|
|||||||
public:
|
public:
|
||||||
const DeviceState &state; //!< The state of the device
|
const DeviceState &state; //!< The state of the device
|
||||||
u32 slot; //!< The slot the buffer is in
|
u32 slot; //!< The slot the buffer is in
|
||||||
|
u32 bpp; //!< The amount of bytes per pixel
|
||||||
Resolution resolution; //!< The resolution of this buffer
|
Resolution resolution; //!< The resolution of this buffer
|
||||||
GbpBuffer gbpBuffer; //!< The information about the underlying buffer
|
GbpBuffer gbpBuffer; //!< The information about the underlying buffer
|
||||||
BufferStatus status{BufferStatus::Free}; //!< The status of this buffer
|
BufferStatus status{BufferStatus::Free}; //!< The status of this buffer
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
#include "dispdrv.h"
|
#include "dispdrv.h"
|
||||||
#include <kernel/types/KProcess.h>
|
#include <kernel/types/KProcess.h>
|
||||||
#include <gpu.h>
|
#include <gpu.h>
|
||||||
#include <android/native_window.h>
|
|
||||||
|
|
||||||
namespace skyline::service::nvnflinger {
|
namespace skyline::service::nvnflinger {
|
||||||
dispdrv::dispdrv(const DeviceState &state, ServiceManager &manager) : BaseService(state, manager, false, Service::nvnflinger_dispdrv, {
|
dispdrv::dispdrv(const DeviceState &state, ServiceManager &manager) : BaseService(state, manager, false, Service::nvnflinger_dispdrv, {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user