diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 11f95f86..55ce3daf 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -120,8 +120,6 @@ namespace skyline::gpu { } vk::raii::Device GPU::CreateDevice(const vk::raii::PhysicalDevice &physicalDevice, decltype(vk::DeviceQueueCreateInfo::queueCount) &vkQueueFamilyIndex, QuirkManager &quirks) { - auto properties{physicalDevice.getProperties()}; - auto deviceFeatures2{physicalDevice.getFeatures2()}; decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features @@ -151,7 +149,9 @@ namespace skyline::gpu { throw exception("Cannot find Vulkan device extension: \"{}\"", requiredExtension.data()); } - quirks = QuirkManager(properties, deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions); + auto deviceProperties2{physicalDevice.getProperties2()}; + + quirks = QuirkManager(deviceFeatures2, enabledFeatures2, deviceExtensions, enabledExtensions, deviceProperties2); std::vector pEnabledExtensions; pEnabledExtensions.reserve(enabledExtensions.size()); @@ -186,6 +186,7 @@ namespace skyline::gpu { for (const auto &queueFamily : queueFamilies) queueString += util::Format("\n* {}x{}{}{}{}{}: TSB{} MIG({}x{}x{}){}", queueFamily.queueCount, queueFamily.queueFlags & vk::QueueFlagBits::eGraphics ? 'G' : '-', queueFamily.queueFlags & vk::QueueFlagBits::eCompute ? 'C' : '-', queueFamily.queueFlags & vk::QueueFlagBits::eTransfer ? 'T' : '-', queueFamily.queueFlags & vk::QueueFlagBits::eSparseBinding ? 'S' : '-', queueFamily.queueFlags & vk::QueueFlagBits::eProtected ? 'P' : '-', queueFamily.timestampValidBits, queueFamily.minImageTransferGranularity.width, queueFamily.minImageTransferGranularity.height, queueFamily.minImageTransferGranularity.depth, familyIndex++ == vkQueueFamilyIndex ? " <--" : ""); + auto properties{deviceProperties2.get().properties}; Logger::Info("Vulkan Device:\nName: {}\nType: {}\nVulkan Version: {}.{}.{}\nDriver Version: {}.{}.{}\nQueues:{}\nExtensions:{}\nQuirks:{}", properties.deviceName, vk::to_string(properties.deviceType), VK_API_VERSION_MAJOR(properties.apiVersion), VK_API_VERSION_MINOR(properties.apiVersion), VK_API_VERSION_PATCH(properties.apiVersion), diff --git a/app/src/main/cpp/skyline/gpu/quirk_manager.cpp b/app/src/main/cpp/skyline/gpu/quirk_manager.cpp index 8b91c6f1..5f084f0f 100644 --- a/app/src/main/cpp/skyline/gpu/quirk_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/quirk_manager.cpp @@ -4,7 +4,7 @@ #include "quirk_manager.h" namespace skyline::gpu { - QuirkManager::QuirkManager(const vk::PhysicalDeviceProperties &properties, const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions) { + QuirkManager::QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2) { bool hasShaderAtomicInt64{}, hasShaderFloat16Int8Ext{}; for (auto &extension : deviceExtensions) { @@ -58,7 +58,7 @@ namespace skyline::gpu { enabledFeatures2.unlink(); } - auto& shaderAtomicFeatures{deviceFeatures2.get()}; + auto &shaderAtomicFeatures{deviceFeatures2.get()}; if (hasShaderAtomicInt64 && shaderAtomicFeatures.shaderBufferInt64Atomics && shaderAtomicFeatures.shaderSharedInt64Atomics) { supportsAtomicInt64 = true; } else { @@ -73,9 +73,16 @@ namespace skyline::gpu { } #undef FEAT_SET + + if (supportsFloatControls) + floatControls = deviceProperties2.get(); + + auto &subgroupProperties{deviceProperties2.get()}; + supportsSubgroupVote = static_cast(subgroupProperties.supportedOperations & vk::SubgroupFeatureFlagBits::eVote); + subgroupSize = deviceProperties2.get().subgroupSize; } std::string QuirkManager::Summary() { - return fmt::format("\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}", supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat); + return fmt::format("\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsSubgroupVote, subgroupSize); } } diff --git a/app/src/main/cpp/skyline/gpu/quirk_manager.h b/app/src/main/cpp/skyline/gpu/quirk_manager.h index e08b2e78..30c88c5f 100644 --- a/app/src/main/cpp/skyline/gpu/quirk_manager.h +++ b/app/src/main/cpp/skyline/gpu/quirk_manager.h @@ -24,13 +24,18 @@ namespace skyline::gpu { bool supportsInt64{}; //!< If 64-bit integers are supported in shaders bool supportsAtomicInt64{}; //!< If atomic operations on 64-bit integers are supported in shaders bool supportsFloatControls{}; //!< If extensive control over FP behavior is exposed (with VK_KHR_shader_float_controls) + vk::PhysicalDeviceFloatControlsProperties floatControls{}; //!< Specifics of FP behavior control (All members will be zero'd out when unavailable) bool supportsImageReadWithoutFormat{}; //!< If a storage image can be read without a format + bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote + u32 subgroupSize{}; //!< Size of a subgroup on the host GPU QuirkManager() = default; + using DeviceProperties2 = vk::StructureChain; + using DeviceFeatures2 = vk::StructureChain; - QuirkManager(const vk::PhysicalDeviceProperties &properties, const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions); + QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2); /** * @return A summary of all the GPU quirks as a human-readable string diff --git a/app/src/main/cpp/skyline/gpu/shader_manager.cpp b/app/src/main/cpp/skyline/gpu/shader_manager.cpp index 448b1a38..504ef9d8 100644 --- a/app/src/main/cpp/skyline/gpu/shader_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/shader_manager.cpp @@ -29,6 +29,7 @@ namespace skyline::gpu { .needs_demote_reorder = false, }; + constexpr u32 TegraX1WarpSize{32}; //!< The amount of threads in a warp on the Tegra X1 profile = Shader::Profile{ .supported_spirv = quirks.supportsSpirv14 ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, @@ -38,8 +39,17 @@ namespace skyline::gpu { .support_int64 = quirks.supportsInt64, .support_vertex_instance_id = false, .support_float_controls = quirks.supportsFloatControls, - // TODO: Float control specifics - .support_vote = true, + .support_separate_denorm_behavior = quirks.floatControls.denormBehaviorIndependence == vk::ShaderFloatControlsIndependence::eAll, + .support_separate_rounding_mode = quirks.floatControls.roundingModeIndependence == vk::ShaderFloatControlsIndependence::eAll, + .support_fp16_denorm_preserve = static_cast(quirks.floatControls.shaderDenormPreserveFloat16), + .support_fp32_denorm_preserve = static_cast(quirks.floatControls.shaderDenormPreserveFloat32), + .support_fp16_denorm_flush = static_cast(quirks.floatControls.shaderDenormFlushToZeroFloat16), + .support_fp32_denorm_flush = static_cast(quirks.floatControls.shaderDenormFlushToZeroFloat32), + .support_fp16_signed_zero_nan_preserve = static_cast(quirks.floatControls.shaderSignedZeroInfNanPreserveFloat16), + .support_fp32_signed_zero_nan_preserve = static_cast(quirks.floatControls.shaderSignedZeroInfNanPreserveFloat32), + .support_fp64_signed_zero_nan_preserve = static_cast(quirks.floatControls.shaderSignedZeroInfNanPreserveFloat64), + .support_explicit_workgroup_layout = false, + .support_vote = quirks.supportsSubgroupVote, .support_viewport_index_layer_non_geometry = false, .support_viewport_mask = false, .support_typeless_image_loads = quirks.supportsImageReadWithoutFormat, @@ -47,7 +57,7 @@ namespace skyline::gpu { .support_int64_atomics = false, .support_derivative_control = true, .support_geometry_shader_passthrough = false, - // TODO: Warp size property + .warp_size_potentially_larger_than_guest = TegraX1WarpSize < quirks.subgroupSize, .lower_left_origin_mode = false, .need_declared_frag_colors = false, };