mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-10-30 15:39:02 -05:00 
			
		
		
		
	Merge pull request #4443 from ameerj/vk-async-shaders
vulkan_renderer: Async shader/graphics pipeline compilation
This commit is contained in:
		| @@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (device.UseAsynchronousShaders()) { |     if (device.UseAsynchronousShaders()) { | ||||||
|         // Max worker threads we should allow |         async_shaders.AllocateWorkers(); | ||||||
|         constexpr u32 MAX_THREADS = 4; |  | ||||||
|         // Deduce how many threads we can use |  | ||||||
|         const u32 threads_used = std::thread::hardware_concurrency() / 4; |  | ||||||
|         // Always allow at least 1 thread regardless of our settings |  | ||||||
|         const auto max_worker_count = std::max(1U, threads_used); |  | ||||||
|         // Don't use more than MAX_THREADS |  | ||||||
|         const auto worker_count = std::min(max_worker_count, MAX_THREADS); |  | ||||||
|         async_shaders.AllocateWorkers(worker_count); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -382,6 +382,8 @@ bool VKDevice::Create() { | |||||||
|  |  | ||||||
|     graphics_queue = logical.GetQueue(graphics_family); |     graphics_queue = logical.GetQueue(graphics_family); | ||||||
|     present_queue = logical.GetQueue(present_family); |     present_queue = logical.GetQueue(present_family); | ||||||
|  |  | ||||||
|  |     use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -202,6 +202,11 @@ public: | |||||||
|         return reported_extensions; |         return reported_extensions; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Returns true if the setting for async shader compilation is enabled. | ||||||
|  |     bool UseAsynchronousShaders() const { | ||||||
|  |         return use_asynchronous_shaders; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Checks if the physical device is suitable. |     /// Checks if the physical device is suitable. | ||||||
|     static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); |     static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); | ||||||
|  |  | ||||||
| @@ -252,6 +257,9 @@ private: | |||||||
|     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state. |     bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state. | ||||||
|     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config. |     bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config. | ||||||
|  |  | ||||||
|  |     // Asynchronous Graphics Pipeline setting | ||||||
|  |     bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | ||||||
|  |  | ||||||
|     // Telemetry parameters |     // Telemetry parameters | ||||||
|     std::string vendor_name;                      ///< Device's driver name. |     std::string vendor_name;                      ///< Device's driver name. | ||||||
|     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. |     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. | ||||||
|   | |||||||
| @@ -29,7 +29,7 @@ void InnerFence::Queue() { | |||||||
|     } |     } | ||||||
|     ASSERT(!event); |     ASSERT(!event); | ||||||
|  |  | ||||||
|     event = device.GetLogical().CreateEvent(); |     event = device.GetLogical().CreateNewEvent(); | ||||||
|     ticks = scheduler.Ticks(); |     ticks = scheduler.Ticks(); | ||||||
|  |  | ||||||
|     scheduler.RequestOutsideRenderPassOperationContext(); |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|   | |||||||
| @@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche | |||||||
|                                        const GraphicsPipelineCacheKey& key, |                                        const GraphicsPipelineCacheKey& key, | ||||||
|                                        vk::Span<VkDescriptorSetLayoutBinding> bindings, |                                        vk::Span<VkDescriptorSetLayoutBinding> bindings, | ||||||
|                                        const SPIRVProgram& program) |                                        const SPIRVProgram& program) | ||||||
|     : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, |     : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, | ||||||
|       descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, |       descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | ||||||
|       descriptor_allocator{descriptor_pool, *descriptor_set_layout}, |       descriptor_allocator{descriptor_pool, *descriptor_set_layout}, | ||||||
|       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, |       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, | ||||||
|       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( |       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( | ||||||
|                                                                         program)}, |                                                                         program)}, | ||||||
|       renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( |       renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, | ||||||
|                                                                              key.renderpass_params, |       pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} | ||||||
|                                                                              program)} {} |  | ||||||
|  |  | ||||||
| VKGraphicsPipeline::~VKGraphicsPipeline() = default; | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | ||||||
|  |  | ||||||
| @@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( | |||||||
|  |  | ||||||
| vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | ||||||
|                                                 const SPIRVProgram& program) const { |                                                 const SPIRVProgram& program) const { | ||||||
|     const auto& state = fixed_state; |     const auto& state = cache_key.fixed_state; | ||||||
|     const auto& viewport_swizzles = state.viewport_swizzles; |     const auto& viewport_swizzles = state.viewport_swizzles; | ||||||
|  |  | ||||||
|     FixedPipelineState::DynamicState dynamic; |     FixedPipelineState::DynamicState dynamic; | ||||||
|   | |||||||
| @@ -19,7 +19,27 @@ namespace Vulkan { | |||||||
|  |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
|  |  | ||||||
| struct GraphicsPipelineCacheKey; | struct GraphicsPipelineCacheKey { | ||||||
|  |     RenderPassParams renderpass_params; | ||||||
|  |     u32 padding; | ||||||
|  |     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; | ||||||
|  |     FixedPipelineState fixed_state; | ||||||
|  |  | ||||||
|  |     std::size_t Hash() const noexcept; | ||||||
|  |  | ||||||
|  |     bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; | ||||||
|  |  | ||||||
|  |     bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { | ||||||
|  |         return !operator==(rhs); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::size_t Size() const noexcept { | ||||||
|  |         return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); | ||||||
|  | static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); | ||||||
|  | static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); | ||||||
|  |  | ||||||
| class VKDescriptorPool; | class VKDescriptorPool; | ||||||
| class VKDevice; | class VKDevice; | ||||||
| @@ -54,6 +74,10 @@ public: | |||||||
|         return renderpass; |         return renderpass; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     GraphicsPipelineCacheKey GetCacheKey() const { | ||||||
|  |         return cache_key; | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     vk::DescriptorSetLayout CreateDescriptorSetLayout( |     vk::DescriptorSetLayout CreateDescriptorSetLayout( | ||||||
|         vk::Span<VkDescriptorSetLayoutBinding> bindings) const; |         vk::Span<VkDescriptorSetLayoutBinding> bindings) const; | ||||||
| @@ -70,7 +94,7 @@ private: | |||||||
|  |  | ||||||
|     const VKDevice& device; |     const VKDevice& device; | ||||||
|     VKScheduler& scheduler; |     VKScheduler& scheduler; | ||||||
|     const FixedPipelineState fixed_state; |     const GraphicsPipelineCacheKey cache_key; | ||||||
|     const u64 hash; |     const u64 hash; | ||||||
|  |  | ||||||
|     vk::DescriptorSetLayout descriptor_set_layout; |     vk::DescriptorSetLayout descriptor_set_layout; | ||||||
|   | |||||||
| @@ -28,6 +28,7 @@ | |||||||
| #include "video_core/shader/compiler_settings.h" | #include "video_core/shader/compiler_settings.h" | ||||||
| #include "video_core/shader/memory_util.h" | #include "video_core/shader/memory_util.h" | ||||||
| #include "video_core/shader_cache.h" | #include "video_core/shader_cache.h" | ||||||
|  | #include "video_core/shader_notify.h" | ||||||
|  |  | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
|  |  | ||||||
| @@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||||||
|     return last_shaders = shaders; |     return last_shaders = shaders; | ||||||
| } | } | ||||||
|  |  | ||||||
| VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { | VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( | ||||||
|  |     const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { | ||||||
|     MICROPROFILE_SCOPE(Vulkan_PipelineCache); |     MICROPROFILE_SCOPE(Vulkan_PipelineCache); | ||||||
|  |  | ||||||
|     if (last_graphics_pipeline && last_graphics_key == key) { |     if (last_graphics_pipeline && last_graphics_key == key) { | ||||||
|         return *last_graphics_pipeline; |         return last_graphics_pipeline; | ||||||
|     } |     } | ||||||
|     last_graphics_key = key; |     last_graphics_key = key; | ||||||
|  |  | ||||||
|  |     if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) { | ||||||
|  |         std::unique_lock lock{pipeline_cache}; | ||||||
|  |         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||||||
|  |         if (is_cache_miss) { | ||||||
|  |             system.GPU().ShaderNotify().MarkSharderBuilding(); | ||||||
|  |             LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||||||
|  |             const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||||||
|  |             async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, | ||||||
|  |                                             update_descriptor_queue, renderpass_cache, bindings, | ||||||
|  |                                             program, key); | ||||||
|  |         } | ||||||
|  |         last_graphics_pipeline = pair->second.get(); | ||||||
|  |         return last_graphics_pipeline; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); |     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); | ||||||
|     auto& entry = pair->second; |     auto& entry = pair->second; | ||||||
|     if (is_cache_miss) { |     if (is_cache_miss) { | ||||||
|  |         system.GPU().ShaderNotify().MarkSharderBuilding(); | ||||||
|         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); |         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); | ||||||
|         const auto [program, bindings] = DecompileShaders(key); |         const auto [program, bindings] = DecompileShaders(key.fixed_state); | ||||||
|         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, |         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, | ||||||
|                                                      update_descriptor_queue, renderpass_cache, key, |                                                      update_descriptor_queue, renderpass_cache, key, | ||||||
|                                                      bindings, program); |                                                      bindings, program); | ||||||
|  |         system.GPU().ShaderNotify().MarkShaderComplete(); | ||||||
|     } |     } | ||||||
|     return *(last_graphics_pipeline = entry.get()); |     last_graphics_pipeline = entry.get(); | ||||||
|  |     return last_graphics_pipeline; | ||||||
| } | } | ||||||
|  |  | ||||||
| VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { | ||||||
| @@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||||||
|     return *entry; |     return *entry; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { | ||||||
|  |     system.GPU().ShaderNotify().MarkShaderComplete(); | ||||||
|  |     std::unique_lock lock{pipeline_cache}; | ||||||
|  |     graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); | ||||||
|  | } | ||||||
|  |  | ||||||
| void VKPipelineCache::OnShaderRemoval(Shader* shader) { | void VKPipelineCache::OnShaderRemoval(Shader* shader) { | ||||||
|     bool finished = false; |     bool finished = false; | ||||||
|     const auto Finish = [&] { |     const auto Finish = [&] { | ||||||
| @@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { | |||||||
| } | } | ||||||
|  |  | ||||||
| std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | ||||||
| VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { | ||||||
|     const auto& fixed_state = key.fixed_state; |  | ||||||
|     auto& memory_manager = system.GPU().MemoryManager(); |     auto& memory_manager = system.GPU().MemoryManager(); | ||||||
|     const auto& gpu = system.GPU().Maxwell3D(); |     const auto& gpu = system.GPU().Maxwell3D(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -22,6 +22,7 @@ | |||||||
| #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | ||||||
| #include "video_core/renderer_vulkan/wrapper.h" | #include "video_core/renderer_vulkan/wrapper.h" | ||||||
|  | #include "video_core/shader/async_shaders.h" | ||||||
| #include "video_core/shader/memory_util.h" | #include "video_core/shader/memory_util.h" | ||||||
| #include "video_core/shader/registry.h" | #include "video_core/shader/registry.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
| @@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue; | |||||||
|  |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
|  |  | ||||||
| struct GraphicsPipelineCacheKey { |  | ||||||
|     RenderPassParams renderpass_params; |  | ||||||
|     u32 padding; |  | ||||||
|     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; |  | ||||||
|     FixedPipelineState fixed_state; |  | ||||||
|  |  | ||||||
|     std::size_t Hash() const noexcept; |  | ||||||
|  |  | ||||||
|     bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; |  | ||||||
|  |  | ||||||
|     bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { |  | ||||||
|         return !operator==(rhs); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     std::size_t Size() const noexcept { |  | ||||||
|         return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); |  | ||||||
|     } |  | ||||||
| }; |  | ||||||
| static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); |  | ||||||
| static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); |  | ||||||
| static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); |  | ||||||
|  |  | ||||||
| struct ComputePipelineCacheKey { | struct ComputePipelineCacheKey { | ||||||
|     GPUVAddr shader; |     GPUVAddr shader; | ||||||
|     u32 shared_memory_size; |     u32 shared_memory_size; | ||||||
| @@ -152,16 +131,19 @@ public: | |||||||
|  |  | ||||||
|     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |     std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); | ||||||
|  |  | ||||||
|     VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); |     VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, | ||||||
|  |                                             VideoCommon::Shader::AsyncShaders& async_shaders); | ||||||
|  |  | ||||||
|     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | ||||||
|  |  | ||||||
|  |     void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     void OnShaderRemoval(Shader* shader) final; |     void OnShaderRemoval(Shader* shader) final; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( |     std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | ||||||
|         const GraphicsPipelineCacheKey& key); |         const FixedPipelineState& fixed_state); | ||||||
|  |  | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     const VKDevice& device; |     const VKDevice& device; | ||||||
| @@ -178,6 +160,7 @@ private: | |||||||
|     GraphicsPipelineCacheKey last_graphics_key; |     GraphicsPipelineCacheKey last_graphics_key; | ||||||
|     VKGraphicsPipeline* last_graphics_pipeline = nullptr; |     VKGraphicsPipeline* last_graphics_pipeline = nullptr; | ||||||
|  |  | ||||||
|  |     std::mutex pipeline_cache; | ||||||
|     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> |     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> | ||||||
|         graphics_cache; |         graphics_cache; | ||||||
|     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; |     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; | ||||||
|   | |||||||
| @@ -14,6 +14,7 @@ | |||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
|  | #include "common/scope_exit.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/settings.h" | #include "core/settings.h" | ||||||
| #include "video_core/engines/kepler_compute.h" | #include "video_core/engines/kepler_compute.h" | ||||||
| @@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||||||
|       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||||||
|       sampler_cache(device), |       sampler_cache(device), | ||||||
|       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), |       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), | ||||||
|       query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { |       query_cache(system, *this, device, scheduler), | ||||||
|  |       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { | ||||||
|     scheduler.SetQueryCache(query_cache); |     scheduler.SetQueryCache(query_cache); | ||||||
|  |     if (device.UseAsynchronousShaders()) { | ||||||
|  |         async_shaders.AllocateWorkers(); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| RasterizerVulkan::~RasterizerVulkan() = default; | RasterizerVulkan::~RasterizerVulkan() = default; | ||||||
| @@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
|  |  | ||||||
|     query_cache.UpdateCounters(); |     query_cache.UpdateCounters(); | ||||||
|  |  | ||||||
|  |     SCOPE_EXIT({ system.GPU().TickWork(); }); | ||||||
|  |  | ||||||
|     const auto& gpu = system.GPU().Maxwell3D(); |     const auto& gpu = system.GPU().Maxwell3D(); | ||||||
|     GraphicsPipelineCacheKey key; |     GraphicsPipelineCacheKey key; | ||||||
|     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); |     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); | ||||||
| @@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
|     key.renderpass_params = GetRenderPassParams(texceptions); |     key.renderpass_params = GetRenderPassParams(texceptions); | ||||||
|     key.padding = 0; |     key.padding = 0; | ||||||
|  |  | ||||||
|     auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); |     auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); | ||||||
|     scheduler.BindGraphicsPipeline(pipeline.GetHandle()); |     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { | ||||||
|  |         // Async graphics pipeline was not ready. | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const auto renderpass = pipeline.GetRenderPass(); |     scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | ||||||
|  |  | ||||||
|  |     const auto renderpass = pipeline->GetRenderPass(); | ||||||
|     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | ||||||
|     scheduler.RequestRenderpass(renderpass, framebuffer, render_area); |     scheduler.RequestRenderpass(renderpass, framebuffer, render_area); | ||||||
|  |  | ||||||
| @@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
|  |  | ||||||
|     BeginTransformFeedback(); |     BeginTransformFeedback(); | ||||||
|  |  | ||||||
|     const auto pipeline_layout = pipeline.GetLayout(); |     const auto pipeline_layout = pipeline->GetLayout(); | ||||||
|     const auto descriptor_set = pipeline.CommitDescriptorSet(); |     const auto descriptor_set = pipeline->CommitDescriptorSet(); | ||||||
|     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | ||||||
|         if (descriptor_set) { |         if (descriptor_set) { | ||||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | ||||||
| @@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
|     }); |     }); | ||||||
|  |  | ||||||
|     EndTransformFeedback(); |     EndTransformFeedback(); | ||||||
|  |  | ||||||
|     system.GPU().TickWork(); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::Clear() { | void RasterizerVulkan::Clear() { | ||||||
|   | |||||||
| @@ -32,6 +32,7 @@ | |||||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
| #include "video_core/renderer_vulkan/wrapper.h" | #include "video_core/renderer_vulkan/wrapper.h" | ||||||
|  | #include "video_core/shader/async_shaders.h" | ||||||
|  |  | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| @@ -136,6 +137,14 @@ public: | |||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     void SetupDirtyFlags() override; |     void SetupDirtyFlags() override; | ||||||
|  |  | ||||||
|  |     VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||||||
|  |         return async_shaders; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||||||
|  |         return async_shaders; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Maximum supported size that a constbuffer can have in bytes. |     /// Maximum supported size that a constbuffer can have in bytes. | ||||||
|     static constexpr std::size_t MaxConstbufferSize = 0x10000; |     static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||||||
|     static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, |     static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, | ||||||
| @@ -297,6 +306,7 @@ private: | |||||||
|     vk::Buffer default_buffer; |     vk::Buffer default_buffer; | ||||||
|     VKMemoryCommit default_buffer_commit; |     VKMemoryCommit default_buffer_commit; | ||||||
|     vk::Event wfi_event; |     vk::Event wfi_event; | ||||||
|  |     VideoCommon::Shader::AsyncShaders async_shaders; | ||||||
|  |  | ||||||
|     std::array<View, Maxwell::NumRenderTargets> color_attachments; |     std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||||||
|     View zeta_attachment; |     View zeta_attachment; | ||||||
|   | |||||||
| @@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons | |||||||
|     return ShaderModule(object, handle, *dld); |     return ShaderModule(object, handle, *dld); | ||||||
| } | } | ||||||
|  |  | ||||||
| Event Device::CreateEvent() const { | Event Device::CreateNewEvent() const { | ||||||
|     static constexpr VkEventCreateInfo ci{ |     static constexpr VkEventCreateInfo ci{ | ||||||
|         .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, |         .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, | ||||||
|         .pNext = nullptr, |         .pNext = nullptr, | ||||||
|   | |||||||
| @@ -721,7 +721,7 @@ public: | |||||||
|  |  | ||||||
|     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; |     ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; | ||||||
|  |  | ||||||
|     Event CreateEvent() const; |     Event CreateNewEvent() const; | ||||||
|  |  | ||||||
|     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; |     SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -2,7 +2,6 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #include <chrono> |  | ||||||
| #include <condition_variable> | #include <condition_variable> | ||||||
| #include <mutex> | #include <mutex> | ||||||
| #include <thread> | #include <thread> | ||||||
| @@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() { | |||||||
|     KillWorkers(); |     KillWorkers(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void AsyncShaders::AllocateWorkers(std::size_t num_workers) { | void AsyncShaders::AllocateWorkers() { | ||||||
|     // If we're already have workers queued or don't want to queue workers, ignore |     // Max worker threads we should allow | ||||||
|     if (num_workers == worker_threads.size() || num_workers == 0) { |     constexpr u32 MAX_THREADS = 4; | ||||||
|  |     // Deduce how many threads we can use | ||||||
|  |     const u32 threads_used = std::thread::hardware_concurrency() / 4; | ||||||
|  |     // Always allow at least 1 thread regardless of our settings | ||||||
|  |     const auto max_worker_count = std::max(1U, threads_used); | ||||||
|  |     // Don't use more than MAX_THREADS | ||||||
|  |     const auto num_workers = std::min(max_worker_count, MAX_THREADS); | ||||||
|  |  | ||||||
|  |     // If we already have workers queued, ignore | ||||||
|  |     if (num_workers == worker_threads.size()) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | |||||||
|                                      VideoCommon::Shader::CompilerSettings compiler_settings, |                                      VideoCommon::Shader::CompilerSettings compiler_settings, | ||||||
|                                      const VideoCommon::Shader::Registry& registry, |                                      const VideoCommon::Shader::Registry& registry, | ||||||
|                                      VAddr cpu_addr) { |                                      VAddr cpu_addr) { | ||||||
|     WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM |     WorkerParams params{ | ||||||
|                                                     : AsyncShaders::Backend::OpenGL, |         .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, | ||||||
|                         device, |         .device = &device, | ||||||
|                         shader_type, |         .shader_type = shader_type, | ||||||
|                         uid, |         .uid = uid, | ||||||
|                         std::move(code), |         .code = std::move(code), | ||||||
|                         std::move(code_b), |         .code_b = std::move(code_b), | ||||||
|                         main_offset, |         .main_offset = main_offset, | ||||||
|                         compiler_settings, |         .compiler_settings = compiler_settings, | ||||||
|                         registry, |         .registry = registry, | ||||||
|                         cpu_addr}; |         .cpu_address = cpu_addr, | ||||||
|  |     }; | ||||||
|     std::unique_lock lock(queue_mutex); |     std::unique_lock lock(queue_mutex); | ||||||
|     pending_queue.push_back(std::move(params)); |     pending_queue.push(std::move(params)); | ||||||
|  |     cv.notify_one(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, | ||||||
|  |                                      const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, | ||||||
|  |                                      Vulkan::VKDescriptorPool& descriptor_pool, | ||||||
|  |                                      Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||||||
|  |                                      Vulkan::VKRenderPassCache& renderpass_cache, | ||||||
|  |                                      std::vector<VkDescriptorSetLayoutBinding> bindings, | ||||||
|  |                                      Vulkan::SPIRVProgram program, | ||||||
|  |                                      Vulkan::GraphicsPipelineCacheKey key) { | ||||||
|  |     WorkerParams params{ | ||||||
|  |         .backend = Backend::Vulkan, | ||||||
|  |         .pp_cache = pp_cache, | ||||||
|  |         .vk_device = &device, | ||||||
|  |         .scheduler = &scheduler, | ||||||
|  |         .descriptor_pool = &descriptor_pool, | ||||||
|  |         .update_descriptor_queue = &update_descriptor_queue, | ||||||
|  |         .renderpass_cache = &renderpass_cache, | ||||||
|  |         .bindings = bindings, | ||||||
|  |         .program = program, | ||||||
|  |         .key = key, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     std::unique_lock lock(queue_mutex); | ||||||
|  |     pending_queue.push(std::move(params)); | ||||||
|     cv.notify_one(); |     cv.notify_one(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||||||
|     using namespace std::chrono_literals; |  | ||||||
|     while (!is_thread_exiting.load(std::memory_order_relaxed)) { |     while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||||||
|         std::unique_lock lock{queue_mutex}; |         std::unique_lock lock{queue_mutex}; | ||||||
|         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); |         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); | ||||||
| @@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context | |||||||
|         if (pending_queue.empty()) { |         if (pending_queue.empty()) { | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // Pull work from queue |         // Pull work from queue | ||||||
|         WorkerParams work = std::move(pending_queue.front()); |         WorkerParams work = std::move(pending_queue.front()); | ||||||
|         pending_queue.pop_front(); |         pending_queue.pop(); | ||||||
|  |  | ||||||
|         lock.unlock(); |         lock.unlock(); | ||||||
|  |  | ||||||
|         if (work.backend == AsyncShaders::Backend::OpenGL || |         if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { | ||||||
|             work.backend == AsyncShaders::Backend::GLASM) { |             const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); | ||||||
|             const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); |  | ||||||
|             const auto scope = context->Acquire(); |             const auto scope = context->Acquire(); | ||||||
|             auto program = |             auto program = | ||||||
|                 OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); |                 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); | ||||||
|             Result result{}; |             Result result{}; | ||||||
|             result.backend = work.backend; |             result.backend = work.backend; | ||||||
|             result.cpu_address = work.cpu_address; |             result.cpu_address = work.cpu_address; | ||||||
| @@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context | |||||||
|             result.code_b = std::move(work.code_b); |             result.code_b = std::move(work.code_b); | ||||||
|             result.shader_type = work.shader_type; |             result.shader_type = work.shader_type; | ||||||
|  |  | ||||||
|             if (work.backend == AsyncShaders::Backend::OpenGL) { |             if (work.backend == Backend::OpenGL) { | ||||||
|                 result.program.opengl = std::move(program->source_program); |                 result.program.opengl = std::move(program->source_program); | ||||||
|             } else if (work.backend == AsyncShaders::Backend::GLASM) { |             } else if (work.backend == Backend::GLASM) { | ||||||
|                 result.program.glasm = std::move(program->assembly_program); |                 result.program.glasm = std::move(program->assembly_program); | ||||||
|             } |             } | ||||||
|  |  | ||||||
| @@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context | |||||||
|                 std::unique_lock complete_lock(completed_mutex); |                 std::unique_lock complete_lock(completed_mutex); | ||||||
|                 finished_work.push_back(std::move(result)); |                 finished_work.push_back(std::move(result)); | ||||||
|             } |             } | ||||||
|  |         } else if (work.backend == Backend::Vulkan) { | ||||||
|  |             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( | ||||||
|  |                 *work.vk_device, *work.scheduler, *work.descriptor_pool, | ||||||
|  |                 *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, | ||||||
|  |                 work.program); | ||||||
|  |  | ||||||
|  |             work.pp_cache->EmplacePipeline(std::move(pipeline)); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -14,6 +14,10 @@ | |||||||
| #include "video_core/renderer_opengl/gl_device.h" | #include "video_core/renderer_opengl/gl_device.h" | ||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_device.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||||
|  |  | ||||||
| namespace Core::Frontend { | namespace Core::Frontend { | ||||||
| class EmuWindow; | class EmuWindow; | ||||||
| @@ -24,6 +28,10 @@ namespace Tegra { | |||||||
| class GPU; | class GPU; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | namespace Vulkan { | ||||||
|  | class VKPipelineCache; | ||||||
|  | } | ||||||
|  |  | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
|  |  | ||||||
| class AsyncShaders { | class AsyncShaders { | ||||||
| @@ -31,6 +39,7 @@ public: | |||||||
|     enum class Backend { |     enum class Backend { | ||||||
|         OpenGL, |         OpenGL, | ||||||
|         GLASM, |         GLASM, | ||||||
|  |         Vulkan, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     struct ResultPrograms { |     struct ResultPrograms { | ||||||
| @@ -52,7 +61,7 @@ public: | |||||||
|     ~AsyncShaders(); |     ~AsyncShaders(); | ||||||
|  |  | ||||||
|     /// Start up shader worker threads |     /// Start up shader worker threads | ||||||
|     void AllocateWorkers(std::size_t num_workers); |     void AllocateWorkers(); | ||||||
|  |  | ||||||
|     /// Clear the shader queue and kill all worker threads |     /// Clear the shader queue and kill all worker threads | ||||||
|     void FreeWorkers(); |     void FreeWorkers(); | ||||||
| @@ -76,6 +85,14 @@ public: | |||||||
|                            VideoCommon::Shader::CompilerSettings compiler_settings, |                            VideoCommon::Shader::CompilerSettings compiler_settings, | ||||||
|                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); |                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); | ||||||
|  |  | ||||||
|  |     void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, | ||||||
|  |                            Vulkan::VKScheduler& scheduler, | ||||||
|  |                            Vulkan::VKDescriptorPool& descriptor_pool, | ||||||
|  |                            Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, | ||||||
|  |                            Vulkan::VKRenderPassCache& renderpass_cache, | ||||||
|  |                            std::vector<VkDescriptorSetLayoutBinding> bindings, | ||||||
|  |                            Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); |     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | ||||||
|  |  | ||||||
| @@ -83,16 +100,28 @@ private: | |||||||
|     bool HasWorkQueued(); |     bool HasWorkQueued(); | ||||||
|  |  | ||||||
|     struct WorkerParams { |     struct WorkerParams { | ||||||
|         AsyncShaders::Backend backend; |         Backend backend; | ||||||
|         OpenGL::Device device; |         // For OGL | ||||||
|  |         const OpenGL::Device* device; | ||||||
|         Tegra::Engines::ShaderType shader_type; |         Tegra::Engines::ShaderType shader_type; | ||||||
|         u64 uid; |         u64 uid; | ||||||
|         std::vector<u64> code; |         std::vector<u64> code; | ||||||
|         std::vector<u64> code_b; |         std::vector<u64> code_b; | ||||||
|         u32 main_offset; |         u32 main_offset; | ||||||
|         VideoCommon::Shader::CompilerSettings compiler_settings; |         VideoCommon::Shader::CompilerSettings compiler_settings; | ||||||
|         VideoCommon::Shader::Registry registry; |         std::optional<VideoCommon::Shader::Registry> registry; | ||||||
|         VAddr cpu_address; |         VAddr cpu_address; | ||||||
|  |  | ||||||
|  |         // For Vulkan | ||||||
|  |         Vulkan::VKPipelineCache* pp_cache; | ||||||
|  |         const Vulkan::VKDevice* vk_device; | ||||||
|  |         Vulkan::VKScheduler* scheduler; | ||||||
|  |         Vulkan::VKDescriptorPool* descriptor_pool; | ||||||
|  |         Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; | ||||||
|  |         Vulkan::VKRenderPassCache* renderpass_cache; | ||||||
|  |         std::vector<VkDescriptorSetLayoutBinding> bindings; | ||||||
|  |         Vulkan::SPIRVProgram program; | ||||||
|  |         Vulkan::GraphicsPipelineCacheKey key; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     std::condition_variable cv; |     std::condition_variable cv; | ||||||
| @@ -101,7 +130,7 @@ private: | |||||||
|     std::atomic<bool> is_thread_exiting{}; |     std::atomic<bool> is_thread_exiting{}; | ||||||
|     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; |     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; | ||||||
|     std::vector<std::thread> worker_threads; |     std::vector<std::thread> worker_threads; | ||||||
|     std::deque<WorkerParams> pending_queue; |     std::queue<WorkerParams> pending_queue; | ||||||
|     std::vector<AsyncShaders::Result> finished_work; |     std::vector<AsyncShaders::Result> finished_work; | ||||||
|     Core::Frontend::EmuWindow& emu_window; |     Core::Frontend::EmuWindow& emu_window; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -92,7 +92,7 @@ | |||||||
|            <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> |            <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> | ||||||
|           </property> |           </property> | ||||||
|           <property name="text"> |           <property name="text"> | ||||||
|            <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> |            <string>Use asynchronous shader building (experimental)</string> | ||||||
|           </property> |           </property> | ||||||
|          </widget> |          </widget> | ||||||
|         </item> |         </item> | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 David
					David