texture_cache: Implement Buffer Copy and detect Turing GPUs Image Copies
This commit is contained in:
		 Fernando Sahmkow
					Fernando Sahmkow
				
			
				
					committed by
					
						 ReinUsesLisp
						ReinUsesLisp
					
				
			
			
				
	
			
			
			 ReinUsesLisp
						ReinUsesLisp
					
				
			
						parent
						
							228f516bb4
						
					
				
				
					commit
					60bf761afb
				
			| @@ -28,6 +28,7 @@ Device::Device() { | ||||
|     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | ||||
|     has_variable_aoffi = TestVariableAoffi(); | ||||
|     has_component_indexing_bug = TestComponentIndexingBug(); | ||||
|     is_turing_plus = GLAD_GL_NV_mesh_shader; | ||||
| } | ||||
|  | ||||
| Device::Device(std::nullptr_t) { | ||||
|   | ||||
| @@ -34,6 +34,10 @@ public: | ||||
|         return has_component_indexing_bug; | ||||
|     } | ||||
|  | ||||
|     /// Returns the flag that the Device() constructor fills from GLAD_GL_NV_mesh_shader. | ||||
|     /// NOTE(review): presumably true on Turing or newer NVIDIA GPUs - confirm. | ||||
|     bool IsTuringGPU() const { | ||||
|         return is_turing_plus; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     static bool TestVariableAoffi(); | ||||
|     static bool TestComponentIndexingBug(); | ||||
| @@ -43,6 +47,7 @@ private: | ||||
|     u32 max_varyings{}; | ||||
|     bool has_variable_aoffi{}; | ||||
|     bool has_component_indexing_bug{}; | ||||
|     bool is_turing_plus{}; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -82,7 +82,7 @@ struct DrawParameters { | ||||
|  | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | ||||
|                                    ScreenInfo& info) | ||||
|     : texture_cache{system, *this}, shader_cache{*this, system, emu_window, device}, | ||||
|     : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | ||||
|       global_cache{*this}, system{system}, screen_info{info}, | ||||
|       buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||||
|     OpenGLState::ApplyDefaultState(); | ||||
|   | ||||
| @@ -148,6 +148,14 @@ void OGLBuffer::Release() { | ||||
|     handle = 0; | ||||
| } | ||||
|  | ||||
| // Gives the buffer a data store of buffer_size bytes through glNamedBufferStorage, | ||||
| // requesting persistent mapping with both read and write access. The initial contents | ||||
| // are left unspecified (null data pointer). Nothing happens when the buffer has not been | ||||
| // created yet or when the requested size is zero. | ||||
| void OGLBuffer::MakePersistant(std::size_t buffer_size) { | ||||
|     const bool can_allocate = handle != 0 && buffer_size != 0; | ||||
|     if (can_allocate) { | ||||
|         const GLbitfield storage_flags = | ||||
|             GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | ||||
|         const auto byte_count = static_cast<GLsizeiptr>(buffer_size); | ||||
|         glNamedBufferStorage(handle, byte_count, nullptr, storage_flags); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void OGLSync::Create() { | ||||
|     if (handle != 0) | ||||
|         return; | ||||
|   | ||||
| @@ -186,6 +186,9 @@ public: | ||||
|     /// Deletes the internal OpenGL resource | ||||
|     void Release(); | ||||
|  | ||||
|     // Converts the buffer into a persistent storage buffer | ||||
|     void MakePersistant(std::size_t buffer_size); | ||||
|  | ||||
|     GLuint handle = 0; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_util.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/microprofile.h" | ||||
| #include "common/scope_exit.h" | ||||
| @@ -435,8 +436,10 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { | ||||
| } | ||||
|  | ||||
| TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, | ||||
|                                        VideoCore::RasterizerInterface& rasterizer) | ||||
|                                        VideoCore::RasterizerInterface& rasterizer, | ||||
|                                        const Device& device) | ||||
|     : TextureCacheBase{system, rasterizer} { | ||||
|     support_info.depth_color_image_copies = !device.IsTuringGPU(); | ||||
|     src_framebuffer.Create(); | ||||
|     dst_framebuffer.Create(); | ||||
| } | ||||
| @@ -449,6 +452,14 @@ Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams | ||||
|  | ||||
| void TextureCacheOpenGL::ImageCopy(Surface src_surface, Surface dst_surface, | ||||
|                                    const VideoCommon::CopyParams& copy_params) { | ||||
|     if (!support_info.depth_color_image_copies) { | ||||
|         const auto& src_params = src_surface->GetSurfaceParams(); | ||||
|         const auto& dst_params = dst_surface->GetSurfaceParams(); | ||||
|         if (src_params.type != dst_params.type) { | ||||
|             // A fallback is needed | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
|     const auto src_handle = src_surface->GetTexture(); | ||||
|     const auto src_target = src_surface->GetTarget(); | ||||
|     const auto dst_handle = dst_surface->GetTexture(); | ||||
| @@ -517,4 +528,83 @@ void TextureCacheOpenGL::ImageBlit(View src_view, View dst_view, | ||||
|                       is_linear ? GL_LINEAR : GL_NEAREST); | ||||
| } | ||||
|  | ||||
| // Copies level 0 of src_surface into dst_surface by staging the texels in a pixel buffer | ||||
| // object: the source texture is read back into the buffer bound as GL_PIXEL_PACK_BUFFER, | ||||
| // and the same buffer is then bound as GL_PIXEL_UNPACK_BUFFER to fill the destination, | ||||
| // reinterpreting the data with the destination's format tuple. | ||||
| // Compressed destinations and unknown surface targets are logged and hit UNREACHABLE(). | ||||
| void TextureCacheOpenGL::BufferCopy(Surface src_surface, Surface dst_surface) { | ||||
|     const auto& source_params = src_surface->GetSurfaceParams(); | ||||
|     const auto& dest_params = dst_surface->GetSurfaceParams(); | ||||
|  | ||||
|     const auto src_tuple = | ||||
|         GetFormatTuple(source_params.pixel_format, source_params.component_type); | ||||
|     const auto dst_tuple = GetFormatTuple(dest_params.pixel_format, dest_params.component_type); | ||||
|  | ||||
|     // The staging buffer has to hold whichever surface is larger on the host. | ||||
|     const std::size_t src_bytes = src_surface->GetHostSizeInBytes(); | ||||
|     const std::size_t dst_bytes = dst_surface->GetHostSizeInBytes(); | ||||
|     const GLuint staging_pbo = FetchPBO(std::max(src_bytes, dst_bytes)); | ||||
|  | ||||
|     // Download pass: with a pack buffer bound, the null pointer is an offset into it. | ||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, staging_pbo); | ||||
|     const auto src_texture = src_surface->GetTexture(); | ||||
|     const auto src_length = static_cast<GLsizei>(src_bytes); | ||||
|     if (!src_tuple.compressed) { | ||||
|         glGetTextureImage(src_texture, 0, src_tuple.format, src_tuple.type, src_length, | ||||
|                           nullptr); | ||||
|     } else { | ||||
|         glGetCompressedTextureImage(src_texture, 0, src_length, nullptr); | ||||
|     } | ||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||||
|  | ||||
|     // Upload pass: the same buffer now acts as the pixel source for the destination. | ||||
|     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging_pbo); | ||||
|     const auto dst_width = static_cast<GLsizei>(dest_params.width); | ||||
|     const auto dst_height = static_cast<GLsizei>(dest_params.height); | ||||
|     const auto dst_depth = static_cast<GLsizei>(dest_params.depth); | ||||
|     if (!dst_tuple.compressed) { | ||||
|         switch (dest_params.target) { | ||||
|         case SurfaceTarget::Texture1D: | ||||
|             glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, dst_width, dst_tuple.format, | ||||
|                                 dst_tuple.type, nullptr); | ||||
|             break; | ||||
|         case SurfaceTarget::Texture2D: | ||||
|             glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, dst_width, dst_height, | ||||
|                                 dst_tuple.format, dst_tuple.type, nullptr); | ||||
|             break; | ||||
|         // Every layered or volumetric target is uploaded through the 3D entry point. | ||||
|         case SurfaceTarget::Texture3D: | ||||
|         case SurfaceTarget::Texture2DArray: | ||||
|         case SurfaceTarget::TextureCubeArray: | ||||
|         case SurfaceTarget::TextureCubemap: | ||||
|             glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, dst_width, dst_height, | ||||
|                                 dst_depth, dst_tuple.format, dst_tuple.type, nullptr); | ||||
|             break; | ||||
|         default: | ||||
|             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", | ||||
|                          static_cast<u32>(dest_params.target)); | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|     } else { | ||||
|         LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
|     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||||
|  | ||||
|     glTextureBarrier(); | ||||
| } | ||||
|  | ||||
| // Returns the handle of a cached pixel buffer object large enough for buffer_size bytes. | ||||
| // Buffers live in copy_pbo_cache keyed by Common::Log2Ceil64(buffer_size), and a missing | ||||
| // entry is created on demand with a storage of (1 << key) bytes, so requests that share a | ||||
| // key reuse the same buffer. | ||||
| // Returns 0 (after hitting UNREACHABLE()) when buffer_size is zero. | ||||
| GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { | ||||
|     // std::size_t is unsigned, so a "< 0" test can never fire; the only invalid request | ||||
|     // is an empty one, which must not reach the log2 computation below. | ||||
|     if (buffer_size == 0) { | ||||
|         UNREACHABLE(); | ||||
|         return 0; | ||||
|     } | ||||
|     const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); | ||||
|     // operator[] default-constructs the entry; a zero handle marks it as not yet created. | ||||
|     OGLBuffer& cp = copy_pbo_cache[l2]; | ||||
|     if (cp.handle == 0) { | ||||
|         const std::size_t ceil_size = 1ULL << l2; | ||||
|         cp.Create(); | ||||
|         cp.MakePersistant(ceil_size); | ||||
|     } | ||||
|     return cp.handle; | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -13,6 +13,7 @@ | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/texture_cache/texture_cache.h" | ||||
|  | ||||
| @@ -129,7 +130,8 @@ private: | ||||
|  | ||||
| class TextureCacheOpenGL final : public TextureCacheBase { | ||||
| public: | ||||
|     explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer); | ||||
|     explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                                 const Device& device); | ||||
|     ~TextureCacheOpenGL(); | ||||
|  | ||||
| protected: | ||||
| @@ -141,9 +143,14 @@ protected: | ||||
|     void ImageBlit(View src_view, View dst_view, | ||||
|                    const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||||
|  | ||||
|     void BufferCopy(Surface src_surface, Surface dst_surface) override; | ||||
|  | ||||
| private: | ||||
|     GLuint FetchPBO(std::size_t buffer_size); | ||||
|  | ||||
|     OGLFramebuffer src_framebuffer; | ||||
|     OGLFramebuffer dst_framebuffer; | ||||
|     std::unordered_map<u32, OGLBuffer> copy_pbo_cache; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -214,6 +214,13 @@ public: | ||||
|     } | ||||
|  | ||||
| protected: | ||||
|     // This structure lets the backend tell the generic texture cache which behaviors it | ||||
|     // supports, so that nothing has to be assumed about the underlying hardware. | ||||
|     // The backend is RESPONSIBLE for filling these settings on creation. Every flag is | ||||
|     // value-initialized to false so a backend that forgets one never reads an | ||||
|     // indeterminate value. | ||||
|     struct Support { | ||||
|         // When false, rebuilding a surface whose SurfaceParams::type differs from the | ||||
|         // current one goes through BufferCopy instead of ImageCopy. | ||||
|         bool depth_color_image_copies{}; | ||||
|     } support_info; | ||||
|  | ||||
|     TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||||
|         : system{system}, rasterizer{rasterizer} { | ||||
|         for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||||
| @@ -233,6 +240,10 @@ protected: | ||||
|     virtual void ImageBlit(TView src_view, TView dst_view, | ||||
|                            const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | ||||
|  | ||||
|     // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | ||||
|     // and reading it from a separate buffer. | ||||
|     virtual void BufferCopy(TSurface src_surface, TSurface dst_surface) = 0; | ||||
|  | ||||
|     void Register(TSurface surface) { | ||||
|         std::lock_guard lock{mutex}; | ||||
|  | ||||
| @@ -377,9 +388,14 @@ private: | ||||
|                                               const SurfaceParams& params) { | ||||
|         const auto gpu_addr = current_surface->GetGpuAddr(); | ||||
|         TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||||
|         std::vector<CopyParams> bricks = current_surface->BreakDown(params); | ||||
|         for (auto& brick : bricks) { | ||||
|             ImageCopy(current_surface, new_surface, brick); | ||||
|         const auto& cr_params = current_surface->GetSurfaceParams(); | ||||
|         if (!support_info.depth_color_image_copies && cr_params.type != params.type) { | ||||
|             BufferCopy(current_surface, new_surface); | ||||
|         } else { | ||||
|             std::vector<CopyParams> bricks = current_surface->BreakDown(params); | ||||
|             for (auto& brick : bricks) { | ||||
|                 ImageCopy(current_surface, new_surface, brick); | ||||
|             } | ||||
|         } | ||||
|         Unregister(current_surface); | ||||
|         Register(new_surface); | ||||
| @@ -505,7 +521,8 @@ private: | ||||
|             auto topological_result = current_surface->MatchesTopology(params); | ||||
|             if (topological_result != MatchTopologyResult::FullMatch) { | ||||
|                 std::vector<TSurface> overlaps{current_surface}; | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                                       topological_result); | ||||
|             } | ||||
|             MatchStructureResult s_result = current_surface->MatchesStructure(params); | ||||
|             if (s_result != MatchStructureResult::None && | ||||
| @@ -537,7 +554,8 @@ private: | ||||
|         for (auto surface : overlaps) { | ||||
|             auto topological_result = surface->MatchesTopology(params); | ||||
|             if (topological_result != MatchTopologyResult::FullMatch) { | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                                       topological_result); | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -555,7 +573,8 @@ private: | ||||
|                         return *view; | ||||
|                     } | ||||
|                 } | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                                       MatchTopologyResult::FullMatch); | ||||
|             } | ||||
|             // Now we check if the candidate is a mipmap/layer of the overlap | ||||
|             std::optional<TView> view = | ||||
| @@ -578,13 +597,15 @@ private: | ||||
|                         pair.first->EmplaceView(params, gpu_addr, candidate_size); | ||||
|                     if (mirage_view) | ||||
|                         return {pair.first, *mirage_view}; | ||||
|                     return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); | ||||
|                     return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                                           MatchTopologyResult::FullMatch); | ||||
|                 } | ||||
|                 return {current_surface, *view}; | ||||
|             } | ||||
|             // The next case is unsafe, so if accurate GPU emulation is enabled, just skip it | ||||
|             if (Settings::values.use_accurate_gpu_emulation) { | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); | ||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                                       MatchTopologyResult::FullMatch); | ||||
|             } | ||||
|             // This is the case the texture is a part of the parent. | ||||
|             if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||||
| @@ -601,7 +622,8 @@ private: | ||||
|             } | ||||
|         } | ||||
|         // We failed all the tests, recycle the overlaps into a new texture. | ||||
|         return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, MatchTopologyResult::FullMatch); | ||||
|         return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||||
|                               MatchTopologyResult::FullMatch); | ||||
|     } | ||||
|  | ||||
|     std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user