Merge pull request #7036 from ameerj/ogl-bgr-v2
gl_texture_cache: Unify BGR copy passes using PBOs
This commit is contained in:
		| @@ -6,7 +6,6 @@ set(SHADER_FILES | ||||
|     convert_float_to_depth.frag | ||||
|     full_screen_triangle.vert | ||||
|     opengl_copy_bc4.comp | ||||
|     opengl_copy_bgra.comp | ||||
|     opengl_present.frag | ||||
|     opengl_present.vert | ||||
|     pitch_unswizzle.comp | ||||
|   | ||||
| @@ -1,15 +0,0 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #version 430 core | ||||
|  | ||||
| layout (local_size_x = 4, local_size_y = 4) in; | ||||
|  | ||||
| layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input; | ||||
| layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output; | ||||
|  | ||||
// Each invocation loads one texel from bgr_input and stores it to the same
// coordinate of bgr_output with its first and third components exchanged.
void main() {
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    vec4 texel_color = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, texel_color.bgra);
}
| @@ -461,7 +461,7 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { | ||||
|     if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { | ||||
|         return false; | ||||
|     } | ||||
|     if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||||
|     if (IsPixelFormatBGR(dst.info.format) != IsPixelFormatBGR(src.info.format)) { | ||||
|         return false; | ||||
|     } | ||||
|     return true; | ||||
| @@ -473,7 +473,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, | ||||
|         ASSERT(src.info.type == ImageType::e3D); | ||||
|         util_shaders.CopyBC4(dst, src, copies); | ||||
|     } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { | ||||
|         util_shaders.CopyBGR(dst, src, copies); | ||||
|         bgr_copy_pass.CopyBGR(dst, src, copies); | ||||
|     } else { | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
| @@ -1112,4 +1112,37 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | ||||
|     framebuffer.handle = handle; | ||||
| } | ||||
|  | ||||
| void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image, | ||||
|                           std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; | ||||
|     const u32 requested_pbo_size = | ||||
|         std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes); | ||||
|  | ||||
|     if (bgr_pbo_size < requested_pbo_size) { | ||||
|         bgr_pbo.Create(); | ||||
|         bgr_pbo_size = requested_pbo_size; | ||||
|         glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY); | ||||
|     } | ||||
|     for (const ImageCopy& copy : copies) { | ||||
|         ASSERT(copy.src_offset == zero_offset); | ||||
|         ASSERT(copy.dst_offset == zero_offset); | ||||
|  | ||||
|         // Copy from source to PBO | ||||
|         glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||||
|         glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); | ||||
|         glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle); | ||||
|         glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||
|                              copy.src_subresource.num_layers, src_image.GlFormat(), | ||||
|                              src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr); | ||||
|  | ||||
|         // Copy from PBO to destination in desired GL format | ||||
|         glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | ||||
|         glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); | ||||
|         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle); | ||||
|         glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||
|                             copy.dst_subresource.num_layers, dst_image.GlFormat(), | ||||
|                             dst_image.GlType(), nullptr); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -47,6 +47,19 @@ struct FormatProperties { | ||||
|     bool is_compressed; | ||||
| }; | ||||
|  | ||||
| class BGRCopyPass { | ||||
| public: | ||||
|     BGRCopyPass() = default; | ||||
|     ~BGRCopyPass() = default; | ||||
|  | ||||
|     void CopyBGR(Image& dst_image, Image& src_image, | ||||
|                  std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
| private: | ||||
|     OGLBuffer bgr_pbo; | ||||
|     size_t bgr_pbo_size{}; | ||||
| }; | ||||
|  | ||||
| class TextureCacheRuntime { | ||||
|     friend Framebuffer; | ||||
|     friend Image; | ||||
| @@ -118,6 +131,7 @@ private: | ||||
|     const Device& device; | ||||
|     StateTracker& state_tracker; | ||||
|     UtilShaders util_shaders; | ||||
|     BGRCopyPass bgr_copy_pass; | ||||
|  | ||||
|     std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; | ||||
|     bool has_broken_texture_view_formats = false; | ||||
| @@ -162,6 +176,14 @@ public: | ||||
|         return texture.handle; | ||||
|     } | ||||
|  | ||||
|     GLuint GlFormat() const noexcept { | ||||
|         return gl_format; | ||||
|     } | ||||
|  | ||||
|     GLuint GlType() const noexcept { | ||||
|         return gl_type; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | ||||
|  | ||||
|   | ||||
| @@ -52,7 +52,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB | ||||
|     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT | ||||
|     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT | ||||
|     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM | ||||
|     {GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // B8G8R8A8_UNORM | ||||
|     {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT | ||||
|     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT | ||||
|     {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT | ||||
| @@ -81,7 +81,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM | ||||
|     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM | ||||
|     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB | ||||
|     {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},          // B8G8R8A8_SRGB | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB | ||||
|   | ||||
| @@ -14,7 +14,6 @@ | ||||
| #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||||
| #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||||
| #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||||
| #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | ||||
| #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_util.h" | ||||
| @@ -44,11 +43,6 @@ namespace { | ||||
| OGLProgram MakeProgram(std::string_view source) { | ||||
|     return CreateProgram(source, GL_COMPUTE_SHADER); | ||||
| } | ||||
|  | ||||
| size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { | ||||
|     return static_cast<size_t>(copy.extent.width * copy.extent.height * | ||||
|                                copy.src_subresource.num_layers); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||||
| @@ -56,7 +50,6 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||||
|       block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), | ||||
|       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||||
|       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||||
|       copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), | ||||
|       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { | ||||
|     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||||
|     swizzle_table_buffer.Create(); | ||||
| @@ -255,43 +248,6 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | ||||
|     program_manager.RestoreGuestCompute(); | ||||
| } | ||||
|  | ||||
| void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | ||||
|                           std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     static constexpr GLuint BINDING_INPUT_IMAGE = 0; | ||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; | ||||
|     static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; | ||||
|     const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); | ||||
|     switch (bytes_per_block) { | ||||
|     case 2: | ||||
|         // BGR565 copy | ||||
|         for (const ImageCopy& copy : copies) { | ||||
|             ASSERT(copy.src_offset == zero_offset); | ||||
|             ASSERT(copy.dst_offset == zero_offset); | ||||
|             bgr_copy_pass.Execute(dst_image, src_image, copy); | ||||
|         } | ||||
|         break; | ||||
|     case 4: { | ||||
|         // BGRA8 copy | ||||
|         program_manager.BindComputeProgram(copy_bgra_program.handle); | ||||
|         constexpr GLenum FORMAT = GL_RGBA8; | ||||
|         for (const ImageCopy& copy : copies) { | ||||
|             ASSERT(copy.src_offset == zero_offset); | ||||
|             ASSERT(copy.dst_offset == zero_offset); | ||||
|             glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), | ||||
|                                copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); | ||||
|             glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), | ||||
|                                copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); | ||||
|             glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); | ||||
|         } | ||||
|         program_manager.RestoreGuestCompute(); | ||||
|         break; | ||||
|     } | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| GLenum StoreFormat(u32 bytes_per_block) { | ||||
|     switch (bytes_per_block) { | ||||
|     case 1: | ||||
| @@ -309,36 +265,4 @@ GLenum StoreFormat(u32 bytes_per_block) { | ||||
|     return GL_R8UI; | ||||
| } | ||||
|  | ||||
| void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, | ||||
|                              const ImageCopy& copy) { | ||||
|     if (CopyBufferCreationNeeded(copy)) { | ||||
|         CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); | ||||
|     } | ||||
|     // Copy from source to PBO | ||||
|     glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||||
|     glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); | ||||
|     glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); | ||||
|     glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||
|                          copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, | ||||
|                          static_cast<GLsizei>(bgr16_pbo_size), nullptr); | ||||
|  | ||||
|     // Copy from PBO to destination in reverse order | ||||
|     glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | ||||
|     glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); | ||||
|     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); | ||||
|     glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, | ||||
|                         copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, | ||||
|                         nullptr); | ||||
| } | ||||
|  | ||||
| bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { | ||||
|     return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); | ||||
| } | ||||
|  | ||||
| void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { | ||||
|     bgr16_pbo.Create(); | ||||
|     bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); | ||||
|     glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -19,22 +19,6 @@ class ProgramManager; | ||||
|  | ||||
| struct ImageBufferMap; | ||||
|  | ||||
| class Bgr565CopyPass { | ||||
| public: | ||||
|     Bgr565CopyPass() = default; | ||||
|     ~Bgr565CopyPass() = default; | ||||
|  | ||||
|     void Execute(const Image& dst_image, const Image& src_image, | ||||
|                  const VideoCommon::ImageCopy& copy); | ||||
|  | ||||
| private: | ||||
|     [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); | ||||
|     void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); | ||||
|  | ||||
|     OGLBuffer bgr16_pbo; | ||||
|     size_t bgr16_pbo_size{}; | ||||
| }; | ||||
|  | ||||
| class UtilShaders { | ||||
| public: | ||||
|     explicit UtilShaders(ProgramManager& program_manager); | ||||
| @@ -55,9 +39,6 @@ public: | ||||
|     void CopyBC4(Image& dst_image, Image& src_image, | ||||
|                  std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void CopyBGR(Image& dst_image, Image& src_image, | ||||
|                  std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
| private: | ||||
|     ProgramManager& program_manager; | ||||
|  | ||||
| @@ -67,10 +48,7 @@ private: | ||||
|     OGLProgram block_linear_unswizzle_2d_program; | ||||
|     OGLProgram block_linear_unswizzle_3d_program; | ||||
|     OGLProgram pitch_unswizzle_program; | ||||
|     OGLProgram copy_bgra_program; | ||||
|     OGLProgram copy_bc4_program; | ||||
|  | ||||
|     Bgr565CopyPass bgr_copy_pass; | ||||
| }; | ||||
|  | ||||
| GLenum StoreFormat(u32 bytes_per_block); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei