mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-11-04 00:49:02 -06:00 
			
		
		
		
	Merge pull request #7036 from ameerj/ogl-bgr-v2
gl_texture_cache: Unify BGR copy passes using PBOs
This commit is contained in:
		@@ -6,7 +6,6 @@ set(SHADER_FILES
 | 
			
		||||
    convert_float_to_depth.frag
 | 
			
		||||
    full_screen_triangle.vert
 | 
			
		||||
    opengl_copy_bc4.comp
 | 
			
		||||
    opengl_copy_bgra.comp
 | 
			
		||||
    opengl_present.frag
 | 
			
		||||
    opengl_present.vert
 | 
			
		||||
    pitch_unswizzle.comp
 | 
			
		||||
 
 | 
			
		||||
@@ -1,15 +0,0 @@
 | 
			
		||||
// Copyright 2021 yuzu Emulator Project
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#version 430 core
 | 
			
		||||
 | 
			
		||||
layout (local_size_x = 4, local_size_y = 4) in;
 | 
			
		||||
 | 
			
		||||
layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
 | 
			
		||||
layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
 | 
			
		||||
 | 
			
		||||
// Each invocation handles one texel: it loads the color found at its global
// invocation ID in bgr_input and stores it, with the red and blue channels
// swapped (.bgra swizzle), at the same coordinate of bgr_output.
void main() {
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    vec4 source_color = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, source_color.bgra);
}
 | 
			
		||||
@@ -461,7 +461,7 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
 | 
			
		||||
    if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
 | 
			
		||||
    if (IsPixelFormatBGR(dst.info.format) != IsPixelFormatBGR(src.info.format)) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
    return true;
 | 
			
		||||
@@ -473,7 +473,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
 | 
			
		||||
        ASSERT(src.info.type == ImageType::e3D);
 | 
			
		||||
        util_shaders.CopyBC4(dst, src, copies);
 | 
			
		||||
    } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
 | 
			
		||||
        util_shaders.CopyBGR(dst, src, copies);
 | 
			
		||||
        bgr_copy_pass.CopyBGR(dst, src, copies);
 | 
			
		||||
    } else {
 | 
			
		||||
        UNREACHABLE();
 | 
			
		||||
    }
 | 
			
		||||
@@ -1112,4 +1112,37 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM
 | 
			
		||||
    framebuffer.handle = handle;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Copies texel data from src_image to dst_image by staging it in a pixel buffer object.
///
/// The source is read back into the PBO using the source image's GL format/type and the
/// destination is then uploaded from that PBO using the destination image's GL format/type.
/// NOTE(review): the channel reordering is presumably performed by the driver as part of these
/// two transfers - confirm against the format table.
///
/// @param dst_image Image that receives the data
/// @param src_image Image the data is read from
/// @param copies    Regions to copy; every region must start at offset (0, 0, 0)
void BGRCopyPass::CopyBGR(Image& dst_image, Image& src_image,
                          std::span<const VideoCommon::ImageCopy> copies) {
    static constexpr VideoCommon::Offset3D origin{0, 0, 0};

    // The staging buffer has to hold the larger of the two unswizzled images.
    // It only ever grows; a big enough buffer is reused as is.
    const u32 needed_bytes =
        std::max(src_image.unswizzled_size_bytes, dst_image.unswizzled_size_bytes);
    if (needed_bytes > bgr_pbo_size) {
        bgr_pbo.Create();
        bgr_pbo_size = needed_bytes;
        glNamedBufferData(bgr_pbo.handle, bgr_pbo_size, nullptr, GL_STREAM_COPY);
    }

    for (const ImageCopy& copy : copies) {
        // Only whole-origin copies are supported by this pass
        ASSERT(copy.src_offset == origin);
        ASSERT(copy.dst_offset == origin);

        const auto width = copy.extent.width;
        const auto height = copy.extent.height;

        // Source -> PBO, tightly packed rows, in the source's own format and type
        glPixelStorei(GL_PACK_ALIGNMENT, 1);
        glPixelStorei(GL_PACK_ROW_LENGTH, width);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr_pbo.handle);
        glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, width, height,
                             copy.src_subresource.num_layers, src_image.GlFormat(),
                             src_image.GlType(), static_cast<GLsizei>(bgr_pbo_size), nullptr);

        // PBO -> destination, interpreted with the destination's format and type
        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
        glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr_pbo.handle);
        glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, width, height,
                            copy.dst_subresource.num_layers, dst_image.GlFormat(),
                            dst_image.GlType(), nullptr);
    }
}
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 
 | 
			
		||||
@@ -47,6 +47,19 @@ struct FormatProperties {
 | 
			
		||||
    bool is_compressed;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class BGRCopyPass {
 | 
			
		||||
public:
 | 
			
		||||
    BGRCopyPass() = default;
 | 
			
		||||
    ~BGRCopyPass() = default;
 | 
			
		||||
 | 
			
		||||
    void CopyBGR(Image& dst_image, Image& src_image,
 | 
			
		||||
                 std::span<const VideoCommon::ImageCopy> copies);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    OGLBuffer bgr_pbo;
 | 
			
		||||
    size_t bgr_pbo_size{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class TextureCacheRuntime {
 | 
			
		||||
    friend Framebuffer;
 | 
			
		||||
    friend Image;
 | 
			
		||||
@@ -118,6 +131,7 @@ private:
 | 
			
		||||
    const Device& device;
 | 
			
		||||
    StateTracker& state_tracker;
 | 
			
		||||
    UtilShaders util_shaders;
 | 
			
		||||
    BGRCopyPass bgr_copy_pass;
 | 
			
		||||
 | 
			
		||||
    std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
 | 
			
		||||
    bool has_broken_texture_view_formats = false;
 | 
			
		||||
@@ -162,6 +176,14 @@ public:
 | 
			
		||||
        return texture.handle;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the value stored in gl_format for this image.
    GLuint GlFormat() const noexcept {
        const GLuint format = gl_format;
        return format;
    }
 | 
			
		||||
 | 
			
		||||
    /// Returns the value stored in gl_type for this image.
    GLuint GlType() const noexcept {
        const GLuint type = gl_type;
        return type;
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -52,7 +52,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
 | 
			
		||||
    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
 | 
			
		||||
    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
 | 
			
		||||
    {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
 | 
			
		||||
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
 | 
			
		||||
    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // B8G8R8A8_UNORM
 | 
			
		||||
    {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
 | 
			
		||||
    {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
 | 
			
		||||
    {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
 | 
			
		||||
@@ -81,7 +81,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TAB
 | 
			
		||||
    {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
 | 
			
		||||
    {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
 | 
			
		||||
    {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
 | 
			
		||||
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
 | 
			
		||||
    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},          // B8G8R8A8_SRGB
 | 
			
		||||
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB
 | 
			
		||||
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB
 | 
			
		||||
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,6 @@
 | 
			
		||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
 | 
			
		||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
 | 
			
		||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
 | 
			
		||||
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
 | 
			
		||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_util.h"
 | 
			
		||||
@@ -44,11 +43,6 @@ namespace {
 | 
			
		||||
/// Builds a compute-shader program from the given source text.
/// @param source Shader source handed to CreateProgram
/// @return The program returned by CreateProgram for the GL_COMPUTE_SHADER stage
OGLProgram MakeProgram(std::string_view source) {
    constexpr GLenum stage = GL_COMPUTE_SHADER;
    return CreateProgram(source, stage);
}
 | 
			
		||||
 | 
			
		||||
/// Returns the number of texels covered by a copy.
/// @param copy Copy description; its extent width and height and its source layer count are used
/// @return width * height * source layer count, computed in size_t
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
    // Widen every factor before multiplying. Multiplying first and casting the result
    // afterwards evaluates the product in the (narrower) member type, which can wrap
    // around for large layered copies before the value ever reaches size_t.
    const size_t width = static_cast<size_t>(copy.extent.width);
    const size_t height = static_cast<size_t>(copy.extent.height);
    const size_t layers = static_cast<size_t>(copy.src_subresource.num_layers);
    return width * height * layers;
}
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
 | 
			
		||||
@@ -56,7 +50,6 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
 | 
			
		||||
      block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
 | 
			
		||||
      block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
 | 
			
		||||
      pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
 | 
			
		||||
      copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
 | 
			
		||||
      copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
 | 
			
		||||
    const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
 | 
			
		||||
    swizzle_table_buffer.Create();
 | 
			
		||||
@@ -255,43 +248,6 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
 | 
			
		||||
    program_manager.RestoreGuestCompute();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Copies between images whose red and blue channels are stored in opposite order.
///
/// The path is chosen from the destination format's bytes per block:
///  - 2 bytes (BGR565): every copy is forwarded to bgr_copy_pass
///  - 4 bytes (BGRA8): the copy_bgra compute program swaps the channels
/// Any other block size is treated as unreachable.
///
/// @param dst_image Image that receives the data
/// @param src_image Image the data is read from
/// @param copies    Regions to copy; every region must start at offset (0, 0, 0)
void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
                          std::span<const VideoCommon::ImageCopy> copies) {
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
    // Must match local_size_x and local_size_y declared in opengl_copy_bgra.comp
    static constexpr GLuint WORKGROUP_SIZE = 4;
    static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
    const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
    switch (bytes_per_block) {
    case 2:
        // BGR565 copy
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            bgr_copy_pass.Execute(dst_image, src_image, copy);
        }
        break;
    case 4: {
        // BGRA8 copy
        program_manager.BindComputeProgram(copy_bgra_program.handle);
        constexpr GLenum FORMAT = GL_RGBA8;
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
                               copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
            glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
                               copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
            // glDispatchCompute takes work group counts, not texel counts. Each group already
            // covers WORKGROUP_SIZE x WORKGROUP_SIZE texels, so round the extent up to whole
            // groups instead of launching one group per texel (16x the required invocations,
            // reaching texels far outside the requested extent).
            const GLuint num_groups_x =
                (static_cast<GLuint>(copy.extent.width) + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE;
            const GLuint num_groups_y =
                (static_cast<GLuint>(copy.extent.height) + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE;
            glDispatchCompute(num_groups_x, num_groups_y, copy.extent.depth);
        }
        program_manager.RestoreGuestCompute();
        break;
    }
    default:
        UNREACHABLE();
        break;
    }
}
 | 
			
		||||
 | 
			
		||||
GLenum StoreFormat(u32 bytes_per_block) {
 | 
			
		||||
    switch (bytes_per_block) {
 | 
			
		||||
    case 1:
 | 
			
		||||
@@ -309,36 +265,4 @@ GLenum StoreFormat(u32 bytes_per_block) {
 | 
			
		||||
    return GL_R8UI;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
 | 
			
		||||
                             const ImageCopy& copy) {
 | 
			
		||||
    if (CopyBufferCreationNeeded(copy)) {
 | 
			
		||||
        CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
 | 
			
		||||
    }
 | 
			
		||||
    // Copy from source to PBO
 | 
			
		||||
    glPixelStorei(GL_PACK_ALIGNMENT, 1);
 | 
			
		||||
    glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
 | 
			
		||||
    glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
 | 
			
		||||
    glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
 | 
			
		||||
                         copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
 | 
			
		||||
                         static_cast<GLsizei>(bgr16_pbo_size), nullptr);
 | 
			
		||||
 | 
			
		||||
    // Copy from PBO to destination in reverse order
 | 
			
		||||
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
 | 
			
		||||
    glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
 | 
			
		||||
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
 | 
			
		||||
    glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
 | 
			
		||||
                        copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
 | 
			
		||||
                        nullptr);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
 | 
			
		||||
    return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Allocates staging PBO storage sized for the given copy (two bytes per texel).
/// @param copy   Copy whose texel count determines the buffer size
/// @param target Not used by this function
/// @param format Not used by this function
void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
    const size_t required_bytes = NumPixelsInCopy(copy) * sizeof(u16);

    bgr16_pbo.Create();
    bgr16_pbo_size = required_bytes;
    glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
}
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 
 | 
			
		||||
@@ -19,22 +19,6 @@ class ProgramManager;
 | 
			
		||||
 | 
			
		||||
struct ImageBufferMap;
 | 
			
		||||
 | 
			
		||||
class Bgr565CopyPass {
 | 
			
		||||
public:
 | 
			
		||||
    Bgr565CopyPass() = default;
 | 
			
		||||
    ~Bgr565CopyPass() = default;
 | 
			
		||||
 | 
			
		||||
    void Execute(const Image& dst_image, const Image& src_image,
 | 
			
		||||
                 const VideoCommon::ImageCopy& copy);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy);
 | 
			
		||||
    void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format);
 | 
			
		||||
 | 
			
		||||
    OGLBuffer bgr16_pbo;
 | 
			
		||||
    size_t bgr16_pbo_size{};
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class UtilShaders {
 | 
			
		||||
public:
 | 
			
		||||
    explicit UtilShaders(ProgramManager& program_manager);
 | 
			
		||||
@@ -55,9 +39,6 @@ public:
 | 
			
		||||
    void CopyBC4(Image& dst_image, Image& src_image,
 | 
			
		||||
                 std::span<const VideoCommon::ImageCopy> copies);
 | 
			
		||||
 | 
			
		||||
    void CopyBGR(Image& dst_image, Image& src_image,
 | 
			
		||||
                 std::span<const VideoCommon::ImageCopy> copies);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    ProgramManager& program_manager;
 | 
			
		||||
 | 
			
		||||
@@ -67,10 +48,7 @@ private:
 | 
			
		||||
    OGLProgram block_linear_unswizzle_2d_program;
 | 
			
		||||
    OGLProgram block_linear_unswizzle_3d_program;
 | 
			
		||||
    OGLProgram pitch_unswizzle_program;
 | 
			
		||||
    OGLProgram copy_bgra_program;
 | 
			
		||||
    OGLProgram copy_bc4_program;
 | 
			
		||||
 | 
			
		||||
    Bgr565CopyPass bgr_copy_pass;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
GLenum StoreFormat(u32 bytes_per_block);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user