From dbfc39d21492dd1346b0e0d7ab5a2dbd989432bd Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Wed, 6 Jun 2018 12:58:16 -0500
Subject: [PATCH] GPU: Implement sampling multiple textures in the generated
 glsl shaders.

All tested games that use a single texture show no regression.

Only Texture2D textures are supported right now. Each shader stage gets its own sampler array
("tex_vs", "tex_gs", "tex_fs", ...) so that textures stay independent between shader stages; the
textures themselves are reused if possible.
---
 src/video_core/engines/maxwell_3d.cpp         | 34 ++++++++
 src/video_core/engines/maxwell_3d.h           |  3 +
 .../renderer_opengl/gl_rasterizer.cpp         | 83 ++++++++++---------
 .../renderer_opengl/gl_rasterizer.h           | 14 +++-
 .../renderer_opengl/gl_shader_decompiler.cpp  | 45 ++++++++--
 .../renderer_opengl/gl_shader_gen.cpp         |  2 -
 .../renderer_opengl/gl_shader_gen.h           | 40 +++++++++
 .../renderer_opengl/gl_shader_manager.cpp     | 19 -----
 .../renderer_opengl/gl_shader_manager.h       |  1 -
 9 files changed, 172 insertions(+), 69 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ef12d9300..86e9dc998 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -354,6 +354,40 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
     return textures;
 }
 
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
+    const auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+    const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
+    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+    // The handle is the offset-th TextureHandle-sized entry of the texture const buffer.
+    GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+    ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
+
+    // GpuToCpuAddress returns an optional; make sure it holds a value before dereferencing it.
+    boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+    ASSERT_MSG(tex_address_cpu != boost::none, "Texture handle address could not be translated");
+    Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+
+    Texture::FullTextureInfo tex_info{};
+    tex_info.index = static_cast<u32>(offset);
+
+    // Load the TIC data. The texture is only reported as enabled when a TIC entry is referenced.
+    if (tex_handle.tic_id != 0) {
+        tex_info.enabled = true;
+        auto tic_entry = GetTICEntry(tex_handle.tic_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
+    }
+
+    // Load the TSC data
+    if (tex_handle.tsc_id != 0) {
+        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+        // TODO(Subv): Workaround for BitField's move constructor being deleted.
+        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
+    }
+
+    return tex_info;
+}
+
 u32 Maxwell3D::GetRegisterValue(u32 method) const {
     ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
     return regs.reg_array[method];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 245410c95..56b837372 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -664,6 +664,9 @@ public:
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
+    /// Returns the texture information for a specific texture in a specific shader stage.
+    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+
     /// Returns whether the specified shader stage is enabled or not.
     bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0a33868b7..e9eb03ad9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -196,8 +196,10 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
     ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
 
-    // Next available bindpoint to use when uploading the const buffers to the GLSL shaders.
+    // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
+    // shaders.
     u32 current_constbuffer_bindpoint = 0;
+    u32 current_texture_bindpoint = 0;
 
     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
         auto& shader_config = gpu.regs.shader_config[index];
@@ -258,6 +260,11 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
         current_constbuffer_bindpoint =
             SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
                               current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+
+        // Configure the textures for this shader stage.
+        current_texture_bindpoint =
+            SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+                          current_texture_bindpoint, shader_resources.texture_samplers);
     }
 
     shader_program_manager->UseTrivialGeometryShader();
@@ -341,9 +348,6 @@ void RasterizerOpenGL::DrawArrays() {
     // TODO(bunnei): Sync framebuffer_scale uniform here
     // TODO(bunnei): Sync scissorbox uniform(s) here
 
-    // Sync and bind the texture surfaces
-    BindTextures();
-
     // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
     // scissor test to prevent drawing outside of the framebuffer region
     state.scissor.enabled = true;
@@ -447,39 +451,6 @@ void RasterizerOpenGL::DrawArrays() {
     }
 }
 
-void RasterizerOpenGL::BindTextures() {
-    using Regs = Tegra::Engines::Maxwell3D::Regs;
-    auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
-
-    // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
-    // certain number in OpenGL. We try to only use the minimum amount of host textures by not
-    // keeping a 1:1 relation between guest texture ids and host texture ids, ie, guest texture id 8
-    // can be host texture id 0 if it's the only texture used in the guest shader program.
-    u32 host_texture_index = 0;
-    for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
-        ASSERT(host_texture_index < texture_samplers.size());
-        const auto textures = maxwell3d.GetStageTextures(static_cast<Regs::ShaderStage>(stage));
-        for (unsigned texture_index = 0; texture_index < textures.size(); ++texture_index) {
-            const auto& texture = textures[texture_index];
-
-            if (texture.enabled) {
-                texture_samplers[host_texture_index].SyncWithConfig(texture.tsc);
-                Surface surface = res_cache.GetTextureSurface(texture);
-                if (surface != nullptr) {
-                    state.texture_units[host_texture_index].texture_2d = surface->texture.handle;
-                } else {
-                    // Can occur when texture addr is null or its memory is unmapped/invalid
-                    state.texture_units[texture_index].texture_2d = 0;
-                }
-
-                ++host_texture_index;
-            } else {
-                state.texture_units[texture_index].texture_2d = 0;
-            }
-        }
-    }
-}
-
 void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
     switch (method) {
@@ -674,6 +645,44 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
     return current_bindpoint + entries.size();
 }
 
+u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
+                                    const std::vector<GLShader::SamplerEntry>& entries) {
+    auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
+
+    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
+               "Attempted to upload textures of disabled shader stage");
+
+    ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
+               "Exceeded the number of active textures.");
+
+    // Samplers are assigned consecutive host texture units, starting at current_unit.
+    u32 unit = current_unit;
+    for (const auto& entry : entries) {
+        // Point the GLSL sampler uniform at the texture unit chosen for it.
+        const GLint location = glGetUniformLocation(program, entry.GetName().c_str());
+        ASSERT(location != -1);
+        glProgramUniform1i(program, location, unit);
+
+        const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
+        ASSERT(texture.enabled);
+
+        // Sync the sampler state and record the backing surface (if any) for the same unit.
+        texture_samplers[unit].SyncWithConfig(texture.tsc);
+        const Surface surface = res_cache.GetTextureSurface(texture);
+        if (surface == nullptr) {
+            // Can occur when texture addr is null or its memory is unmapped/invalid
+            state.texture_units[unit].texture_2d = 0;
+        } else {
+            state.texture_units[unit].texture_2d = surface->texture.handle;
+        }
+        ++unit;
+    }
+
+    state.Apply();
+
+    return current_unit + entries.size();
+}
+
 void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
                                                const Surface& depth_surface, bool has_stencil) {
     state.draw.draw_framebuffer = framebuffer.handle;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4b915c76a..d3f0558ed 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,9 +80,6 @@ private:
     void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
                                  bool has_stencil);
 
-    /// Binds the required textures to OpenGL before drawing a batch.
-    void BindTextures();
-
     /*
      * Configures the current constbuffers to use for the draw command.
      * @param stage The shader stage to configure buffers for.
@@ -95,6 +92,17 @@ private:
                           u32 current_bindpoint,
                           const std::vector<GLShader::ConstBufferEntry>& entries);
 
+    /*
+     * Configures the current textures to use for the draw command.
+     * @param stage The shader stage to configure textures for.
+     * @param program The OpenGL program object that contains the specified stage.
+     * @param current_unit The offset at which to start counting unused texture units.
+     * @param entries Vector describing the textures that are actually used in the guest shader.
+     * @returns The next available bindpoint for use in the next shader stage.
+     */
+    u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
+                      u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
+
     /// Syncs the viewport to match the guest state
     void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7a59ecccf..15288bd57 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -425,6 +425,14 @@ public:
             ++const_buffer_layout;
         }
         declarations.AddNewLine();
+
+        // Append the sampler2D array for the used textures.
+        size_t num_samplers = GetSamplers().size();
+        if (num_samplers > 0) {
+            declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
+                                 std::to_string(num_samplers) + "];");
+            declarations.AddNewLine();
+        }
     }
 
     /// Returns a list of constant buffer declarations
@@ -435,6 +443,32 @@ public:
         return result;
     }
 
+    /// Returns the samplers used in the shader; by reference so callers copy only if needed.
+    const std::vector<SamplerEntry>& GetSamplers() const {
+        return used_samplers;
+    }
+
+    /// Returns the GLSL expression that accesses the sampler referenced by a sampling
+    /// instruction. The first access of a given sampler appends a new entry to used_samplers;
+    /// later accesses of the same sampler reuse that entry.
+    std::string AccessSampler(const Sampler& sampler) {
+        const size_t offset = static_cast<size_t>(sampler.index.Value());
+
+        // If this sampler was already accessed, hand back the mapping created for it then so
+        // every use of it refers to the same element of the sampler array.
+        for (const SamplerEntry& known : used_samplers) {
+            if (known.GetOffset() == offset) {
+                return known.GetName();
+            }
+        }
+
+        // Otherwise register it under the next free slot of the sampler array.
+        const size_t next_index = used_samplers.size();
+        used_samplers.emplace_back(stage, offset, next_index);
+        return used_samplers.back().GetName();
+    }
+
+
 private:
     /// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
     const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
@@ -544,6 +578,7 @@ private:
     std::set<Attribute::Index> declr_input_attribute;
     std::set<Attribute::Index> declr_output_attribute;
     std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
+    std::vector<SamplerEntry> used_samplers;
     const Maxwell3D::Regs::ShaderStage& stage;
 };
 
@@ -563,7 +598,7 @@ public:
 
     /// Returns entries in the shader that are useful for external functions
     ShaderEntries GetEntries() const {
-        return {regs.GetConstBuffersDeclarations()};
+        return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()};
     }
 
 private:
@@ -585,12 +620,8 @@ private:
     }
 
     /// Generates code representing a texture sampler.
-    std::string GetSampler(const Sampler& sampler) const {
-        // TODO(Subv): Support more than just texture sampler 0
-        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
-        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
-                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
-        return "tex[" + std::to_string(index) + ']';
+    std::string GetSampler(const Sampler& sampler) {
+        return regs.AccessSampler(sampler);
     }
 
     /**
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 254f6e2c3..b88d592b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -62,8 +62,6 @@ layout (std140) uniform fs_config {
     vec4 viewport_flip;
 };
 
-uniform sampler2D tex[32];
-
 void main() {
     exec_shader();
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..e8b78934c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -56,8 +56,48 @@ private:
     Maxwell::ShaderStage stage;
 };
 
+/// Describes a texture sampler used by a shader and its slot in the GLSL sampler array.
+class SamplerEntry {
+    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
+        : offset(offset), stage(stage), sampler_index(index) {}
+
+    size_t GetOffset() const {
+        return offset;
+    }
+
+    size_t GetIndex() const {
+        return sampler_index;
+    }
+
+    Maxwell::ShaderStage GetStage() const {
+        return stage;
+    }
+
+    /// Returns the GLSL expression for this sampler, of the form "<array name>[<index>]".
+    std::string GetName() const {
+        return GetArrayName(stage) + '[' + std::to_string(sampler_index) + ']';
+    }
+
+    /// Returns the name of the GLSL sampler array used by the given shader stage.
+    static std::string GetArrayName(Maxwell::ShaderStage stage) {
+        return TextureSamplerNames[static_cast<size_t>(stage)];
+    }
+
+private:
+    static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
+        "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
+    };
+    size_t offset;              ///< Sampler offset, as specified by the sampling instruction.
+    Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+    size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
+};
+
 struct ShaderEntries {
     std::vector<ConstBufferEntry> const_buffer_entries;
+    std::vector<SamplerEntry> texture_samplers;
 };
 
 using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index ccdfc2718..7c00beb33 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -32,25 +32,6 @@ void SetShaderUniformBlockBindings(GLuint shader) {
                                  sizeof(MaxwellUniformData));
 }
 
-void SetShaderSamplerBindings(GLuint shader) {
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
-    cur_state.Apply();
-
-    // Set the texture samplers to correspond to different texture units
-    for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
-        // Set the texture samplers to correspond to different texture units
-        std::string uniform_name = "tex[" + std::to_string(texture) + "]";
-        GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
-        if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
-        }
-    }
-
-    cur_state.draw.shader_program = old_program;
-    cur_state.Apply();
-}
-
 } // namespace Impl
 
 void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e963b4b7e..4295c20a6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,7 +45,6 @@ public:
         shader.Create(program_result.first.c_str(), type);
         program.Create(true, shader.handle);
         Impl::SetShaderUniformBlockBindings(program.handle);
-        Impl::SetShaderSamplerBindings(program.handle);
         entries = program_result.second;
     }
     GLuint GetHandle() const {