Merge pull request #1723 from degasus/dirty_flags
gl_rasterizer: Skip VB upload if the state is clean.
This commit is contained in:
		@@ -34,6 +34,9 @@ MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB
 | 
			
		||||
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
 | 
			
		||||
    MICROPROFILE_SCOPE(ProcessCommandLists);
 | 
			
		||||
 | 
			
		||||
    // On entering GPU code, assume all memory may be touched by the ARM core.
 | 
			
		||||
    maxwell_3d->dirty_flags.OnMemoryWrite();
 | 
			
		||||
 | 
			
		||||
    auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
 | 
			
		||||
        LOG_TRACE(HW_GPU,
 | 
			
		||||
                  "Processing method {:08X} on subchannel {} value "
 | 
			
		||||
 
 | 
			
		||||
@@ -2,8 +2,10 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/memory.h"
 | 
			
		||||
#include "video_core/engines/fermi_2d.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
#include "video_core/textures/decoders.h"
 | 
			
		||||
 | 
			
		||||
@@ -47,6 +49,9 @@ void Fermi2D::HandleSurfaceCopy() {
 | 
			
		||||
    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
 | 
			
		||||
 | 
			
		||||
    if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst)) {
 | 
			
		||||
        // All copies here update the main memory, so mark all rasterizer states as invalid.
 | 
			
		||||
        Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 | 
			
		||||
 | 
			
		||||
        rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
 | 
			
		||||
        // We have to invalidate the destination region to evict any outdated surfaces from the
 | 
			
		||||
        // cache. We do this before actually writing the new data because the destination address
 | 
			
		||||
 
 | 
			
		||||
@@ -3,8 +3,10 @@
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/memory.h"
 | 
			
		||||
#include "video_core/engines/kepler_memory.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines {
 | 
			
		||||
@@ -47,6 +49,7 @@ void KeplerMemory::ProcessData(u32 data) {
 | 
			
		||||
    rasterizer.InvalidateRegion(dest_address, sizeof(u32));
 | 
			
		||||
 | 
			
		||||
    Memory::Write32(dest_address, data);
 | 
			
		||||
    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 | 
			
		||||
 | 
			
		||||
    state.write_offset++;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -135,10 +135,24 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 | 
			
		||||
 | 
			
		||||
    if (regs.reg_array[method] != value) {
 | 
			
		||||
        regs.reg_array[method] = value;
 | 
			
		||||
        // Vertex format
 | 
			
		||||
        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
 | 
			
		||||
            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
 | 
			
		||||
            dirty_flags.vertex_attrib_format = true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Vertex buffer
 | 
			
		||||
        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
 | 
			
		||||
            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
 | 
			
		||||
            dirty_flags.vertex_array |= 1u << ((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
 | 
			
		||||
        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
 | 
			
		||||
                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
 | 
			
		||||
            dirty_flags.vertex_array |=
 | 
			
		||||
                1u << ((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
 | 
			
		||||
        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
 | 
			
		||||
                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
 | 
			
		||||
            dirty_flags.vertex_array |= 1u << (method - MAXWELL3D_REG_INDEX(instanced_arrays));
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    switch (method) {
 | 
			
		||||
@@ -270,6 +284,7 @@ void Maxwell3D::ProcessQueryGet() {
 | 
			
		||||
            query_result.timestamp = CoreTiming::GetTicks();
 | 
			
		||||
            Memory::WriteBlock(*address, &query_result, sizeof(query_result));
 | 
			
		||||
        }
 | 
			
		||||
        dirty_flags.OnMemoryWrite();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    default:
 | 
			
		||||
@@ -346,6 +361,7 @@ void Maxwell3D::ProcessCBData(u32 value) {
 | 
			
		||||
        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
 | 
			
		||||
 | 
			
		||||
    Memory::Write32(*address, value);
 | 
			
		||||
    dirty_flags.OnMemoryWrite();
 | 
			
		||||
 | 
			
		||||
    // Increment the current buffer position.
 | 
			
		||||
    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
 | 
			
		||||
 
 | 
			
		||||
@@ -1061,6 +1061,11 @@ public:
 | 
			
		||||
 | 
			
		||||
    /// Tracks which pieces of vertex-related state changed since the rasterizer last consumed
    /// them. Both members start out fully dirty so that the first draw sets everything up.
    struct DirtyFlags {
 | 
			
		||||
        /// Set by Maxwell3D::WriteReg when a register inside vertex_attrib_format changes value.
        bool vertex_attrib_format = true;
 | 
			
		||||
        /// Bitmask with one bit per vertex array index. WriteReg sets a bit when the matching
        /// vertex_array, vertex_array_limit or instanced_arrays register changes value;
        /// RasterizerOpenGL::SetupVertexBuffer skips clean indices and resets the mask to 0.
        u32 vertex_array = 0xFFFFFFFF;
 | 
			
		||||
 | 
			
		||||
        /// Marks every vertex array as dirty. Called wherever guest memory may have been
        /// written. Note that vertex_attrib_format is intentionally left untouched here.
        void OnMemoryWrite() {
 | 
			
		||||
            vertex_array = 0xFFFFFFFF;
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    DirtyFlags dirty_flags;
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,9 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/memory.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/engines/maxwell_dma.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
#include "video_core/textures/decoders.h"
 | 
			
		||||
@@ -54,6 +56,9 @@ void MaxwellDMA::HandleCopy() {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // All copies here update the main memory, so mark all rasterizer states as invalid.
 | 
			
		||||
    Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
 | 
			
		||||
 | 
			
		||||
    if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
 | 
			
		||||
        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
 | 
			
		||||
        // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
 | 
			
		||||
 
 | 
			
		||||
@@ -76,7 +76,7 @@ std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::s
 | 
			
		||||
    return std::make_tuple(uploaded_ptr, uploaded_offset);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void OGLBufferCache::Map(std::size_t max_size) {
 | 
			
		||||
bool OGLBufferCache::Map(std::size_t max_size) {
 | 
			
		||||
    bool invalidate;
 | 
			
		||||
    std::tie(buffer_ptr, buffer_offset_base, invalidate) =
 | 
			
		||||
        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
 | 
			
		||||
@@ -85,6 +85,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
 | 
			
		||||
    if (invalidate) {
 | 
			
		||||
        InvalidateAll();
 | 
			
		||||
    }
 | 
			
		||||
    return invalidate;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void OGLBufferCache::Unmap() {
 | 
			
		||||
 
 | 
			
		||||
@@ -50,7 +50,7 @@ public:
 | 
			
		||||
    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
 | 
			
		||||
    std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
 | 
			
		||||
 | 
			
		||||
    void Map(std::size_t max_size);
 | 
			
		||||
    bool Map(std::size_t max_size);
 | 
			
		||||
    void Unmap();
 | 
			
		||||
 | 
			
		||||
    GLuint GetHandle() const;
 | 
			
		||||
 
 | 
			
		||||
@@ -176,15 +176,25 @@ void RasterizerOpenGL::SetupVertexFormat() {
 | 
			
		||||
    }
 | 
			
		||||
    state.draw.vertex_array = VAO.handle;
 | 
			
		||||
    state.ApplyVertexBufferState();
 | 
			
		||||
 | 
			
		||||
    // Rebinding the VAO invalidates the vertex buffer bindings.
 | 
			
		||||
    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::SetupVertexBuffer() {
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_VB);
 | 
			
		||||
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
 | 
			
		||||
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
 | 
			
		||||
    const auto& regs = gpu.regs;
 | 
			
		||||
 | 
			
		||||
    if (!gpu.dirty_flags.vertex_array)
 | 
			
		||||
        return;
 | 
			
		||||
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_VB);
 | 
			
		||||
 | 
			
		||||
    // Upload all guest vertex arrays sequentially to our buffer
 | 
			
		||||
    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
 | 
			
		||||
        if (~gpu.dirty_flags.vertex_array & (1u << index))
 | 
			
		||||
            continue;
 | 
			
		||||
 | 
			
		||||
        const auto& vertex_array = regs.vertex_array[index];
 | 
			
		||||
        if (!vertex_array.IsEnabled())
 | 
			
		||||
            continue;
 | 
			
		||||
@@ -211,6 +221,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
 | 
			
		||||
 | 
			
		||||
    // Implicit set by glBindVertexBuffer. Stupid glstate handling...
 | 
			
		||||
    state.draw.vertex_buffer = buffer_cache.GetHandle();
 | 
			
		||||
 | 
			
		||||
    gpu.dirty_flags.vertex_array = 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
DrawParameters RasterizerOpenGL::SetupDraw() {
 | 
			
		||||
@@ -600,7 +612,7 @@ void RasterizerOpenGL::DrawArrays() {
 | 
			
		||||
        return;
 | 
			
		||||
 | 
			
		||||
    MICROPROFILE_SCOPE(OpenGL_Drawing);
 | 
			
		||||
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
 | 
			
		||||
    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
 | 
			
		||||
    const auto& regs = gpu.regs;
 | 
			
		||||
 | 
			
		||||
    ScopeAcquireGLContext acquire_context{emu_window};
 | 
			
		||||
@@ -653,7 +665,11 @@ void RasterizerOpenGL::DrawArrays() {
 | 
			
		||||
    // Add space for at least 18 constant buffers
 | 
			
		||||
    buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
 | 
			
		||||
 | 
			
		||||
    buffer_cache.Map(buffer_size);
 | 
			
		||||
    bool invalidate = buffer_cache.Map(buffer_size);
 | 
			
		||||
    if (invalidate) {
 | 
			
		||||
        // As all cached buffers are invalidated, we need to recheck their state.
 | 
			
		||||
        gpu.dirty_flags.vertex_attrib_format = 0xFFFFFFFF;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    SetupVertexFormat();
 | 
			
		||||
    SetupVertexBuffer();
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user