Merge pull request #12411 from ameerj/gl-nv-tfb-fixups
gl_buffer_cache: Reintroduce NV_vertex_buffer_unified_memory
This commit is contained in:
		@@ -58,6 +58,9 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
 | 
			
		||||
        glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data());
 | 
			
		||||
    }
 | 
			
		||||
    glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW);
 | 
			
		||||
    if (runtime.has_unified_vertex_buffers) {
 | 
			
		||||
        glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept {
 | 
			
		||||
@@ -109,6 +112,7 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
 | 
			
		||||
    : device{device_}, staging_buffer_pool{staging_buffer_pool_},
 | 
			
		||||
      has_fast_buffer_sub_data{device.HasFastBufferSubData()},
 | 
			
		||||
      use_assembly_shaders{device.UseAssemblyShaders()},
 | 
			
		||||
      has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
 | 
			
		||||
      stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
 | 
			
		||||
    GLint gl_max_attributes;
 | 
			
		||||
    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes);
 | 
			
		||||
@@ -210,8 +214,14 @@ void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t siz
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) {
 | 
			
		||||
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
 | 
			
		||||
    index_buffer_offset = offset;
 | 
			
		||||
    if (has_unified_vertex_buffers) {
 | 
			
		||||
        buffer.MakeResident(GL_READ_ONLY);
 | 
			
		||||
        glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset,
 | 
			
		||||
                               static_cast<GLsizeiptr>(Common::AlignUp(size, 4)));
 | 
			
		||||
    } else {
 | 
			
		||||
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle());
 | 
			
		||||
        index_buffer_offset = offset;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
 | 
			
		||||
@@ -219,8 +229,15 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset,
 | 
			
		||||
    if (index >= max_attributes) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
 | 
			
		||||
                       static_cast<GLsizei>(stride));
 | 
			
		||||
    if (has_unified_vertex_buffers) {
 | 
			
		||||
        buffer.MakeResident(GL_READ_ONLY);
 | 
			
		||||
        glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride));
 | 
			
		||||
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
 | 
			
		||||
                               buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size));
 | 
			
		||||
    } else {
 | 
			
		||||
        glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset),
 | 
			
		||||
                           static_cast<GLsizei>(stride));
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bindings) {
 | 
			
		||||
@@ -233,9 +250,23 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
 | 
			
		||||
                           [](u64 stride) { return static_cast<GLsizei>(stride); });
 | 
			
		||||
    const u32 count =
 | 
			
		||||
        std::min(static_cast<u32>(bindings.buffers.size()), max_attributes - bindings.min_index);
 | 
			
		||||
    glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
 | 
			
		||||
                        reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
 | 
			
		||||
                        buffer_strides.data());
 | 
			
		||||
    if (has_unified_vertex_buffers) {
 | 
			
		||||
        for (u32 index = 0; index < count; ++index) {
 | 
			
		||||
            Buffer& buffer = *bindings.buffers[index];
 | 
			
		||||
            buffer.MakeResident(GL_READ_ONLY);
 | 
			
		||||
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, bindings.min_index + index,
 | 
			
		||||
                                   buffer.HostGpuAddr() + bindings.offsets[index],
 | 
			
		||||
                                   static_cast<GLsizeiptr>(bindings.sizes[index]));
 | 
			
		||||
        }
 | 
			
		||||
        static constexpr std::array<size_t, 32> ZEROS{};
 | 
			
		||||
        glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count),
 | 
			
		||||
                            reinterpret_cast<const GLuint*>(ZEROS.data()),
 | 
			
		||||
                            reinterpret_cast<const GLintptr*>(ZEROS.data()), buffer_strides.data());
 | 
			
		||||
    } else {
 | 
			
		||||
        glBindVertexBuffers(bindings.min_index, static_cast<GLsizei>(count), buffer_handles.data(),
 | 
			
		||||
                            reinterpret_cast<const GLintptr*>(bindings.offsets.data()),
 | 
			
		||||
                            buffer_strides.data());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer,
 | 
			
		||||
 
 | 
			
		||||
@@ -209,6 +209,7 @@ private:
 | 
			
		||||
 | 
			
		||||
    bool has_fast_buffer_sub_data = false;
 | 
			
		||||
    bool use_assembly_shaders = false;
 | 
			
		||||
    bool has_unified_vertex_buffers = false;
 | 
			
		||||
 | 
			
		||||
    bool use_storage_buffers = false;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -200,6 +200,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
 | 
			
		||||
    has_broken_texture_view_formats = is_amd || (!is_linux && is_intel);
 | 
			
		||||
    has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
 | 
			
		||||
    has_derivative_control = GLAD_GL_ARB_derivative_control;
 | 
			
		||||
    has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
 | 
			
		||||
    has_debugging_tool_attached = IsDebugToolAttached(extensions);
 | 
			
		||||
    has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
 | 
			
		||||
    has_geometry_shader_passthrough = GLAD_GL_NV_geometry_shader_passthrough;
 | 
			
		||||
 
 | 
			
		||||
@@ -72,6 +72,10 @@ public:
 | 
			
		||||
        return has_texture_shadow_lod;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool HasVertexBufferUnifiedMemory() const {
 | 
			
		||||
        return has_vertex_buffer_unified_memory;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    bool HasASTC() const {
 | 
			
		||||
        return has_astc;
 | 
			
		||||
    }
 | 
			
		||||
@@ -211,6 +215,7 @@ private:
 | 
			
		||||
    bool has_vertex_viewport_layer{};
 | 
			
		||||
    bool has_image_load_formatted{};
 | 
			
		||||
    bool has_texture_shadow_lod{};
 | 
			
		||||
    bool has_vertex_buffer_unified_memory{};
 | 
			
		||||
    bool has_astc{};
 | 
			
		||||
    bool has_variable_aoffi{};
 | 
			
		||||
    bool has_component_indexing_bug{};
 | 
			
		||||
 
 | 
			
		||||
@@ -162,14 +162,18 @@ void RasterizerOpenGL::Clear(u32 layer_count) {
 | 
			
		||||
        SyncFramebufferSRGB();
 | 
			
		||||
    }
 | 
			
		||||
    if (regs.clear_surface.Z) {
 | 
			
		||||
        ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
 | 
			
		||||
        if (regs.zeta_enable != 0) {
 | 
			
		||||
            LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!");
 | 
			
		||||
        }
 | 
			
		||||
        use_depth = true;
 | 
			
		||||
 | 
			
		||||
        state_tracker.NotifyDepthMask();
 | 
			
		||||
        glDepthMask(GL_TRUE);
 | 
			
		||||
    }
 | 
			
		||||
    if (regs.clear_surface.S) {
 | 
			
		||||
        ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
 | 
			
		||||
        if (regs.zeta_enable) {
 | 
			
		||||
            LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!");
 | 
			
		||||
        }
 | 
			
		||||
        use_stencil = true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -1294,15 +1298,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum
 | 
			
		||||
    program->ConfigureTransformFeedback();
 | 
			
		||||
 | 
			
		||||
    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
 | 
			
		||||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) ||
 | 
			
		||||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry));
 | 
			
		||||
    UNIMPLEMENTED_IF(primitive_mode != GL_POINTS);
 | 
			
		||||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
 | 
			
		||||
 | 
			
		||||
    // We may have to call BeginTransformFeedbackNV here since they seem to call different
 | 
			
		||||
    // implementations on Nvidia's driver (the pointer is different) but we are using
 | 
			
		||||
    // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
 | 
			
		||||
    // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
 | 
			
		||||
    glBeginTransformFeedback(GL_POINTS);
 | 
			
		||||
    glBeginTransformFeedback(primitive_mode);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void RasterizerOpenGL::EndTransformFeedback() {
 | 
			
		||||
 
 | 
			
		||||
@@ -168,6 +168,14 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
 | 
			
		||||
    if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
 | 
			
		||||
        glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
 | 
			
		||||
    }
 | 
			
		||||
    // Enable unified vertex attributes and query vertex buffer address when the driver supports it
 | 
			
		||||
    if (device.HasVertexBufferUnifiedMemory()) {
 | 
			
		||||
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
 | 
			
		||||
        glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV);
 | 
			
		||||
        glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
 | 
			
		||||
        glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
 | 
			
		||||
                                         &vertex_buffer_address);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
RendererOpenGL::~RendererOpenGL() = default;
 | 
			
		||||
@@ -667,7 +675,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
 | 
			
		||||
                         offsetof(ScreenRectVertex, tex_coord));
 | 
			
		||||
    glVertexAttribBinding(PositionLocation, 0);
 | 
			
		||||
    glVertexAttribBinding(TexCoordLocation, 0);
 | 
			
		||||
    glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
 | 
			
		||||
    if (device.HasVertexBufferUnifiedMemory()) {
 | 
			
		||||
        glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
 | 
			
		||||
        glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
 | 
			
		||||
                               sizeof(vertices));
 | 
			
		||||
    } else {
 | 
			
		||||
        glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) {
 | 
			
		||||
        glBindSampler(0, present_sampler.handle);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user