mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-10-31 07:59:02 -05:00 
			
		
		
		
	Update the stream_buffer helper from Citra.
Please see https://github.com/citra-emu/citra/pull/3666 for more details.
This commit is contained in:
		| @@ -36,7 +36,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||
|  | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) | ||||
|     : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { | ||||
|     // Create sampler objects | ||||
|     for (size_t i = 0; i < texture_samplers.size(); ++i) { | ||||
|         texture_samplers[i].Create(); | ||||
| @@ -57,9 +58,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind | ||||
|         const std::string_view extension{ | ||||
|             reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; | ||||
|  | ||||
|         if (extension == "GL_ARB_buffer_storage") { | ||||
|             has_ARB_buffer_storage = true; | ||||
|         } else if (extension == "GL_ARB_direct_state_access") { | ||||
|         if (extension == "GL_ARB_direct_state_access") { | ||||
|             has_ARB_direct_state_access = true; | ||||
|         } else if (extension == "GL_ARB_separate_shader_objects") { | ||||
|             has_ARB_separate_shader_objects = true; | ||||
| @@ -86,16 +85,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind | ||||
|  | ||||
|     hw_vao.Create(); | ||||
|  | ||||
|     stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | ||||
|     stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
|  | ||||
|     shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | ||||
|     state.draw.shader_program = 0; | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.Apply(); | ||||
|  | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | ||||
|     glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); | ||||
|  | ||||
|     for (unsigned index = 0; index < uniform_buffers.size(); ++index) { | ||||
|         auto& buffer = uniform_buffers[index]; | ||||
| @@ -111,13 +108,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind | ||||
|     LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | ||||
| } | ||||
|  | ||||
| RasterizerOpenGL::~RasterizerOpenGL() { | ||||
|     if (stream_buffer != nullptr) { | ||||
|         state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|         state.Apply(); | ||||
|         stream_buffer->Release(); | ||||
|     } | ||||
| } | ||||
| RasterizerOpenGL::~RasterizerOpenGL() {} | ||||
|  | ||||
| std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|                                                              GLintptr buffer_offset) { | ||||
| @@ -126,7 +117,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|     const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; | ||||
|  | ||||
|     state.draw.vertex_array = hw_vao.handle; | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     // Upload all guest vertex arrays sequentially to our buffer | ||||
| @@ -145,7 +136,7 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|         Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); | ||||
|  | ||||
|         // Bind the vertex array to the buffer at the current offset. | ||||
|         glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||||
|         glBindVertexBuffer(index, stream_buffer.GetHandle(), buffer_offset, vertex_array.stride); | ||||
|  | ||||
|         ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||||
|  | ||||
| @@ -205,7 +196,7 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|     // Helper function for uploading uniform data | ||||
|     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||||
|         if (has_ARB_direct_state_access) { | ||||
|             glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); | ||||
|             glCopyNamedBufferSubData(stream_buffer.GetHandle(), handle, offset, 0, size); | ||||
|         } else { | ||||
|             glBindBuffer(GL_COPY_WRITE_BUFFER, handle); | ||||
|             glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); | ||||
| @@ -456,7 +447,7 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | ||||
|     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | ||||
|  | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.draw.vertex_buffer = stream_buffer.GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     size_t buffer_size = CalculateVertexArraysSize(); | ||||
| @@ -471,8 +462,8 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|  | ||||
|     u8* buffer_ptr; | ||||
|     GLintptr buffer_offset; | ||||
|     std::tie(buffer_ptr, buffer_offset) = | ||||
|         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||
|     std::tie(buffer_ptr, buffer_offset, std::ignore) = | ||||
|         stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||
|  | ||||
|     u8* offseted_buffer; | ||||
|     std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||||
| @@ -500,7 +491,8 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|  | ||||
|     SetupShaders(offseted_buffer, buffer_offset); | ||||
|  | ||||
|     stream_buffer->Unmap(); | ||||
|     // TODO: Don't use buffer_size here, use the updated buffer_offset. | ||||
|     stream_buffer.Unmap(buffer_size); | ||||
|  | ||||
|     shader_program_manager->ApplyTo(state); | ||||
|     state.Apply(); | ||||
|   | ||||
| @@ -139,7 +139,6 @@ private: | ||||
|     /// Syncs the blend state to match the guest state | ||||
|     void SyncBlendState(); | ||||
|  | ||||
|     bool has_ARB_buffer_storage = false; | ||||
|     bool has_ARB_direct_state_access = false; | ||||
|     bool has_ARB_separate_shader_objects = false; | ||||
|     bool has_ARB_vertex_attrib_binding = false; | ||||
| @@ -160,7 +159,7 @@ private: | ||||
|         ssbos; | ||||
|  | ||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     std::unique_ptr<OGLStreamBuffer> stream_buffer; | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|     OGLBuffer uniform_buffer; | ||||
|     OGLFramebuffer framebuffer; | ||||
|  | ||||
|   | ||||
| @@ -9,174 +9,91 @@ | ||||
| #include "video_core/renderer_opengl/gl_state.h" | ||||
| #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||||
|  | ||||
| class OrphanBuffer : public OGLStreamBuffer { | ||||
| public: | ||||
|     explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||||
|     ~OrphanBuffer() override; | ||||
| OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) | ||||
|     : gl_target(target), buffer_size(size) { | ||||
|     gl_buffer.Create(); | ||||
|     glBindBuffer(gl_target, gl_buffer.handle); | ||||
|  | ||||
| private: | ||||
|     void Create(size_t size, size_t sync_subdivide) override; | ||||
|     void Release() override; | ||||
|     GLsizeiptr allocate_size = size; | ||||
|     if (target == GL_ARRAY_BUFFER) { | ||||
|         // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer | ||||
|         // read position is near the end and is an out-of-bound access to the vertex buffer. This is | ||||
|         // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the | ||||
|         // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the | ||||
|         // crash. | ||||
|         allocate_size *= 2; | ||||
|     } | ||||
|  | ||||
|     std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||||
|     void Unmap() override; | ||||
|     if (GLAD_GL_ARB_buffer_storage) { | ||||
|         persistent = true; | ||||
|         coherent = prefer_coherent; | ||||
|         GLbitfield flags = | ||||
|             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | ||||
|         glBufferStorage(gl_target, allocate_size, nullptr, flags); | ||||
|         mapped_ptr = static_cast<u8*>(glMapBufferRange( | ||||
|             gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | ||||
|     } else { | ||||
|         glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); | ||||
|     } | ||||
| } | ||||
|  | ||||
|     std::vector<u8> data; | ||||
| }; | ||||
|  | ||||
| class StorageBuffer : public OGLStreamBuffer { | ||||
| public: | ||||
|     explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||||
|     ~StorageBuffer() override; | ||||
|  | ||||
| private: | ||||
|     void Create(size_t size, size_t sync_subdivide) override; | ||||
|     void Release() override; | ||||
|  | ||||
|     std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||||
|     void Unmap() override; | ||||
|  | ||||
|     struct Fence { | ||||
|         OGLSync sync; | ||||
|         size_t offset; | ||||
|     }; | ||||
|     std::deque<Fence> head; | ||||
|     std::deque<Fence> tail; | ||||
|  | ||||
|     u8* mapped_ptr; | ||||
| }; | ||||
|  | ||||
| OGLStreamBuffer::OGLStreamBuffer(GLenum target) { | ||||
|     gl_target = target; | ||||
| OGLStreamBuffer::~OGLStreamBuffer() { | ||||
|     if (persistent) { | ||||
|         glBindBuffer(gl_target, gl_buffer.handle); | ||||
|         glUnmapBuffer(gl_target); | ||||
|     } | ||||
|     gl_buffer.Release(); | ||||
| } | ||||
|  | ||||
| GLuint OGLStreamBuffer::GetHandle() const { | ||||
|     return gl_buffer.handle; | ||||
| } | ||||
|  | ||||
| std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { | ||||
|     if (storage_buffer) { | ||||
|         return std::make_unique<StorageBuffer>(target); | ||||
|     } | ||||
|     return std::make_unique<OrphanBuffer>(target); | ||||
| GLsizeiptr OGLStreamBuffer::GetSize() const { | ||||
|     return buffer_size; | ||||
| } | ||||
|  | ||||
| OrphanBuffer::~OrphanBuffer() { | ||||
|     Release(); | ||||
| } | ||||
|  | ||||
| void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { | ||||
|     buffer_pos = 0; | ||||
|     buffer_size = size; | ||||
|     data.resize(buffer_size); | ||||
|  | ||||
|     if (gl_buffer.handle == 0) { | ||||
|         gl_buffer.Create(); | ||||
|         glBindBuffer(gl_target, gl_buffer.handle); | ||||
|     } | ||||
|  | ||||
|     glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); | ||||
| } | ||||
|  | ||||
| void OrphanBuffer::Release() { | ||||
|     gl_buffer.Release(); | ||||
| } | ||||
|  | ||||
| std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { | ||||
|     buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||||
|  | ||||
|     if (buffer_pos + size > buffer_size) { | ||||
|         Create(std::max(buffer_size, size), 0); | ||||
|     } | ||||
|  | ||||
|     mapped_size = size; | ||||
|     return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||||
| } | ||||
|  | ||||
| void OrphanBuffer::Unmap() { | ||||
|     glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), | ||||
|                     static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); | ||||
|     buffer_pos += mapped_size; | ||||
| } | ||||
|  | ||||
| StorageBuffer::~StorageBuffer() { | ||||
|     Release(); | ||||
| } | ||||
|  | ||||
| void StorageBuffer::Create(size_t size, size_t sync_subdivide) { | ||||
|     if (gl_buffer.handle != 0) | ||||
|         return; | ||||
|  | ||||
|     buffer_pos = 0; | ||||
|     buffer_size = size; | ||||
|     buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); | ||||
|  | ||||
|     gl_buffer.Create(); | ||||
|     glBindBuffer(gl_target, gl_buffer.handle); | ||||
|  | ||||
|     glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, | ||||
|                     GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); | ||||
|     mapped_ptr = reinterpret_cast<u8*>( | ||||
|         glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), | ||||
|                          GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); | ||||
| } | ||||
|  | ||||
| void StorageBuffer::Release() { | ||||
|     if (gl_buffer.handle == 0) | ||||
|         return; | ||||
|  | ||||
|     glUnmapBuffer(gl_target); | ||||
|  | ||||
|     gl_buffer.Release(); | ||||
|     head.clear(); | ||||
|     tail.clear(); | ||||
| } | ||||
|  | ||||
| std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { | ||||
| std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | ||||
|     ASSERT(size <= buffer_size); | ||||
|  | ||||
|     OGLSync sync; | ||||
|  | ||||
|     buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||||
|     size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); | ||||
|  | ||||
|     if (!head.empty() && | ||||
|         (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { | ||||
|         ASSERT(head.back().sync.handle == 0); | ||||
|         head.back().sync.Create(); | ||||
|     } | ||||
|  | ||||
|     if (buffer_pos + size > buffer_size) { | ||||
|         if (!tail.empty()) { | ||||
|             std::swap(sync, tail.back().sync); | ||||
|             tail.clear(); | ||||
|         } | ||||
|         std::swap(tail, head); | ||||
|         buffer_pos = 0; | ||||
|         effective_offset = 0; | ||||
|     } | ||||
|  | ||||
|     while (!tail.empty() && buffer_pos + size > tail.front().offset) { | ||||
|         std::swap(sync, tail.front().sync); | ||||
|         tail.pop_front(); | ||||
|     } | ||||
|  | ||||
|     if (sync.handle != 0) { | ||||
|         glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | ||||
|         sync.Release(); | ||||
|     } | ||||
|  | ||||
|     if (head.empty() || effective_offset > head.back().offset) { | ||||
|         head.emplace_back(); | ||||
|         head.back().offset = effective_offset; | ||||
|     } | ||||
|  | ||||
|     ASSERT(alignment <= buffer_size); | ||||
|     mapped_size = size; | ||||
|     return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||||
|  | ||||
|     if (alignment > 0) { | ||||
|         buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); | ||||
|     } | ||||
|  | ||||
|     bool invalidate = false; | ||||
|     if (buffer_pos + size > buffer_size) { | ||||
|         buffer_pos = 0; | ||||
|         invalidate = true; | ||||
|  | ||||
|         if (persistent) { | ||||
|             glUnmapBuffer(gl_target); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     if (invalidate | !persistent) { | ||||
|         GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | | ||||
|                            (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | | ||||
|                            (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); | ||||
|         mapped_ptr = static_cast<u8*>( | ||||
|             glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); | ||||
|         mapped_offset = buffer_pos; | ||||
|     } | ||||
|  | ||||
|     return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); | ||||
| } | ||||
|  | ||||
| void StorageBuffer::Unmap() { | ||||
|     glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), | ||||
|                              static_cast<GLsizeiptr>(mapped_size)); | ||||
|     buffer_pos += mapped_size; | ||||
| void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||||
|     ASSERT(size <= mapped_size); | ||||
|  | ||||
|     if (!coherent && size > 0) { | ||||
|         glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); | ||||
|     } | ||||
|  | ||||
|     if (!persistent) { | ||||
|         glUnmapBuffer(gl_target); | ||||
|     } | ||||
|  | ||||
|     buffer_pos += size; | ||||
| } | ||||
|   | ||||
| @@ -2,35 +2,41 @@ | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
| #include <glad/glad.h> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
|  | ||||
| class OGLStreamBuffer : private NonCopyable { | ||||
| public: | ||||
|     explicit OGLStreamBuffer(GLenum target); | ||||
|     virtual ~OGLStreamBuffer() = default; | ||||
|  | ||||
| public: | ||||
|     static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); | ||||
|  | ||||
|     virtual void Create(size_t size, size_t sync_subdivide) = 0; | ||||
|     virtual void Release() {} | ||||
|     explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); | ||||
|     ~OGLStreamBuffer(); | ||||
|  | ||||
|     GLuint GetHandle() const; | ||||
|     GLsizeiptr GetSize() const; | ||||
|  | ||||
|     virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; | ||||
|     virtual void Unmap() = 0; | ||||
|     /* | ||||
|      * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||||
|      * and the optional alignment requirement. | ||||
|      * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||||
|      * The return values are the pointer to the new chunk, the offset within the buffer, | ||||
|      * and the invalidation flag for previous chunks. | ||||
|      * The actual used size must be specified on unmapping the chunk. | ||||
|      */ | ||||
|     std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); | ||||
|  | ||||
| protected: | ||||
|     void Unmap(GLsizeiptr size); | ||||
|  | ||||
| private: | ||||
|     OGLBuffer gl_buffer; | ||||
|     GLenum gl_target; | ||||
|  | ||||
|     size_t buffer_pos = 0; | ||||
|     size_t buffer_size = 0; | ||||
|     size_t buffer_sync_subdivide = 0; | ||||
|     size_t mapped_size = 0; | ||||
|     bool coherent = false; | ||||
|     bool persistent = false; | ||||
|  | ||||
|     GLintptr buffer_pos = 0; | ||||
|     GLsizeiptr buffer_size = 0; | ||||
|     GLintptr mapped_offset = 0; | ||||
|     GLsizeiptr mapped_size = 0; | ||||
|     u8* mapped_ptr = nullptr; | ||||
| }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Markus Wick
					Markus Wick