gl_buffer_cache: Rework to support internalized buffers
This commit is contained in:
		| @@ -7,90 +7,165 @@ | ||||
| #include <utility> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "common/assert.h" | ||||
| #include "core/core.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, | ||||
|                                      std::size_t alignment, GLuint buffer, GLintptr offset) | ||||
|     : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment}, | ||||
|       buffer{buffer}, offset{offset} {} | ||||
| namespace { | ||||
|  | ||||
| constexpr GLuint EmptyBuffer = 0; | ||||
| constexpr GLintptr CachedBufferOffset = 0; | ||||
|  | ||||
| OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { | ||||
|     OGLBuffer buffer; | ||||
|     buffer.Create(); | ||||
|     glNamedBufferData(buffer.handle, size, nullptr, usage); | ||||
|     return buffer; | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) | ||||
|     : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr} {} | ||||
|  | ||||
| OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||||
|     : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||||
|  | ||||
| std::pair<GLuint, GLintptr> OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, | ||||
|                                                          std::size_t alignment, bool cache) { | ||||
| OGLBufferCache::~OGLBufferCache() = default; | ||||
|  | ||||
| void OGLBufferCache::Unregister(const std::shared_ptr<CachedBufferEntry>& entry) { | ||||
|     std::lock_guard lock{mutex}; | ||||
|  | ||||
|     if (entry->IsInternalized()) { | ||||
|         internalized_entries.erase(entry->GetCacheAddr()); | ||||
|     } | ||||
|     ReserveBuffer(entry); | ||||
|     RasterizerCache<std::shared_ptr<CachedBufferEntry>>::Unregister(entry); | ||||
| } | ||||
|  | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, | ||||
|                                                         std::size_t alignment, bool internalize) { | ||||
|     std::lock_guard lock{mutex}; | ||||
|  | ||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||
|  | ||||
|     const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||||
|     const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||||
|     const auto cache_addr{ToCacheAddr(host_ptr)}; | ||||
|     if (!host_ptr) { | ||||
|         // Return a dummy buffer when host_ptr is invalid. | ||||
|         return {0, 0}; | ||||
|         return {EmptyBuffer, 0}; | ||||
|     } | ||||
|  | ||||
|     // Cache management is a big overhead, so only cache entries with a given size. | ||||
|     // TODO: Figure out which size is the best for given games. | ||||
|     cache &= size >= 2048; | ||||
|  | ||||
|     if (cache) { | ||||
|         if (auto entry = TryGet(host_ptr); entry) { | ||||
|             if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||||
|                 return {entry->GetBuffer(), entry->GetOffset()}; | ||||
|             } | ||||
|             Unregister(entry); | ||||
|         } | ||||
|     if (!internalize && size < 0x800 && | ||||
|         internalized_entries.find(cache_addr) == internalized_entries.end()) { | ||||
|         return StreamBufferUpload(host_ptr, size, alignment); | ||||
|     } | ||||
|  | ||||
|     AlignBuffer(alignment); | ||||
|     const GLintptr uploaded_offset = buffer_offset; | ||||
|  | ||||
|     std::memcpy(buffer_ptr, host_ptr, size); | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|  | ||||
|     const GLuint buffer = stream_buffer.GetHandle(); | ||||
|     if (cache) { | ||||
|         const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); | ||||
|         Register(std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr, size, alignment, buffer, | ||||
|                                                      uploaded_offset)); | ||||
|     auto entry = TryGet(host_ptr); | ||||
|     if (!entry) { | ||||
|         return FixedBufferUpload(gpu_addr, host_ptr, size, internalize); | ||||
|     } | ||||
|  | ||||
|     return {buffer, uploaded_offset}; | ||||
|     if (entry->GetSize() < size) { | ||||
|         GrowBuffer(entry, size); | ||||
|     } | ||||
|     return {entry->GetBuffer(), CachedBufferOffset}; | ||||
| } | ||||
|  | ||||
| std::pair<GLuint, GLintptr> OGLBufferCache::UploadHostMemory(const void* raw_pointer, | ||||
|                                                              std::size_t size, | ||||
|                                                              std::size_t alignment) { | ||||
|     std::lock_guard lock{mutex}; | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer, | ||||
|                                                             std::size_t size, | ||||
|                                                             std::size_t alignment) { | ||||
|     return StreamBufferUpload(raw_pointer, size, alignment); | ||||
| } | ||||
|  | ||||
| bool OGLBufferCache::Map(std::size_t max_size) { | ||||
|     const auto max_size_ = static_cast<GLsizeiptr>(max_size); | ||||
|     bool invalidate; | ||||
|     std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4); | ||||
|     buffer_offset = buffer_offset_base; | ||||
|     return invalidate; | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::Unmap() { | ||||
|     stream_buffer.Unmap(buffer_offset - buffer_offset_base); | ||||
| } | ||||
|  | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer, | ||||
|                                                               std::size_t size, | ||||
|                                                               std::size_t alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|     std::memcpy(buffer_ptr, raw_pointer, size); | ||||
|     const GLintptr uploaded_offset = buffer_offset; | ||||
|     std::memcpy(buffer_ptr, raw_pointer, size); | ||||
|  | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|     return {stream_buffer.GetHandle(), uploaded_offset}; | ||||
| } | ||||
|  | ||||
| bool OGLBufferCache::Map(std::size_t max_size) { | ||||
|     bool invalidate; | ||||
|     std::tie(buffer_ptr, buffer_offset_base, invalidate) = | ||||
|         stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | ||||
|     buffer_offset = buffer_offset_base; | ||||
|  | ||||
|     if (invalidate) { | ||||
|         InvalidateAll(); | ||||
| OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, | ||||
|                                                              std::size_t size, bool internalize) { | ||||
|     if (internalize) { | ||||
|         internalized_entries.emplace(ToCacheAddr(host_ptr)); | ||||
|     } | ||||
|     return invalidate; | ||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||
|     const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); | ||||
|     auto entry = GetUncachedBuffer(cpu_addr, host_ptr); | ||||
|     entry->SetSize(size); | ||||
|     entry->SetInternalState(internalize); | ||||
|     Register(entry); | ||||
|  | ||||
|     if (entry->GetCapacity() < size) { | ||||
|         entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size); | ||||
|     } | ||||
|     glNamedBufferSubData(entry->GetBuffer(), 0, static_cast<GLintptr>(size), host_ptr); | ||||
|     return {entry->GetBuffer(), CachedBufferOffset}; | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::Unmap() { | ||||
|     stream_buffer.Unmap(buffer_offset - buffer_offset_base); | ||||
| void OGLBufferCache::GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size) { | ||||
|     const auto old_size = static_cast<GLintptr>(entry->GetSize()); | ||||
|     if (entry->GetCapacity() < new_size) { | ||||
|         const auto old_buffer = entry->GetBuffer(); | ||||
|         OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY); | ||||
|  | ||||
|         // Copy bits from the old buffer to the new buffer. | ||||
|         glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size); | ||||
|         entry->SetCapacity(std::move(new_buffer), new_size); | ||||
|     } | ||||
|     // Upload the new bits. | ||||
|     const auto size_diff = static_cast<GLintptr>(new_size - old_size); | ||||
|     glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); | ||||
|  | ||||
|     // Update entry's size in the object and in the cache. | ||||
|     entry->SetSize(new_size); | ||||
|     Unregister(entry); | ||||
|     Register(entry); | ||||
| } | ||||
|  | ||||
| std::shared_ptr<CachedBufferEntry> OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { | ||||
|     if (auto entry = TryGetReservedBuffer(host_ptr); entry) { | ||||
|         return entry; | ||||
|     } | ||||
|     return std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr); | ||||
| } | ||||
|  | ||||
| std::shared_ptr<CachedBufferEntry> OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) { | ||||
|     const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); | ||||
|     if (it == buffer_reserve.end()) { | ||||
|         return {}; | ||||
|     } | ||||
|     auto& reserve = it->second; | ||||
|     auto entry = reserve.back(); | ||||
|     reserve.pop_back(); | ||||
|     return entry; | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry) { | ||||
|     buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::AlignBuffer(std::size_t alignment) { | ||||
|   | ||||
| @@ -5,9 +5,12 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <cstddef> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
| #include <unordered_set> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/rasterizer_cache.h" | ||||
| @@ -20,8 +23,7 @@ class RasterizerOpenGL; | ||||
|  | ||||
| class CachedBufferEntry final : public RasterizerCacheObject { | ||||
| public: | ||||
|     explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, | ||||
|                                std::size_t alignment, GLuint buffer, GLintptr offset); | ||||
|     explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr); | ||||
|  | ||||
|     VAddr GetCpuAddr() const override { | ||||
|         return cpu_addr; | ||||
| @@ -35,55 +37,87 @@ public: | ||||
|         return size; | ||||
|     } | ||||
|  | ||||
|     std::size_t GetAlignment() const { | ||||
|         return alignment; | ||||
|     std::size_t GetCapacity() const { | ||||
|         return capacity; | ||||
|     } | ||||
|  | ||||
|     bool IsInternalized() const { | ||||
|         return is_internal; | ||||
|     } | ||||
|  | ||||
|     GLuint GetBuffer() const { | ||||
|         return buffer; | ||||
|         return buffer.handle; | ||||
|     } | ||||
|  | ||||
|     GLintptr GetOffset() const { | ||||
|         return offset; | ||||
|     void SetSize(std::size_t new_size) { | ||||
|         size = new_size; | ||||
|     } | ||||
|  | ||||
|     void SetInternalState(bool is_internal_) { | ||||
|         is_internal = is_internal_; | ||||
|     } | ||||
|  | ||||
|     void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) { | ||||
|         capacity = new_capacity; | ||||
|         buffer = std::move(new_buffer); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     VAddr cpu_addr{}; | ||||
|     std::size_t size{}; | ||||
|     std::size_t alignment{}; | ||||
|  | ||||
|     GLuint buffer{}; | ||||
|     GLintptr offset{}; | ||||
|     std::size_t capacity{}; | ||||
|     bool is_internal{}; | ||||
|     OGLBuffer buffer; | ||||
| }; | ||||
|  | ||||
| class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||||
|     using BufferInfo = std::pair<GLuint, GLintptr>; | ||||
|  | ||||
| public: | ||||
|     explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | ||||
|     ~OGLBufferCache(); | ||||
|  | ||||
|     void Unregister(const std::shared_ptr<CachedBufferEntry>& entry) override; | ||||
|  | ||||
|     /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its | ||||
|     /// offset. | ||||
|     std::pair<GLuint, GLintptr> UploadMemory(GPUVAddr gpu_addr, std::size_t size, | ||||
|                                              std::size_t alignment = 4, bool cache = true); | ||||
|     BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||||
|                             bool internalize = false); | ||||
|  | ||||
|     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. | ||||
|     std::pair<GLuint, GLintptr> UploadHostMemory(const void* raw_pointer, std::size_t size, | ||||
|                                                  std::size_t alignment = 4); | ||||
|     BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||||
|                                 std::size_t alignment = 4); | ||||
|  | ||||
|     bool Map(std::size_t max_size); | ||||
|     void Unmap(); | ||||
|  | ||||
| protected: | ||||
|     void AlignBuffer(std::size_t alignment); | ||||
|  | ||||
|     // We do not have to flush this cache as things in it are never modified by us. | ||||
|     void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | ||||
|  | ||||
| private: | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|     BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); | ||||
|  | ||||
|     BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, | ||||
|                                  bool internalize); | ||||
|  | ||||
|     void GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size); | ||||
|  | ||||
|     std::shared_ptr<CachedBufferEntry> GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr); | ||||
|  | ||||
|     std::shared_ptr<CachedBufferEntry> TryGetReservedBuffer(u8* host_ptr); | ||||
|  | ||||
|     void ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry); | ||||
|  | ||||
|     void AlignBuffer(std::size_t alignment); | ||||
|  | ||||
|     u8* buffer_ptr = nullptr; | ||||
|     GLintptr buffer_offset = 0; | ||||
|     GLintptr buffer_offset_base = 0; | ||||
|  | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|     std::unordered_set<CacheAddr> internalized_entries; | ||||
|     std::unordered_map<CacheAddr, std::vector<std::shared_ptr<CachedBufferEntry>>> buffer_reserve; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | ||||
|     size = Common::AlignUp(size, sizeof(GLvec4)); | ||||
|     ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||||
|  | ||||
|     const std::size_t alignment = device.GetUniformBufferAlignment(); | ||||
|     const auto alignment = device.GetUniformBufferAlignment(); | ||||
|     const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); | ||||
|     bind_ubo_pushbuffer.Push(cbuf, offset, size); | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 ReinUsesLisp
					ReinUsesLisp