Rasterizer cache refactor v2 (#6479)

* rasterizer_cache: Switch to template

* Eliminates all opengl references in the rasterizer cache headers
  thus completing the backend abstraction

* rasterizer_cache: Switch to page table

* Surface storage isn't particularly interval sensitive so we can use a page table to make it faster

* rasterizer_cache: Move sampler management out of rasterizer cache

* rasterizer_cache: Remove shared_ptr usage

* Switches to yuzu's slot vector for improved memory locality.

* rasterizer_cache: Rework reinterpretation lookup

* citra_qt: Per game texture filter

* rasterizer_cache: Log additional settings

* gl_texture_runtime: Resolve shadow map comment

* rasterizer_cache: Don't use float for viewport

* gl_texture_runtime: Fix custom allocation recycling

* rasterizer_cache: Minor cleanups

* Cleanup texture cubes when all the faces have been unregistered from the cache

* custom_tex_manager: Allow multiple hash mappings per texture

* code: Move slot vector to common

* rasterizer_cache: Prevent texture cube crashes

* rasterizer_cache: Improve mipmap validation

* CanSubRect now works properly when validating multi-level surfaces, for example Dark Moon validates a 4 level surface from a 3 level one and it works

* gl_blit_handler: Unbind sampler on reinterpretation
This commit is contained in:
GPUCode
2023-05-07 02:34:28 +03:00
committed by GitHub
parent 322d7a8287
commit 2e655f73b8
32 changed files with 2238 additions and 1927 deletions

View File

@@ -10,9 +10,9 @@ namespace VideoCore {
FramebufferBase::FramebufferBase() = default;
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color,
u32 color_level, const SurfaceBase* const depth_stencil,
u32 depth_level, Common::Rectangle<u32> surfaces_rect) {
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect) {
res_scale = color ? color->res_scale : (depth_stencil ? depth_stencil->res_scale : 1u);
// Determine the draw rectangle (render area + scissor)
@@ -31,10 +31,10 @@ FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* cons
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
viewport.x = static_cast<f32>(surfaces_rect.left + viewport_rect.left * res_scale);
viewport.y = static_cast<f32>(surfaces_rect.bottom + viewport_rect.bottom * res_scale);
viewport.width = static_cast<f32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<f32>(viewport_rect.GetHeight() * res_scale);
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.

View File

@@ -16,10 +16,10 @@ namespace VideoCore {
class SurfaceBase;
struct ViewportInfo {
f32 x;
f32 y;
f32 width;
f32 height;
s32 x;
s32 y;
s32 width;
s32 height;
};
/**
@@ -29,8 +29,8 @@ struct ViewportInfo {
class FramebufferBase {
public:
FramebufferBase();
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* const color, u32 color_level,
const SurfaceBase* const depth_stencil, u32 depth_level,
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect);
SurfaceParams ColorParams() const noexcept {
@@ -66,6 +66,7 @@ protected:
switch (type) {
case VideoCore::SurfaceType::Color:
return 0;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return 1;
default:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,229 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_base.h"
namespace Memory {
class MemorySystem;
}
namespace Pica {
struct Regs;
}
namespace Pica::Texture {
struct TextureInfo;
}
namespace VideoCore {
/// Policy describing how the resolution scale of a cached surface is compared against the
/// scale of the requested parameters. Passed as the match_res_scale / match_scale_type
/// argument of the cache lookup functions declared below.
enum class ScaleMatch {
Exact, ///< Only accept same res scale
Upscale, ///< Only allow higher scale than params
Ignore ///< Accept every scaled res
};
/// Bit flags naming the kinds of surface match a lookup may accept. Every enumerator
/// occupies its own bit; the flags are used as the find_flags template argument of
/// RasterizerCache::FindMatch.
/// NOTE(review): combining flags presumably relies on the operators generated by
/// DECLARE_ENUM_FLAG_OPERATORS right after this enum - confirm against its definition.
enum class MatchFlags {
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 5, ///< Surface might have different pixel format.
};
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
class CustomTexManager;
class RendererBase;
template <class T>
class RasterizerCache {
/// Address shift for caching surfaces into a hash table
static constexpr u64 CITRA_PAGEBITS = 18;
using Runtime = typename T::Runtime;
using Sampler = typename T::Sampler;
using Surface = typename T::Surface;
using Framebuffer = typename T::Framebuffer;
using SurfaceMap = boost::icl::interval_map<PAddr, SurfaceId, boost::icl::partial_absorber,
std::less, boost::icl::inplace_plus,
boost::icl::inter_section, SurfaceInterval>;
using SurfaceRect_Tuple = std::pair<SurfaceId, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
struct RenderTargets {
SurfaceId color_id;
SurfaceId depth_id;
};
struct TextureCube {
SurfaceId surface_id;
std::array<SurfaceId, 6> face_ids;
std::array<u64, 6> ticks;
};
public:
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
~RasterizerCache();
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Perform hardware accelerated texture copy according to the provided configuration
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config);
/// Perform hardware accelerated display transfer according to the provided configuration
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config);
/// Perform hardware accelerated memory fill according to the provided configuration
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config);
/// Returns a reference to the surface object assigned to surface_id
Surface& GetSurface(SurfaceId surface_id);
/// Returns a reference to the sampler object matching the provided configuration
Sampler& GetSampler(const Pica::TexturingRegs::TextureConfig& config);
Sampler& GetSampler(SamplerId sampler_id);
/// Copy one surface's region to another
void CopySurface(Surface& src_surface, Surface& dst_surface, SurfaceInterval copy_interval);
/// Load a texture from 3DS memory to the host GPU and cache it (if not already cached)
SurfaceId GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to the host GPU and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
/// Get a surface based on the texture configuration
Surface& GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
SurfaceId GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0);
/// Get a texture cube based on the texture configuration
Surface& GetTextureCube(const TextureCubeConfig& config);
/// Get the color and depth surfaces based on the framebuffer configuration
Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Marks the draw rectangle defined in framebuffer as invalid
void InvalidateFramebuffer(const Framebuffer& framebuffer);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
/// Write any cached resources overlapping the region back to memory (if dirty)
void FlushRegion(PAddr addr, u32 size, SurfaceId flush_surface = {});
/// Mark region as being invalidated by region_owner (default-constructed SurfaceId if 3DS memory)
void InvalidateRegion(PAddr addr, u32 size, SurfaceId region_owner = {});
/// Flush all cached resources tracked by this cache manager
void FlushAll();
/// Clear all cached resources tracked by this cache manager
void ClearAll(bool flush);
private:
/// Iterate over all page indices in a range
/// Calls func(page) for every page index touched by [addr, addr + size), where a page
/// index is the address shifted right by CITRA_PAGEBITS.
/// If func returns bool, a true return value stops the iteration early; callables with
/// any other return type are invoked for every page.
template <typename Func>
void ForEachPage(PAddr addr, size_t size, Func&& func) {
    // invoke_result_t yields the callable's actual return type. Comparing the
    // invoke_result trait struct itself against bool is always false, which would
    // silently disable the early-exit path below.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> CITRA_PAGEBITS;
    for (u64 page = addr >> CITRA_PAGEBITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Iterates over all the surfaces in a region calling func
template <typename Func>
void ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func);
/// Get the best surface match (and its match type) for the given flags
template <MatchFlags find_flags>
SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type,
std::optional<SurfaceInterval> validate_interval = std::nullopt);
/// Transfers ownership of a memory region from the surface src_id to the surface dst_id
void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id);
/// Update surface's texture for given region when necessary
void ValidateSurface(SurfaceId surface, PAddr addr, u32 size);
/// Copies pixel data in interval from the guest VRAM to the host GPU surface
void UploadSurface(Surface& surface, SurfaceInterval interval);
/// Uploads a custom texture identified with hash to the target surface
bool UploadCustomSurface(SurfaceId surface_id, SurfaceInterval interval);
/// Copies pixel data in interval from the host GPU surface to the guest VRAM
void DownloadSurface(Surface& surface, SurfaceInterval interval);
/// Downloads a fill surface to guest VRAM
void DownloadFillSurface(Surface& surface, SurfaceInterval interval);
/// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
bool ValidateByReinterpretation(Surface& surface, SurfaceParams params,
const SurfaceInterval& interval);
/// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(const SurfaceParams& params, SurfaceInterval interval);
/// Create a new surface
SurfaceId CreateSurface(const SurfaceParams& params);
/// Register surface into the cache
void RegisterSurface(SurfaceId surface);
/// Remove surface from the cache
void UnregisterSurface(SurfaceId surface);
/// Unregisters all surfaces from the cache
void UnregisterAll();
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
private:
Memory::MemorySystem& memory;
CustomTexManager& custom_tex_manager;
Runtime& runtime;
Pica::Regs& regs;
RendererBase& renderer;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
tsl::robin_pg_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<SamplerParams, SamplerId> samplers;
Common::SlotVector<Surface> slot_surfaces;
Common::SlotVector<Sampler> slot_samplers;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceId> remove_surfaces;
u32 resolution_scale_factor;
RenderTargets render_targets;
bool use_filter;
bool dump_textures;
bool use_custom_textures;
};
} // namespace VideoCore

View File

@@ -0,0 +1,43 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include "common/hash.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
/// Description of a texture sampler configuration; used as the key when looking up
/// cached sampler objects.
/// Hash() digests the raw bytes of the object, so every field carries a default
/// initializer: a default-initialized instance must not feed indeterminate bytes into
/// the hash.
struct SamplerParams {
    using TextureConfig = Pica::TexturingRegs::TextureConfig;

    TextureConfig::TextureFilter mag_filter{};
    TextureConfig::TextureFilter min_filter{};
    TextureConfig::TextureFilter mip_filter{};
    TextureConfig::WrapMode wrap_s{};
    TextureConfig::WrapMode wrap_t{};
    u32 border_color = 0;
    u32 lod_min = 0;
    u32 lod_max = 0;
    s32 lod_bias = 0;

    /// Member-wise comparison of all fields.
    auto operator<=>(const SamplerParams&) const noexcept = default;

    /// Returns a 64-bit hash computed over the object representation of this struct.
    u64 Hash() const {
        return Common::ComputeHash64(this, sizeof(SamplerParams));
    }
};
static_assert(std::has_unique_object_representations_v<SamplerParams>,
"SamplerParams is not suitable for hashing");
} // namespace VideoCore
namespace std {
/// Hash support so that VideoCore::SamplerParams can be used as a key in unordered
/// containers; it simply forwards to SamplerParams::Hash().
template <>
struct hash<VideoCore::SamplerParams> {
std::size_t operator()(const VideoCore::SamplerParams& params) const noexcept {
return params.Hash();
}
};
} // namespace std

View File

@@ -45,13 +45,16 @@ bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fil
}
bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const {
SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
ASSERT(subrect_params.GetInterval() == copy_interval);
if (CanSubRect(subrect_params))
return true;
if (CanFill(dest_surface, copy_interval))
if (CanSubRect(subrect_params)) {
return true;
}
if (CanFill(dest_surface, copy_interval)) {
return true;
}
return false;
}
@@ -102,6 +105,23 @@ SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) co
return result;
}
/// Returns the dimensions of the surface's backing texture.
/// A custom surface reports the size of its material; otherwise the resolution-scaled
/// size is reported when `scaled` is true and the unscaled width/height when it is false.
Extent SurfaceBase::RealExtent(bool scaled) {
    // A custom material overrides both the scaled and the unscaled dimensions.
    if (IsCustom()) {
        const u32 custom_width = material->width;
        const u32 custom_height = material->height;
        return Extent{
            .width = custom_width,
            .height = custom_height,
        };
    }

    if (scaled) {
        const u32 scaled_width = GetScaledWidth();
        const u32 scaled_height = GetScaledHeight();
        return Extent{
            .width = scaled_width,
            .height = scaled_height,
        };
    }

    const u32 native_width = width;
    const u32 native_height = height;
    return Extent{
        .width = native_width,
        .height = native_height,
    };
}
/// Reports whether a material is attached and that material provides a normal map.
bool SurfaceBase::HasNormalMap() const noexcept {
    // Without a material there is nothing to query.
    if (material == nullptr) {
        return false;
    }
    return material->Map(MapType::Normal) != nullptr;
}

View File

@@ -6,6 +6,7 @@
#include <boost/icl/interval_set.hpp>
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h"
namespace VideoCore {
@@ -13,6 +14,15 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
struct Material;
/// Bit flags describing the state of a surface; stored in SurfaceBase::flags.
/// Every enumerator occupies its own bit of the u32 underlying type.
enum class SurfaceFlagBits : u32 {
Registered = 1 << 0, ///< Surface is registered in the rasterizer cache.
Picked = 1 << 1, ///< Surface has been picked when searching for a match.
Tracked = 1 << 2, ///< Surface is part of a texture cube and should be tracked.
Custom = 1 << 3, ///< Surface texture has been replaced with a custom texture.
ShadowMap = 1 << 4, ///< Surface is used during shadow rendering.
};
DECLARE_ENUM_FLAG_OPERATORS(SurfaceFlagBits);
class SurfaceBase : public SurfaceParams {
public:
SurfaceBase(const SurfaceParams& params);
@@ -30,19 +40,27 @@ public:
/// Returns the clear value used to validate another surface from this fill surface
ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format);
/// Returns the internal surface extent.
Extent RealExtent(bool scaled = true);
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
/// Returns true when the range starting at overlap_addr and spanning overlap_size bytes
/// intersects this surface's [addr, end) range; both ranges are compared as half-open.
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
    const auto span = static_cast<PAddr>(overlap_size);
    const PAddr range_end = overlap_addr + span;
    // The queried range finishes at or before this surface begins.
    if (range_end <= addr) {
        return false;
    }
    return end > overlap_addr;
}
u64 ModificationTick() const noexcept {
return modification_tick;
}
bool IsCustom() const noexcept {
return is_custom && custom_format != CustomPixelFormat::Invalid;
return True(flags & SurfaceFlagBits::Custom) && custom_format != CustomPixelFormat::Invalid;
}
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
return invalid_regions.find(interval) == invalid_regions.end();
}
void MarkValid(SurfaceInterval interval) {
@@ -65,8 +83,7 @@ private:
std::array<u8, 4> MakeFillBuffer(PAddr copy_addr);
public:
bool registered = false;
bool is_custom = false;
SurfaceFlagBits flags{};
const Material* material = nullptr;
SurfaceRegions invalid_regions;
u32 fill_size = 0;

View File

@@ -15,14 +15,23 @@ bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
const u32 level = LevelOf(sub_surface.addr);
return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
sub_surface.is_tiled == is_tiled &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
(sub_surface.addr - mipmap_offsets[level]) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == (stride >> level) ||
sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).right <= stride;
}
/// Returns true if other_surface can be validated from this surface by reinterpreting
/// its pixels: it must lie inside this surface's address range, use the same bits per
/// pixel and tiling mode, and start at a suitably aligned offset.
/// NOTE(review): unlike CanSubRect/CanExpand this member is not const-qualified even
/// though it only reads state - consider adding const to declaration and definition.
bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) {
    // The candidate has to be fully contained in [addr, end].
    if (other_surface.addr < addr || other_surface.end > end) {
        return false;
    }
    // An invalid format cannot be reinterpreted, and the pixel sizes must agree.
    if (pixel_format == PixelFormat::Invalid) {
        return false;
    }
    if (GetFormatBpp() != other_surface.GetFormatBpp()) {
        return false;
    }
    if (other_surface.is_tiled != is_tiled) {
        return false;
    }
    // The offset must be a whole multiple of 64 pixels' worth of bytes for tiled
    // surfaces, or of a single pixel for linear ones.
    const auto alignment = BytesInPixels(is_tiled ? 64 : 1);
    return (other_surface.addr - addr) % alignment == 0;
}
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&
@@ -206,7 +215,9 @@ SurfaceInterval SurfaceParams::LevelInterval(u32 level) const {
}
u32 SurfaceParams::LevelOf(PAddr level_addr) const {
ASSERT(level_addr >= addr && level_addr <= end);
if (level_addr < addr || level_addr > end) {
return 0;
}
u32 level = levels - 1;
while (mipmap_offsets[level] > level_addr) {

View File

@@ -4,11 +4,15 @@
#pragma once
#include <boost/icl/right_open_interval.hpp>
#include "common/math_util.h"
#include "video_core/custom_textures/custom_format.h"
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/rasterizer_cache/pixel_format.h"
namespace VideoCore {
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
constexpr std::size_t MAX_PICA_LEVELS = 8;
class SurfaceParams {
@@ -19,6 +23,9 @@ public:
/// Returns true if sub_surface is a subrect of params
bool CanSubRect(const SurfaceParams& sub_surface) const;
/// Returns true if other_surface can be used for reinterpretation.
bool CanReinterpret(const SurfaceParams& other_surface);
/// Returns true if params can be expanded to match expanded_surface
bool CanExpand(const SurfaceParams& expanded_surface) const;

View File

@@ -4,28 +4,31 @@
#pragma once
#include <compare>
#include <span>
#include <boost/icl/right_open_interval.hpp>
#include "common/hash.h"
#include "common/math_util.h"
#include "common/slot_vector.h"
#include "common/vector_math.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
using SurfaceId = Common::SlotId;
using SamplerId = Common::SlotId;
/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};
/// Fake surface ID for null cube surfaces
constexpr SurfaceId NULL_SURFACE_CUBE_ID{1};
/// Fake sampler ID for null samplers
constexpr SamplerId NULL_SAMPLER_ID{0};
struct Offset {
constexpr auto operator<=>(const Offset&) const noexcept = default;
u32 x = 0;
u32 y = 0;
};
struct Extent {
constexpr auto operator<=>(const Extent&) const noexcept = default;
u32 width = 1;
u32 height = 1;
};
@@ -71,9 +74,9 @@ struct BufferTextureCopy {
};
struct StagingData {
u32 size = 0;
std::span<u8> mapped{};
u64 buffer_offset = 0;
u32 size;
std::span<u8> mapped;
u64 buffer_offset;
};
struct TextureCubeConfig {