rasterizer_cache: Remove runtime allocation caching (#6705)

* rasterizer_cache: Sentence surfaces

* gl_texture_runtime: Remove runtime side allocation cache

* rasterizer_cache: Adjust surface scale during reinterpretation

* Fixes pixelated outlines. Also allows removing the d24s8-specific hack and is more generic overall

* rasterizer_cache: Remove Expand flag

* Begone!

* rasterizer_cache: Cache framebuffers with surface id

* rasterizer_cache: Sentence texture cubes

* renderer_opengl: Move texture mailbox to separate file

* Makes renderer_opengl cleaner overall and allows reporting the removal threshold from the runtime instead of hardcoding it. Vulkan requires this

* rasterizer_cache: Don't flush cache on layout change

* rasterizer_cache: Overhaul framebuffer management

* video_core: Remove duplicate

* rasterizer_cache: Sentence custom surfaces

* Vulkan cannot destroy images immediately so this ensures we use our garbage collector for that purpose
This commit is contained in:
GPUCode
2023-08-01 03:35:41 +03:00
committed by GitHub
parent 3fedc68230
commit a955f02771
23 changed files with 734 additions and 809 deletions

View File

@@ -1,73 +0,0 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/regs.h"
namespace VideoCore {
FramebufferBase::FramebufferBase() = default;
/**
 * Builds the framebuffer metadata for a draw: resolution scale, draw rectangle, viewport,
 * scissor rectangle and the per-attachment intervals obtained from GetSubRectInterval.
 * @param regs PICA registers; only regs.rasterizer (viewport rect and scissor_test) is read.
 * @param color Color attachment, may be null.
 * @param color_level Mip level of the color attachment being rendered to.
 * @param depth_stencil Depth/stencil attachment, may be null.
 * @param depth_level Mip level of the depth/stencil attachment being rendered to.
 * @param surfaces_rect Rectangle that the viewport and scissor are offset from and that the
 * draw rectangle is clamped to.
 * NOTE(review): presumably the scaled rect of the attachments - confirm against the caller.
 */
FramebufferBase::FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect) {
// Prefer the color attachment's scale, then the depth attachment's; with no attachment use 1.
res_scale = color ? color->res_scale : (depth_stencil ? depth_stencil->res_scale : 1u);
// Determine the draw rectangle (render area + scissor)
// Each edge is the viewport edge multiplied by res_scale, offset by the surfaces_rect origin
// (left/bottom) and then clamped to the bounds of surfaces_rect.
const Common::Rectangle viewport_rect = regs.rasterizer.GetViewportRect();
draw_rect.left =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.top =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
draw_rect.right =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.bottom =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
// Unlike draw_rect, the viewport origin and size are not clamped to surfaces_rect.
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
scissor_rect.left =
static_cast<s32>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
scissor_rect.bottom =
static_cast<s32>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
scissor_rect.right =
static_cast<s32>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
scissor_rect.top =
static_cast<s32>(surfaces_rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * res_scale);
// Rendering to mipmaps is something quite rare so log it when it occurs.
if (color_level != 0) {
LOG_WARNING(HW_GPU, "Game is rendering to color mipmap {}", color_level);
}
if (depth_level != 0) {
LOG_WARNING(HW_GPU, "Game is rendering to depth mipmap {}", depth_level);
}
// Query surface invalidation intervals
// The draw rectangle is divided by res_scale before it is handed to GetSubRectInterval.
const Common::Rectangle draw_rect_unscaled{draw_rect / res_scale};
if (color) {
// Keep a copy of the attachment's parameters; slot 0 holds the color interval.
color_params = *color;
intervals[0] = color->GetSubRectInterval(draw_rect_unscaled, color_level);
}
if (depth_stencil) {
// Keep a copy of the attachment's parameters; slot 1 holds the depth/stencil interval.
depth_params = *depth_stencil;
intervals[1] = depth_stencil->GetSubRectInterval(draw_rect_unscaled, depth_level);
}
}
} // namespace VideoCore

View File

@@ -4,12 +4,11 @@
#pragma once
#include "common/hash.h"
#include "common/math_util.h"
#include "video_core/rasterizer_cache/slot_id.h"
#include "video_core/rasterizer_cache/surface_params.h"
namespace Pica {
struct Regs;
}
#include "video_core/regs_rasterizer.h"
namespace VideoCore {
@@ -22,31 +21,109 @@ struct ViewportInfo {
s32 height;
};
/**
 * Identifies a framebuffer by its attachment surface ids, the mip level used for each
 * attachment and the shadow rendering flag. Equality and hashing both operate on the raw
 * bytes of the struct, so the member order and the explicit padding must stay as they are.
 */
struct FramebufferParams {
    SurfaceId color_id;
    SurfaceId depth_id;
    u32 color_level;
    u32 depth_level;
    bool shadow_rendering;
    INSERT_PADDING_BYTES(3);

    /// Bytewise equality over the whole struct.
    bool operator==(const FramebufferParams& params) const noexcept {
        const int difference = std::memcmp(this, &params, sizeof(FramebufferParams));
        return difference == 0;
    }

    /// 64-bit hash computed over the raw bytes of the struct.
    u64 Hash() const noexcept {
        const u64 digest = Common::ComputeHash64(this, sizeof(FramebufferParams));
        return digest;
    }

    /// Maps a surface type to an attachment slot: 0 for Color, 1 for Depth and DepthStencil.
    /// Any other type is logged as critical and mapped to slot 0.
    u32 Index(VideoCore::SurfaceType type) const noexcept {
        if (type == VideoCore::SurfaceType::Color) {
            return 0;
        }
        if (type == VideoCore::SurfaceType::Depth ||
            type == VideoCore::SurfaceType::DepthStencil) {
            return 1;
        }
        LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer");
        return 0;
    }
};
static_assert(std::has_unique_object_representations_v<FramebufferParams>,
"FramebufferParams is not suitable for hashing");
template <class T>
class RasterizerCache;
/**
* A framebuffer is a lightweight abstraction over a pair of surfaces and provides
* metadata about them.
* @brief FramebufferHelper is a RAII wrapper over backend specific framebuffer handle that
* provides the viewport/scissor/draw rectangles and performs automatic rasterizer cache invalidation
* when out of scope.
*/
class FramebufferBase {
template <class T>
class FramebufferHelper {
public:
FramebufferBase();
FramebufferBase(const Pica::Regs& regs, const SurfaceBase* color, u32 color_level,
const SurfaceBase* depth_stencil, u32 depth_level,
Common::Rectangle<u32> surfaces_rect);
explicit FramebufferHelper(RasterizerCache<T>* res_cache_, typename T::Framebuffer* fb_,
const Pica::RasterizerRegs& regs,
Common::Rectangle<u32> surfaces_rect)
: res_cache{res_cache_}, fb{fb_} {
const u32 res_scale = fb->Scale();
SurfaceParams ColorParams() const noexcept {
return color_params;
// Determine the draw rectangle (render area + scissor)
const Common::Rectangle viewport_rect = regs.GetViewportRect();
draw_rect.left =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.top =
std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
draw_rect.right =
std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale,
surfaces_rect.left, surfaces_rect.right);
draw_rect.bottom = std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
viewport_rect.bottom * res_scale,
surfaces_rect.bottom, surfaces_rect.top);
// Update viewport
viewport.x = static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale;
viewport.y = static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale;
viewport.width = static_cast<s32>(viewport_rect.GetWidth() * res_scale);
viewport.height = static_cast<s32>(viewport_rect.GetHeight() * res_scale);
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
scissor_rect.left = static_cast<s32>(surfaces_rect.left + regs.scissor_test.x1 * res_scale);
scissor_rect.bottom =
static_cast<s32>(surfaces_rect.bottom + regs.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
scissor_rect.right =
static_cast<s32>(surfaces_rect.left + (regs.scissor_test.x2 + 1) * res_scale);
scissor_rect.top =
static_cast<s32>(surfaces_rect.bottom + (regs.scissor_test.y2 + 1) * res_scale);
}
SurfaceParams DepthParams() const noexcept {
return depth_params;
~FramebufferHelper() {
const Common::Rectangle draw_rect_unscaled{draw_rect / fb->Scale()};
const auto invalidate = [&](SurfaceId surface_id, u32 level) {
const auto& surface = res_cache->GetSurface(surface_id);
const SurfaceInterval interval = surface.GetSubRectInterval(draw_rect_unscaled, level);
const PAddr addr = boost::icl::first(interval);
const u32 size = boost::icl::length(interval);
res_cache->InvalidateRegion(addr, size, surface_id);
};
if (fb->color_id) {
invalidate(fb->color_id, fb->color_level);
}
if (fb->depth_id) {
invalidate(fb->depth_id, fb->depth_level);
}
}
SurfaceInterval Interval(SurfaceType type) const noexcept {
return intervals[Index(type)];
}
u32 ResolutionScale() const noexcept {
return res_scale;
typename T::Framebuffer* Framebuffer() const noexcept {
return fb;
}
Common::Rectangle<u32> DrawRect() const noexcept {
@@ -61,28 +138,21 @@ public:
return viewport;
}
protected:
u32 Index(VideoCore::SurfaceType type) const noexcept {
switch (type) {
case VideoCore::SurfaceType::Color:
return 0;
case VideoCore::SurfaceType::Depth:
case VideoCore::SurfaceType::DepthStencil:
return 1;
default:
LOG_CRITICAL(HW_GPU, "Unknown surface type in framebuffer");
return 0;
}
}
protected:
SurfaceParams color_params{};
SurfaceParams depth_params{};
std::array<SurfaceInterval, 2> intervals{};
Common::Rectangle<s32> scissor_rect{};
Common::Rectangle<u32> draw_rect{};
private:
RasterizerCache<T>* res_cache;
typename T::Framebuffer* fb;
Common::Rectangle<s32> scissor_rect;
Common::Rectangle<u32> draw_rect;
ViewportInfo viewport;
u32 res_scale{1};
};
} // namespace VideoCore
namespace std {

/// Hash adapter so VideoCore::FramebufferParams can be used as a key in standard unordered
/// containers; it forwards to FramebufferParams::Hash().
template <>
struct hash<VideoCore::FramebufferParams> {
    std::size_t operator()(const VideoCore::FramebufferParams& params) const noexcept {
        const std::size_t digest = params.Hash();
        return digest;
    }
};

} // namespace std

View File

@@ -37,7 +37,7 @@ RasterizerCache<T>::RasterizerCache(Memory::MemorySystem& memory_,
Pica::Regs& regs_, RendererBase& renderer_)
: memory{memory_}, custom_tex_manager{custom_tex_manager_}, runtime{runtime_}, regs{regs_},
renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()},
use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None},
filter{Settings::values.texture_filter.GetValue()},
dump_textures{Settings::values.dump_textures.GetValue()},
use_custom_textures{Settings::values.custom_textures.GetValue()} {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@@ -76,17 +76,21 @@ RasterizerCache<T>::~RasterizerCache() {
template <class T>
void RasterizerCache<T>::TickFrame() {
custom_tex_manager.TickFrame();
RunGarbageCollector();
const auto new_filter = Settings::values.texture_filter.GetValue();
if (filter != new_filter) [[unlikely]] {
filter = new_filter;
UnregisterAll();
}
const u32 scale_factor = renderer.GetResolutionScaleFactor();
const bool resolution_scale_changed = resolution_scale_factor != scale_factor;
const bool use_custom_texture_changed =
Settings::values.custom_textures.GetValue() != use_custom_textures;
const bool texture_filter_changed =
renderer.Settings().texture_filter_update_requested.exchange(false);
if (resolution_scale_changed || texture_filter_changed || use_custom_texture_changed) {
if (resolution_scale_changed || use_custom_texture_changed) {
resolution_scale_factor = scale_factor;
use_filter = Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None;
use_custom_textures = Settings::values.custom_textures.GetValue();
if (use_custom_textures) {
custom_tex_manager.FindCustomTextures();
@@ -95,6 +99,34 @@ void RasterizerCache<T>::TickFrame() {
}
}
/// Advances the frame counter and walks the sentenced list. Entries whose age in frames
/// exceeds runtime.RemoveThreshold() have their framebuffers removed, are erased from
/// slot_surfaces and are dropped from the list; younger entries are left in place.
template <class T>
void RasterizerCache<T>::RunGarbageCollector() {
    ++frame_tick;

    auto entry = sentenced.begin();
    while (entry != sentenced.end()) {
        const auto doomed_id = entry->first;
        const auto sentenced_tick = entry->second;

        if (frame_tick - sentenced_tick <= runtime.RemoveThreshold()) {
            // Not old enough yet, keep it for a later frame.
            ++entry;
        } else {
            RemoveFramebuffers(doomed_id);
            slot_surfaces.erase(doomed_id);
            entry = sentenced.erase(entry);
        }
    }
}
/// Erases every cached framebuffer whose key names surface_id as its color or depth
/// attachment, both from slot_framebuffers and from the framebuffers lookup map.
template <class T>
void RasterizerCache<T>::RemoveFramebuffers(SurfaceId surface_id) {
    auto entry = framebuffers.begin();
    while (entry != framebuffers.end()) {
        const auto& key = entry->first;
        const bool references_surface =
            key.color_id == surface_id || key.depth_id == surface_id;
        if (!references_surface) {
            ++entry;
            continue;
        }
        // Release the framebuffer slot before dropping the map entry that points at it.
        slot_framebuffers.erase(entry->second);
        entry = framebuffers.erase(entry);
    }
}
template <class T>
bool RasterizerCache<T>::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 1.f, 1.f},
@@ -322,29 +354,46 @@ template <class T>
void RasterizerCache<T>::CopySurface(Surface& src_surface, Surface& dst_surface,
SurfaceInterval copy_interval) {
MICROPROFILE_SCOPE(RasterizerCache_CopySurface);
const PAddr copy_addr = copy_interval.lower();
const SurfaceParams subrect_params = dst_surface.FromInterval(copy_interval);
const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params);
ASSERT(subrect_params.GetInterval() == copy_interval);
if (src_surface.type == SurfaceType::Fill) {
const TextureClear clear = {
.texture_level = dst_surface.LevelOf(copy_addr),
.texture_rect = dst_rect,
.texture_rect = dst_surface.GetScaledSubRect(subrect_params),
.value = src_surface.MakeClearValue(copy_addr, dst_surface.pixel_format),
};
runtime.ClearTexture(dst_surface, clear);
return;
}
const TextureBlit blit = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_rect = src_surface.GetScaledSubRect(subrect_params),
.dst_rect = dst_rect,
};
runtime.BlitTextures(src_surface, dst_surface, blit);
const u32 src_scale = src_surface.res_scale;
const u32 dst_scale = dst_surface.res_scale;
if (src_scale > dst_scale) {
dst_surface.ScaleUp(src_scale);
}
const auto src_rect = src_surface.GetScaledSubRect(subrect_params);
const auto dst_rect = dst_surface.GetScaledSubRect(subrect_params);
if (src_scale == dst_scale) {
const TextureCopy copy = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_offset = {src_rect.left, src_rect.bottom},
.dst_offset = {dst_rect.left, dst_rect.bottom},
.extent = {src_rect.GetWidth(), src_rect.GetHeight()},
};
runtime.CopyTextures(src_surface, dst_surface, copy);
} else {
const TextureBlit blit = {
.src_level = src_surface.LevelOf(copy_addr),
.dst_level = dst_surface.LevelOf(copy_addr),
.src_rect = src_rect,
.dst_rect = dst_rect,
};
runtime.BlitTextures(src_surface, dst_surface, blit);
}
}
template <class T>
@@ -361,33 +410,7 @@ SurfaceId RasterizerCache<T>::GetSurface(const SurfaceParams& params, ScaleMatch
SurfaceId surface_id = FindMatch<MatchFlags::Exact>(params, match_res_scale);
if (!surface_id) {
u16 target_res_scale = params.res_scale;
if (match_res_scale != ScaleMatch::Exact) {
// This surface may have a subrect of another surface with a higher res_scale, find
// it to adjust our params
SurfaceParams find_params = params;
SurfaceId expandable_id = FindMatch<MatchFlags::Expand>(find_params, match_res_scale);
if (expandable_id) {
Surface& expandable = slot_surfaces[expandable_id];
if (expandable.res_scale > target_res_scale) {
target_res_scale = expandable.res_scale;
}
}
// Keep res_scale when reinterpreting d24s8 -> rgba8
if (params.pixel_format == PixelFormat::RGBA8) {
find_params.pixel_format = PixelFormat::D24S8;
expandable_id = FindMatch<MatchFlags::Expand>(find_params, match_res_scale);
if (expandable_id) {
Surface& expandable = slot_surfaces[expandable_id];
if (expandable.res_scale > target_res_scale) {
target_res_scale = expandable.res_scale;
}
}
}
}
SurfaceParams new_params = params;
new_params.res_scale = target_res_scale;
surface_id = CreateSurface(new_params);
surface_id = CreateSurface(params);
RegisterSurface(surface_id);
}
@@ -429,31 +452,6 @@ typename RasterizerCache<T>::SurfaceRect_Tuple RasterizerCache<T>::GetSurfaceSub
aligned_params.UpdateParams();
}
// Check for a surface we can expand before creating a new one
if (!surface_id) {
surface_id = FindMatch<MatchFlags::Expand>(aligned_params, match_res_scale);
if (surface_id) {
Surface& surface = slot_surfaces[surface_id];
aligned_params.width = aligned_params.stride;
aligned_params.UpdateParams();
SurfaceParams new_params = surface;
new_params.addr = std::min(aligned_params.addr, surface.addr);
new_params.end = std::max(aligned_params.end, surface.end);
new_params.size = new_params.end - new_params.addr;
new_params.height =
new_params.size / aligned_params.BytesInPixels(aligned_params.stride);
new_params.UpdateParams();
ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
SurfaceId new_surface_id = CreateSurface(new_params);
DuplicateSurface(surface_id, new_surface_id);
UnregisterSurface(surface_id);
RegisterSurface(new_surface_id);
surface_id = new_surface_id;
}
}
// No subrect found - create and return a new surface
if (!surface_id) {
SurfaceParams new_params = aligned_params;
@@ -499,7 +497,7 @@ SurfaceId RasterizerCache<T>::GetTextureSurface(const Pica::Texture::TextureInfo
params.levels = max_level + 1;
params.is_tiled = true;
params.pixel_format = PixelFormatFromTextureFormat(info.format);
params.res_scale = use_filter ? resolution_scale_factor : 1;
params.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1;
params.UpdateParams();
const u32 min_width = info.width >> max_level;
@@ -552,7 +550,7 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
.height = config.width,
.stride = config.width,
.levels = config.levels,
.res_scale = use_filter ? resolution_scale_factor : 1,
.res_scale = filter != Settings::TextureFilter::None ? resolution_scale_factor : 1,
.texture_type = TextureType::CubeMap,
.pixel_format = PixelFormatFromTextureFormat(config.format),
.type = SurfaceType::Texture,
@@ -609,8 +607,8 @@ typename T::Surface& RasterizerCache<T>::GetTextureCube(const TextureCubeConfig&
}
template <class T>
typename T::Framebuffer RasterizerCache<T>::GetFramebufferSurfaces(bool using_color_fb,
bool using_depth_fb) {
FramebufferHelper<T> RasterizerCache<T>::GetFramebufferSurfaces(bool using_color_fb,
bool using_depth_fb) {
const auto& config = regs.framebuffer.framebuffer;
const s32 framebuffer_width = config.GetWidth();
@@ -692,35 +690,20 @@ typename T::Framebuffer RasterizerCache<T>::GetFramebufferSurfaces(bool using_co
boost::icl::length(depth_vp_interval));
}
render_targets = RenderTargets{
fb_params = FramebufferParams{
.color_id = color_id,
.depth_id = depth_id,
.color_level = color_level,
.depth_level = depth_level,
.shadow_rendering = regs.framebuffer.IsShadowRendering(),
};
return Framebuffer{runtime, color_surface, color_level, depth_surface,
depth_level, regs, fb_rect};
}
auto [it, new_framebuffer] = framebuffers.try_emplace(fb_params);
if (new_framebuffer) {
it->second = slot_framebuffers.insert(runtime, fb_params, color_surface, depth_surface);
}
template <class T>
void RasterizerCache<T>::InvalidateFramebuffer(const Framebuffer& framebuffer) {
const auto invalidate = [&](SurfaceId surface_id) {
if (!surface_id) {
return;
}
Surface& surface = slot_surfaces[surface_id];
const SurfaceInterval interval = framebuffer.Interval(surface.type);
const PAddr addr = boost::icl::first(interval);
const u32 size = boost::icl::length(interval);
InvalidateRegion(addr, size, surface_id);
};
const bool has_color = framebuffer.HasAttachment(SurfaceType::Color);
const bool has_depth = framebuffer.HasAttachment(SurfaceType::DepthStencil);
if (has_color) {
invalidate(render_targets.color_id);
}
if (has_depth) {
invalidate(render_targets.depth_id);
}
return FramebufferHelper<T>{this, &slot_framebuffers[it->second], regs.rasterizer, fb_rect};
}
template <class T>
@@ -875,9 +858,6 @@ SurfaceId RasterizerCache<T>::FindMatch(const SurfaceParams& params, ScaleMatch
surface.CanReinterpret(params);
return std::make_pair(matched, copy_interval);
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
return std::make_pair(surface.CanExpand(params), surface.GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
return std::make_pair(surface.CanTexCopy(params), surface.GetInterval());
});
@@ -1068,14 +1048,12 @@ bool RasterizerCache<T>::UploadCustomSurface(SurfaceId surface_id, SurfaceInterv
const auto upload = [this, level, surface_id, material]() -> bool {
Surface& surface = slot_surfaces[surface_id];
if (False(surface.flags & SurfaceFlagBits::Custom)) {
LOG_ERROR(HW_GPU, "Surface is not suitable for custom upload, aborting!");
return false;
}
if (!surface.IsCustom() && !surface.Swap(material)) {
LOG_ERROR(HW_GPU, "Custom compressed format {} unsupported by host GPU",
material->format);
return false;
ASSERT_MSG(True(surface.flags & SurfaceFlagBits::Custom),
"Surface is not suitable for custom upload, aborting!");
if (!surface.IsCustom()) {
const SurfaceId old_id =
slot_surfaces.swap_and_insert(surface_id, runtime, surface, material);
sentenced.emplace_back(old_id, frame_tick);
}
surface.UploadCustom(material, level);
if (custom_tex_manager.SkipMipmaps()) {
@@ -1159,6 +1137,10 @@ bool RasterizerCache<T>::ValidateByReinterpretation(Surface& surface, SurfacePar
if (boost::icl::is_empty(copy_interval & interval)) {
return false;
}
const u32 res_scale = src_surface.res_scale;
if (res_scale > surface.res_scale) {
surface.ScaleUp(res_scale);
}
const PAddr addr = boost::icl::lower(interval);
const SurfaceParams copy_params = surface.FromInterval(copy_interval);
const TextureBlit reinterpret = {
@@ -1229,25 +1211,24 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa
SurfaceRegions flushed_intervals;
for (const auto& [region, surface_id] : RangeFromInterval(dirty_regions, flush_interval)) {
// Small sizes imply that this most likely comes from the cpu, flush the entire region
// the point is to avoid thousands of small writes every frame if the cpu decides to
// access that region, anything higher than 8 you're guaranteed it comes from a service
auto interval = size <= 8 ? region : region & flush_interval;
if (flush_surface_id && surface_id != flush_surface_id) {
continue;
}
// Small sizes imply that this most likely comes from the cpu, flush the entire region
// the point is to avoid thousands of small writes every frame if the cpu decides to
// access that region, anything higher than 8 you're guaranteed it comes from a service
const auto interval = size <= 8 ? region : region & flush_interval;
Surface& surface = slot_surfaces[surface_id];
ASSERT_MSG(surface.IsRegionValid(interval), "Region owner has invalid regions");
const DebugScope scope{runtime, Common::Vec4f{0.f, 0.f, 0.f, 1.f},
"RasterizerCache::FlushRegion (from {:#x} to {:#x})",
interval.lower(), interval.upper()};
// Sanity check, this surface is the last one that marked this region dirty
Surface& surface = slot_surfaces[surface_id];
ASSERT(surface.IsRegionValid(interval));
if (surface.type == SurfaceType::Fill) {
SCOPE_EXIT({ flushed_intervals += interval; });
if (surface.IsFill()) {
DownloadFillSurface(surface, interval);
flushed_intervals += interval;
continue;
}
@@ -1261,8 +1242,6 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, SurfaceId flush_surfa
}
DownloadSurface(surface, download_interval);
}
flushed_intervals += interval;
}
// Reset dirty regions
@@ -1294,7 +1273,6 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
if (surface_id == region_owner_id) {
return;
}
// If the CPU is invalidating this region we want to remove it
// to (likely) mark the memory pages as uncached
if (!region_owner_id && size <= 8) {
@@ -1302,14 +1280,12 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
remove_surfaces.push_back(surface_id);
return;
}
surface.MarkInvalid(surface.GetInterval() & invalid_interval);
// If the surface has no salvageable data it should be removed
// from the cache to avoid clogging the data structure.
if (surface.IsFullyInvalid()) {
remove_surfaces.push_back(surface_id);
const auto interval = surface.GetInterval() & invalid_interval;
surface.MarkInvalid(interval);
if (!surface.IsFullyInvalid()) {
return;
}
remove_surfaces.push_back(surface_id);
});
if (region_owner_id) {
@@ -1318,15 +1294,30 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
dirty_regions.erase(invalid_interval);
}
for (const SurfaceId remove_surface_id : remove_surfaces) {
UnregisterSurface(remove_surface_id);
for (const SurfaceId surface_id : remove_surfaces) {
UnregisterSurface(surface_id);
if (!slot_surfaces[surface_id].IsFill()) {
sentenced.emplace_back(surface_id, frame_tick);
} else {
slot_surfaces.erase(surface_id);
}
}
remove_surfaces.clear();
}
template <class T>
SurfaceId RasterizerCache<T>::CreateSurface(const SurfaceParams& params) {
SurfaceId surface_id = slot_surfaces.insert(runtime, params);
const SurfaceId surface_id = [&] {
const auto it = std::find_if(sentenced.begin(), sentenced.end(), [&](const auto& pair) {
return slot_surfaces[pair.first] == params;
});
if (it != sentenced.end()) {
const SurfaceId surface_id = it->first;
sentenced.erase(it);
return surface_id;
}
return slot_surfaces.insert(runtime, params);
}();
Surface& surface = slot_surfaces[surface_id];
surface.MarkInvalid(surface.GetInterval());
return surface_id;
@@ -1368,8 +1359,6 @@ void RasterizerCache<T>::UnregisterSurface(SurfaceId surface_id) {
surfaces.erase(vector_it);
});
SCOPE_EXIT({ slot_surfaces.erase(surface_id); });
if (False(surface.flags & SurfaceFlagBits::Tracked)) {
return;
}
@@ -1383,7 +1372,7 @@ void RasterizerCache<T>::UnregisterSurface(SurfaceId surface_id) {
}
if (std::none_of(cube.face_ids.begin(), cube.face_ids.end(),
[](SurfaceId id) { return id; })) {
slot_surfaces.erase(cube.surface_id);
sentenced.emplace_back(cube.surface_id, frame_tick);
return true;
}
return false;
@@ -1400,7 +1389,6 @@ void RasterizerCache<T>::UnregisterAll() {
}
texture_cube_cache.clear();
remove_surfaces.clear();
runtime.Reset();
}
template <class T>

View File

@@ -5,11 +5,13 @@
#pragma once
#include <functional>
#include <list>
#include <optional>
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/texture_cube.h"
@@ -26,6 +28,10 @@ namespace Pica::Texture {
struct TextureInfo;
}
namespace Settings {
enum class TextureFilter : u32;
}
namespace VideoCore {
enum class ScaleMatch {
@@ -38,9 +44,8 @@ enum class MatchFlags {
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 5, ///< Surface might have different pixel format.
TexCopy = 1 << 3, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 4, ///< Surface might have different pixel format.
};
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
@@ -66,11 +71,6 @@ class RasterizerCache {
using SurfaceRect_Tuple = std::pair<SurfaceId, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
struct RenderTargets {
SurfaceId color_id;
SurfaceId depth_id;
};
public:
explicit RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime, Pica::Regs& regs, RendererBase& renderer);
@@ -115,10 +115,7 @@ public:
Surface& GetTextureCube(const TextureCubeConfig& config);
/// Get the color and depth surfaces based on the framebuffer configuration
Framebuffer GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Marks the draw rectangle defined in framebuffer as invalid
void InvalidateFramebuffer(const Framebuffer& framebuffer);
FramebufferHelper<T> GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb);
/// Get a surface that matches a "texture copy" display transfer config
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
@@ -161,6 +158,12 @@ private:
SurfaceId FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type,
std::optional<SurfaceInterval> validate_interval = std::nullopt);
/// Unregisters sentenced surfaces that have surpassed the destruction threshold.
void RunGarbageCollector();
/// Removes any framebuffers that reference the provided surface_id.
void RemoveFramebuffers(SurfaceId surface_id);
/// Transfers ownership of a memory region from src_surface to dest_surface
void DuplicateSurface(SurfaceId src_id, SurfaceId dst_id);
@@ -209,15 +212,19 @@ private:
RendererBase& renderer;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
tsl::robin_pg_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<FramebufferParams, FramebufferId> framebuffers;
std::unordered_map<SamplerParams, SamplerId> samplers;
std::list<std::pair<SurfaceId, u64>> sentenced;
Common::SlotVector<Surface> slot_surfaces;
Common::SlotVector<Sampler> slot_samplers;
Common::SlotVector<Framebuffer> slot_framebuffers;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceId> remove_surfaces;
u32 resolution_scale_factor;
RenderTargets render_targets;
bool use_filter;
u64 frame_tick{};
FramebufferParams fb_params;
Settings::TextureFilter filter;
bool dump_textures;
bool use_custom_textures;
};

View File

@@ -10,6 +10,7 @@ namespace VideoCore {
using SurfaceId = Common::SlotId;
using SamplerId = Common::SlotId;
using FramebufferId = Common::SlotId;
/// Fake surface ID for null surfaces
constexpr SurfaceId NULL_SURFACE_ID{0};

View File

@@ -46,6 +46,10 @@ public:
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
/// Returns true if the surface type is SurfaceType::Fill.
bool IsFill() const noexcept {
    const bool is_fill_type = (type == SurfaceType::Fill);
    return is_fill_type;
}
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + static_cast<PAddr>(overlap_size);
return addr < overlap_end && overlap_addr < end;

View File

@@ -34,15 +34,6 @@ bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) {
GetSubRect(other_surface).right <= stride;
}
/// Returns true when this surface and expanded_surface share a valid pixel format, tiling mode
/// and stride, their address ranges touch or overlap, and the distance between their start
/// addresses is a whole multiple of BytesInPixels(stride * (is_tiled ? 8 : 1)).
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
    // The checks run in the same order as a short-circuit && chain would evaluate them.
    if (pixel_format == PixelFormat::Invalid) {
        return false;
    }
    if (pixel_format != expanded_surface.pixel_format) {
        return false;
    }
    if (!(addr <= expanded_surface.end) || !(expanded_surface.addr <= end)) {
        return false;
    }
    if (is_tiled != expanded_surface.is_tiled) {
        return false;
    }
    if (stride != expanded_surface.stride) {
        return false;
    }

    const auto start_distance =
        std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr);
    return start_distance % BytesInPixels(stride * (is_tiled ? 8 : 1)) == 0;
}
bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
const SurfaceInterval copy_interval = texcopy_params.GetInterval();
if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr ||

View File

@@ -26,9 +26,6 @@ public:
/// Returns true if other_surface can be used for reinterpretation.
bool CanReinterpret(const SurfaceParams& other_surface);
/// Returns true if params can be expanded to match expanded_surface
bool CanExpand(const SurfaceParams& expanded_surface) const;
/// Returns true if params can be used for texcopy
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
@@ -56,6 +53,10 @@ public:
/// Returns a string identifier of the params object
std::string DebugName(bool scaled, bool custom = false) const noexcept;
/// Bytewise comparison of two parameter sets via std::memcmp over sizeof(SurfaceParams).
/// NOTE(review): this also compares any padding bytes - confirm SurfaceParams has none.
bool operator==(const SurfaceParams& other) const noexcept {
    const int difference = std::memcmp(this, &other, sizeof(SurfaceParams));
    return difference == 0;
}
/// Returns the SurfaceInterval constructed from this surface's addr and end.
[[nodiscard]] SurfaceInterval GetInterval() const noexcept {
return SurfaceInterval{addr, end};
}

View File

@@ -67,6 +67,7 @@ struct StagingData {
};
class SurfaceParams;
struct FramebufferParams;
u32 MipLevels(u32 width, u32 height, u32 max_level);