diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 877c6635d..ca2da8f97 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -224,8 +224,13 @@ public: const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Config& copy_config) { std::lock_guard lock{mutex}; - std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config); - std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config); + SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config); + SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); + const GPUVAddr src_gpu_addr = src_config.Address(); + const GPUVAddr dst_gpu_addr = dst_config.Address(); + DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); + std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false); + std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } @@ -357,6 +362,29 @@ private: BufferCopy = 3, }; + enum class DeductionType : u32 { + DeductionComplete, + DeductionIncomplete, + DeductionFailed, + }; + + struct Deduction { + DeductionType type{DeductionType::DeductionFailed}; + TSurface surface{}; + + bool Failed() const { + return type == DeductionType::DeductionFailed; + } + + bool Incomplete() const { + return type == DeductionType::DeductionIncomplete; + } + + bool IsDepth() const { + return surface->GetSurfaceParams().IsPixelFormatZeta(); + } + }; + /** * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. * @param overlaps, the overlapping surfaces registered in the cache. @@ -691,6 +719,120 @@ private: MatchTopologyResult::FullMatch); } + /** + * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries + * to find a matching surface within the cache that's similar to it. If there are many textures + * or the texture found if entirely incompatible, it will fail. If no texture is found, the + * blit will be unsuccessful. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the paremeters on the candidate surface. + **/ + Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { + const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; + + if (!cache_addr) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } + + if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + TSurface& current_surface = iter->second; + const auto topological_result = current_surface->MatchesTopology(params); + if (topological_result != MatchTopologyResult::FullMatch) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } + const auto struct_result = current_surface->MatchesStructure(params); + if (struct_result != MatchStructureResult::None && + current_surface->MatchTarget(params.target)) { + Deduction result{}; + result.type = DeductionType::DeductionComplete; + result.surface = current_surface; + return result; + } + } + + const std::size_t candidate_size = params.GetGuestSizeInBytes(); + auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + + if (overlaps.empty()) { + Deduction result{}; + result.type = DeductionType::DeductionIncomplete; + return result; + } + + if (overlaps.size() > 1) { + Deduction result{}; + result.type = DeductionType::DeductionFailed; + return result; + } else { + Deduction result{}; + result.type = DeductionType::DeductionComplete; + result.surface = overlaps[0]; + return result; + } + } + + /** + * `DeduceBestBlit` gets the a source and destination starting address and parameters, + * and tries to deduce if they are supposed to be depth textures. If so, their + * parameters are modified and fixed into so. + * @param gpu_addr, the starting address of the candidate surface. + * @param params, the parameters on the candidate surface. + **/ + void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, + const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { + auto deduced_src = DeduceSurface(src_gpu_addr, src_params); + auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); + if (deduced_src.Failed() || deduced_dst.Failed()) { + return; + } + + const bool incomplete_src = deduced_src.Incomplete(); + const bool incomplete_dst = deduced_dst.Incomplete(); + + if (incomplete_src && incomplete_dst) { + return; + } + + const bool any_incomplete = incomplete_src || incomplete_dst; + + if (!any_incomplete) { + if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) { + return; + } + } else { + if (incomplete_src && !(deduced_dst.IsDepth())) { + return; + } + + if (incomplete_dst && !(deduced_src.IsDepth())) { + return; + } + } + + const auto inherit_format = ([](SurfaceParams& to, TSurface from) { + const SurfaceParams& params = from->GetSurfaceParams(); + to.pixel_format = params.pixel_format; + to.component_type = params.component_type; + to.type = params.type; + }); + // Now we got the cases where one or both is Depth and the other is not known + if (!incomplete_src) { + inherit_format(src_params, deduced_src.surface); + } else { + inherit_format(src_params, deduced_dst.surface); + } + if (!incomplete_dst) { + inherit_format(dst_params, deduced_dst.surface); + } else { + inherit_format(dst_params, deduced_src.surface); + } + } + std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool preserve_contents) { auto new_surface{GetUncachedSurface(gpu_addr, params)};