SMMU: Implement backing CPU page protect/unprotect
This commit is contained in:
		@@ -5,6 +5,8 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#include <deque>
 | 
					#include <deque>
 | 
				
			||||||
#include <memory>
 | 
					#include <memory>
 | 
				
			||||||
 | 
					#include <array>
 | 
				
			||||||
 | 
					#include <atomic>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "common/common_types.h"
 | 
					#include "common/common_types.h"
 | 
				
			||||||
#include "common/virtual_buffer.h"
 | 
					#include "common/virtual_buffer.h"
 | 
				
			||||||
@@ -23,6 +25,7 @@ struct DeviceMemoryManagerAllocator;
 | 
				
			|||||||
template <typename Traits>
 | 
					template <typename Traits>
 | 
				
			||||||
class DeviceMemoryManager {
 | 
					class DeviceMemoryManager {
 | 
				
			||||||
    using DeviceInterface = typename Traits::DeviceInterface;
 | 
					    using DeviceInterface = typename Traits::DeviceInterface;
 | 
				
			||||||
 | 
					    using DeviceMethods = Traits::DeviceMethods;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
    DeviceMemoryManager(const DeviceMemory& device_memory);
 | 
					    DeviceMemoryManager(const DeviceMemory& device_memory);
 | 
				
			||||||
@@ -35,7 +38,7 @@ public:
 | 
				
			|||||||
    DAddr AllocatePinned(size_t size);
 | 
					    DAddr AllocatePinned(size_t size);
 | 
				
			||||||
    void Free(DAddr start, size_t size);
 | 
					    void Free(DAddr start, size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void Map(DAddr address, VAddr virtual_address, size_t size, size_t p_id);
 | 
					    void Map(DAddr address, VAddr virtual_address, size_t size, size_t process_id);
 | 
				
			||||||
    void Unmap(DAddr address, size_t size);
 | 
					    void Unmap(DAddr address, size_t size);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Write / Read
 | 
					    // Write / Read
 | 
				
			||||||
@@ -57,6 +60,8 @@ public:
 | 
				
			|||||||
    size_t RegisterProcess(Memory::Memory* memory);
 | 
					    size_t RegisterProcess(Memory::Memory* memory);
 | 
				
			||||||
    void UnregisterProcess(size_t id);
 | 
					    void UnregisterProcess(size_t id);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
    static constexpr bool supports_pinning = Traits::supports_pinning;
 | 
					    static constexpr bool supports_pinning = Traits::supports_pinning;
 | 
				
			||||||
    static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
 | 
					    static constexpr size_t device_virtual_bits = Traits::device_virtual_bits;
 | 
				
			||||||
@@ -90,8 +95,52 @@ private:
 | 
				
			|||||||
    Common::VirtualBuffer<u32> compressed_physical_ptr;
 | 
					    Common::VirtualBuffer<u32> compressed_physical_ptr;
 | 
				
			||||||
    Common::VirtualBuffer<u32> compressed_device_addr;
 | 
					    Common::VirtualBuffer<u32> compressed_device_addr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Process memory interfaces
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::deque<size_t> id_pool;
 | 
					    std::deque<size_t> id_pool;
 | 
				
			||||||
    std::deque<Memory::Memory*> registered_processes;
 | 
					    std::deque<Memory::Memory*> registered_processes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Memory protection management
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static constexpr size_t guest_max_as_bits = 39;
 | 
				
			||||||
 | 
					    static constexpr size_t guest_as_size = 1ULL << guest_max_as_bits;
 | 
				
			||||||
 | 
					    static constexpr size_t guest_mask = guest_as_size - 1ULL;
 | 
				
			||||||
 | 
					    static constexpr size_t process_id_start_bit = guest_max_as_bits;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::pair<size_t, VAddr> ExtractCPUBacking(size_t page_index) {
 | 
				
			||||||
 | 
					        auto content = cpu_backing_address[page_index];
 | 
				
			||||||
 | 
					        const VAddr address = content & guest_mask;
 | 
				
			||||||
 | 
					        const size_t process_id = static_cast<size_t>(content >> process_id_start_bit);
 | 
				
			||||||
 | 
					        return std::make_pair(process_id, address);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void InsertCPUBacking(size_t page_index, VAddr address, size_t process_id) {
 | 
				
			||||||
 | 
					        cpu_backing_address[page_index] = address | (process_id << page_index);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Common::VirtualBuffer<VAddr> cpu_backing_address;
 | 
				
			||||||
 | 
					    static constexpr size_t subentries = 4;
 | 
				
			||||||
 | 
					    static constexpr size_t subentries_mask = subentries - 1;
 | 
				
			||||||
 | 
					    class CounterEntry final {
 | 
				
			||||||
 | 
					    public:
 | 
				
			||||||
 | 
					        CounterEntry() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        std::atomic_uint16_t& Count(std::size_t page) {
 | 
				
			||||||
 | 
					            return values[page & subentries_mask];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        const std::atomic_uint16_t& Count(std::size_t page) const {
 | 
				
			||||||
 | 
					            return values[page & subentries_mask];
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    private:
 | 
				
			||||||
 | 
					        std::array<std::atomic_uint16_t, subentries> values{};
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    static_assert(sizeof(CounterEntry) == subentries * sizeof(u16), "CounterEntry should be 8 bytes!");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static constexpr size_t num_counter_entries = (1ULL << (device_virtual_bits - page_bits)) / subentries;
 | 
				
			||||||
 | 
					    using CachedPages = std::array<CounterEntry, num_counter_entries>;
 | 
				
			||||||
 | 
					    std::unique_ptr<CachedPages> cached_pages;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace Core
 | 
					} // namespace Core
 | 
				
			||||||
@@ -2,12 +2,15 @@
 | 
				
			|||||||
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
					// SPDX-License-Identifier: GPL-2.0-or-later
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <atomic>
 | 
					#include <atomic>
 | 
				
			||||||
 | 
					#include <limits>
 | 
				
			||||||
#include <memory>
 | 
					#include <memory>
 | 
				
			||||||
#include <type_traits>
 | 
					#include <type_traits>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "common/address_space.h"
 | 
					#include "common/address_space.h"
 | 
				
			||||||
#include "common/address_space.inc"
 | 
					#include "common/address_space.inc"
 | 
				
			||||||
#include "common/alignment.h"
 | 
					#include "common/alignment.h"
 | 
				
			||||||
 | 
					#include "common/assert.h"
 | 
				
			||||||
 | 
					#include "common/div_ceil.h"
 | 
				
			||||||
#include "common/scope_exit.h"
 | 
					#include "common/scope_exit.h"
 | 
				
			||||||
#include "core/device_memory.h"
 | 
					#include "core/device_memory.h"
 | 
				
			||||||
#include "core/device_memory_manager.h"
 | 
					#include "core/device_memory_manager.h"
 | 
				
			||||||
@@ -51,7 +54,11 @@ struct DeviceMemoryManagerAllocator {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Allocates `size` from `pin_allocator` when `supports_pinning` is set; otherwise returns a
    /// value-initialized DAddr without touching the allocator.
    DAddr AllocatePinned(size_t size) {
        if constexpr (!supports_pinning) {
            return DAddr{};
        } else {
            return pin_allocator.Allocate(size);
        }
    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) {
 | 
					    void DoInRange(DAddr address, size_t size, auto pin_func, auto main_func) {
 | 
				
			||||||
@@ -100,6 +107,7 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
 | 
				
			|||||||
      interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
 | 
					      interface{nullptr}, compressed_physical_ptr(device_as_size >> Memory::YUZU_PAGEBITS),
 | 
				
			||||||
      compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) {
 | 
					      compressed_device_addr(1ULL << (physical_max_bits - Memory::YUZU_PAGEBITS)) {
 | 
				
			||||||
    impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
 | 
					    impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
 | 
				
			||||||
 | 
					    cached_pages = std::make_unique<CachedPages>();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template <typename Traits>
 | 
					template <typename Traits>
 | 
				
			||||||
@@ -132,14 +140,14 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
template <typename Traits>
 | 
					template <typename Traits>
 | 
				
			||||||
void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
 | 
					void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size,
 | 
				
			||||||
                                      size_t p_id) {
 | 
					                                      size_t process_id) {
 | 
				
			||||||
    Core::Memory::Memory* process_memory = registered_processes[p_id];
 | 
					    Core::Memory::Memory* process_memory = registered_processes[process_id];
 | 
				
			||||||
    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
 | 
					    size_t start_page_d = address >> Memory::YUZU_PAGEBITS;
 | 
				
			||||||
    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
 | 
					    size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS;
 | 
				
			||||||
    std::atomic_thread_fence(std::memory_order_acquire);
 | 
					    std::atomic_thread_fence(std::memory_order_acquire);
 | 
				
			||||||
    for (size_t i = 0; i < num_pages; i++) {
 | 
					    for (size_t i = 0; i < num_pages; i++) {
 | 
				
			||||||
        auto* ptr = process_memory->GetPointer(
 | 
					        const VAddr new_vaddress = virtual_address + i * Memory::YUZU_PAGESIZE;
 | 
				
			||||||
            Common::ProcessAddress(virtual_address + i * Memory::YUZU_PAGESIZE));
 | 
					        auto* ptr = process_memory->GetPointer(Common::ProcessAddress(new_vaddress));
 | 
				
			||||||
        if (ptr == nullptr) [[unlikely]] {
 | 
					        if (ptr == nullptr) [[unlikely]] {
 | 
				
			||||||
            compressed_physical_ptr[start_page_d + i] = 0;
 | 
					            compressed_physical_ptr[start_page_d + i] = 0;
 | 
				
			||||||
            continue;
 | 
					            continue;
 | 
				
			||||||
@@ -147,6 +155,7 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
 | 
				
			|||||||
        auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
 | 
					        auto phys_addr = static_cast<u32>(GetRawPhysicalAddr(ptr) >> Memory::YUZU_PAGEBITS) + 1U;
 | 
				
			||||||
        compressed_physical_ptr[start_page_d + i] = phys_addr;
 | 
					        compressed_physical_ptr[start_page_d + i] = phys_addr;
 | 
				
			||||||
        compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
 | 
					        compressed_device_addr[phys_addr - 1U] = static_cast<u32>(start_page_d + i);
 | 
				
			||||||
 | 
					        InsertCPUBacking(start_page_d + i, new_vaddress, process_id);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    std::atomic_thread_fence(std::memory_order_release);
 | 
					    std::atomic_thread_fence(std::memory_order_release);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -159,6 +168,7 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
 | 
				
			|||||||
    for (size_t i = 0; i < num_pages; i++) {
 | 
					    for (size_t i = 0; i < num_pages; i++) {
 | 
				
			||||||
        auto phys_addr = compressed_physical_ptr[start_page_d + i];
 | 
					        auto phys_addr = compressed_physical_ptr[start_page_d + i];
 | 
				
			||||||
        compressed_physical_ptr[start_page_d + i] = 0;
 | 
					        compressed_physical_ptr[start_page_d + i] = 0;
 | 
				
			||||||
 | 
					        cpu_backing_address[start_page_d + i] = 0;
 | 
				
			||||||
        if (phys_addr != 0) {
 | 
					        if (phys_addr != 0) {
 | 
				
			||||||
            compressed_device_addr[phys_addr - 1] = 0;
 | 
					            compressed_device_addr[phys_addr - 1] = 0;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -301,4 +311,66 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(size_t id) {
 | 
				
			|||||||
    id_pool.push_front(id);
 | 
					    id_pool.push_front(id);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Traits>
 | 
				
			||||||
 | 
					void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) {
 | 
				
			||||||
 | 
					    u64 uncache_begin = 0;
 | 
				
			||||||
 | 
					    u64 cache_begin = 0;
 | 
				
			||||||
 | 
					    u64 uncache_bytes = 0;
 | 
				
			||||||
 | 
					    u64 cache_bytes = 0;
 | 
				
			||||||
 | 
					    const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::atomic_thread_fence(std::memory_order_acquire);
 | 
				
			||||||
 | 
					    const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
 | 
				
			||||||
 | 
					    size_t page = addr >> Memory::YUZU_PAGEBITS;
 | 
				
			||||||
 | 
					    auto [process_id, base_vaddress] = ExtractCPUBacking(page);
 | 
				
			||||||
 | 
					    size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS;
 | 
				
			||||||
 | 
					    auto* memory_interface = registered_processes[process_id];
 | 
				
			||||||
 | 
					    for (; page != page_end; ++page) {
 | 
				
			||||||
 | 
					        std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (delta > 0) {
 | 
				
			||||||
 | 
					            ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u16>::max(),
 | 
				
			||||||
 | 
					                       "Count may overflow!");
 | 
				
			||||||
 | 
					        } else if (delta < 0) {
 | 
				
			||||||
 | 
					            ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            ASSERT_MSG(false, "Delta must be non-zero!");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Adds or subtracts 1, as count is a unsigned 8-bit value
 | 
				
			||||||
 | 
					        count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Assume delta is either -1 or 1
 | 
				
			||||||
 | 
					        if (count.load(std::memory_order::relaxed) == 0) {
 | 
				
			||||||
 | 
					            if (uncache_bytes == 0) {
 | 
				
			||||||
 | 
					                uncache_begin = vpage;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            uncache_bytes += Memory::YUZU_PAGESIZE;
 | 
				
			||||||
 | 
					        } else if (uncache_bytes > 0) {
 | 
				
			||||||
 | 
					            MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS,
 | 
				
			||||||
 | 
					                              uncache_bytes, false);
 | 
				
			||||||
 | 
					            uncache_bytes = 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
 | 
				
			||||||
 | 
					            if (cache_bytes == 0) {
 | 
				
			||||||
 | 
					                cache_begin = vpage;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            cache_bytes += Memory::YUZU_PAGESIZE;
 | 
				
			||||||
 | 
					        } else if (cache_bytes > 0) {
 | 
				
			||||||
 | 
					            MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
 | 
				
			||||||
 | 
					                              true);
 | 
				
			||||||
 | 
					            cache_bytes = 0;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        vpage++;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (uncache_bytes > 0) {
 | 
				
			||||||
 | 
					        MarkRegionCaching(memory_interface, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes,
 | 
				
			||||||
 | 
					                          false);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (cache_bytes > 0) {
 | 
				
			||||||
 | 
					        MarkRegionCaching(memory_interface, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes,
 | 
				
			||||||
 | 
					                          true);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // namespace Core
 | 
					} // namespace Core
 | 
				
			||||||
@@ -5,6 +5,17 @@
 | 
				
			|||||||
#include "video_core/host1x/gpu_device_memory_manager.h"
 | 
					#include "video_core/host1x/gpu_device_memory_manager.h"
 | 
				
			||||||
#include "video_core/rasterizer_interface.h"
 | 
					#include "video_core/rasterizer_interface.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace Tegra {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct MaxwellDeviceMethods {
 | 
				
			||||||
 | 
					    static inline void MarkRegionCaching(Core::Memory::Memory* interface, VAddr address,
 | 
				
			||||||
 | 
					                                         size_t size, bool caching) {
 | 
				
			||||||
 | 
					        interface->RasterizerMarkRegionCached(address, size, caching);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					} // namespace Tegra
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>;
 | 
					template struct Core::DeviceMemoryManagerAllocator<Tegra::MaxwellDeviceTraits>;
 | 
				
			||||||
template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>;
 | 
					template class Core::DeviceMemoryManager<Tegra::MaxwellDeviceTraits>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,10 +9,13 @@ class RasterizerInterface;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
namespace Tegra {
 | 
					namespace Tegra {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct MaxwellDeviceMethods;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct MaxwellDeviceTraits {
 | 
					struct MaxwellDeviceTraits {
 | 
				
			||||||
    static constexpr bool supports_pinning = true;
 | 
					    static constexpr bool supports_pinning = true;
 | 
				
			||||||
    static constexpr size_t device_virtual_bits = 34;
 | 
					    static constexpr size_t device_virtual_bits = 34;
 | 
				
			||||||
    using DeviceInterface = typename VideoCore::RasterizerInterface;
 | 
					    using DeviceInterface = typename VideoCore::RasterizerInterface;
 | 
				
			||||||
 | 
					    using DeviceMethods = typename MaxwellDeviceMethods;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
 | 
					using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user