Merge pull request #7118 from ameerj/vc-gpu-impl
gpu: Migrate implementation to the cpp file
This commit is contained in:
		| @@ -24,6 +24,7 @@ | ||||
| #include "command_classes/vic.h" | ||||
| #include "video_core/cdma_pusher.h" | ||||
| #include "video_core/command_classes/nvdec_common.h" | ||||
| #include "video_core/command_classes/sync_manager.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/gpu.h" | ||||
| #include "video_core/memory_manager.h" | ||||
|   | ||||
| @@ -9,13 +9,13 @@ | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/command_classes/sync_manager.h" | ||||
|  | ||||
| namespace Tegra { | ||||
|  | ||||
| class GPU; | ||||
| class Host1x; | ||||
| class Nvdec; | ||||
| class SyncptIncrManager; | ||||
| class Vic; | ||||
|  | ||||
| enum class ChSubmissionMode : u32 { | ||||
|   | ||||
| @@ -4,8 +4,10 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| namespace Tegra { | ||||
| #include "common/common_types.h" | ||||
| #include "common/math_util.h" | ||||
|  | ||||
| namespace Tegra { | ||||
| /** | ||||
|  * Struct describing framebuffer configuration | ||||
|  */ | ||||
| @@ -16,6 +18,21 @@ struct FramebufferConfig { | ||||
|         B8G8R8A8_UNORM = 5, | ||||
|     }; | ||||
|  | ||||
|     enum class TransformFlags : u32 { | ||||
|         /// No transform flags are set | ||||
|         Unset = 0x00, | ||||
|         /// Flip source image horizontally (around the vertical axis) | ||||
|         FlipH = 0x01, | ||||
|         /// Flip source image vertically (around the horizontal axis) | ||||
|         FlipV = 0x02, | ||||
|         /// Rotate source image 90 degrees clockwise | ||||
|         Rotate90 = 0x04, | ||||
|         /// Rotate source image 180 degrees | ||||
|         Rotate180 = 0x03, | ||||
|         /// Rotate source image 270 degrees clockwise | ||||
|         Rotate270 = 0x07, | ||||
|     }; | ||||
|  | ||||
|     VAddr address{}; | ||||
|     u32 offset{}; | ||||
|     u32 width{}; | ||||
| @@ -23,7 +40,6 @@ struct FramebufferConfig { | ||||
|     u32 stride{}; | ||||
|     PixelFormat pixel_format{}; | ||||
|  | ||||
|     using TransformFlags = Service::NVFlinger::BufferQueue::BufferTransformFlags; | ||||
|     TransformFlags transform_flags{}; | ||||
|     Common::Rectangle<int> crop_rect; | ||||
| }; | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -4,28 +4,12 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <condition_variable> | ||||
| #include <list> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/hle/service/nvdrv/nvdata.h" | ||||
| #include "core/hle/service/nvflinger/buffer_queue.h" | ||||
| #include "video_core/cdma_pusher.h" | ||||
| #include "video_core/dma_pusher.h" | ||||
| #include "video_core/framebuffer_config.h" | ||||
| #include "video_core/gpu_thread.h" | ||||
|  | ||||
| using CacheAddr = std::uintptr_t; | ||||
| [[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) { | ||||
|     return reinterpret_cast<CacheAddr>(host_ptr); | ||||
| } | ||||
|  | ||||
| [[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) { | ||||
|     return reinterpret_cast<u8*>(cache_addr); | ||||
| } | ||||
|  | ||||
| namespace Core { | ||||
| namespace Frontend { | ||||
| @@ -40,6 +24,9 @@ class ShaderNotify; | ||||
| } // namespace VideoCore | ||||
|  | ||||
| namespace Tegra { | ||||
| class DmaPusher; | ||||
| class CDmaPusher; | ||||
| struct CommandList; | ||||
|  | ||||
| enum class RenderTargetFormat : u32 { | ||||
|     NONE = 0x0, | ||||
| @@ -138,7 +125,18 @@ public: | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     explicit GPU(Core::System& system_, bool is_async_, bool use_nvdec_); | ||||
|     enum class FenceOperation : u32 { | ||||
|         Acquire = 0, | ||||
|         Increment = 1, | ||||
|     }; | ||||
|  | ||||
|     union FenceAction { | ||||
|         u32 raw; | ||||
|         BitField<0, 1, FenceOperation> op; | ||||
|         BitField<8, 24, u32> syncpoint_id; | ||||
|     }; | ||||
|  | ||||
|     explicit GPU(Core::System& system, bool is_async, bool use_nvdec); | ||||
|     ~GPU(); | ||||
|  | ||||
|     /// Binds a renderer to the GPU. | ||||
| @@ -162,9 +160,7 @@ public: | ||||
|     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); | ||||
|  | ||||
|     /// Obtains current flush request fence id. | ||||
|     [[nodiscard]] u64 CurrentFlushRequestFence() const { | ||||
|         return current_flush_fence.load(std::memory_order_relaxed); | ||||
|     } | ||||
|     [[nodiscard]] u64 CurrentFlushRequestFence() const; | ||||
|  | ||||
|     /// Tick pending requests within the GPU. | ||||
|     void TickWork(); | ||||
| @@ -200,24 +196,16 @@ public: | ||||
|     [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const; | ||||
|  | ||||
|     /// Returns a reference to the underlying renderer. | ||||
|     [[nodiscard]] VideoCore::RendererBase& Renderer() { | ||||
|         return *renderer; | ||||
|     } | ||||
|     [[nodiscard]] VideoCore::RendererBase& Renderer(); | ||||
|  | ||||
|     /// Returns a const reference to the underlying renderer. | ||||
|     [[nodiscard]] const VideoCore::RendererBase& Renderer() const { | ||||
|         return *renderer; | ||||
|     } | ||||
|     [[nodiscard]] const VideoCore::RendererBase& Renderer() const; | ||||
|  | ||||
|     /// Returns a reference to the shader notifier. | ||||
|     [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { | ||||
|         return *shader_notify; | ||||
|     } | ||||
|     [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify(); | ||||
|  | ||||
|     /// Returns a const reference to the shader notifier. | ||||
|     [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { | ||||
|         return *shader_notify; | ||||
|     } | ||||
|     [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; | ||||
|  | ||||
|     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||||
|     void WaitFence(u32 syncpoint_id, u32 value); | ||||
| @@ -232,80 +220,12 @@ public: | ||||
|  | ||||
|     [[nodiscard]] u64 GetTicks() const; | ||||
|  | ||||
|     [[nodiscard]] std::unique_lock<std::mutex> LockSync() { | ||||
|         return std::unique_lock{sync_mutex}; | ||||
|     } | ||||
|     [[nodiscard]] bool IsAsync() const; | ||||
|  | ||||
|     [[nodiscard]] bool IsAsync() const { | ||||
|         return is_async; | ||||
|     } | ||||
|  | ||||
|     [[nodiscard]] bool UseNvdec() const { | ||||
|         return use_nvdec; | ||||
|     } | ||||
|     [[nodiscard]] bool UseNvdec() const; | ||||
|  | ||||
|     void RendererFrameEndNotify(); | ||||
|  | ||||
|     enum class FenceOperation : u32 { | ||||
|         Acquire = 0, | ||||
|         Increment = 1, | ||||
|     }; | ||||
|  | ||||
|     union FenceAction { | ||||
|         u32 raw; | ||||
|         BitField<0, 1, FenceOperation> op; | ||||
|         BitField<8, 24, u32> syncpoint_id; | ||||
|  | ||||
|         [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) { | ||||
|             FenceAction result{}; | ||||
|             result.op.Assign(op); | ||||
|             result.syncpoint_id.Assign(syncpoint_id); | ||||
|             return {result.raw}; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     struct Regs { | ||||
|         static constexpr size_t NUM_REGS = 0x40; | ||||
|  | ||||
|         union { | ||||
|             struct { | ||||
|                 INSERT_PADDING_WORDS_NOINIT(0x4); | ||||
|                 struct { | ||||
|                     u32 address_high; | ||||
|                     u32 address_low; | ||||
|  | ||||
|                     [[nodiscard]] GPUVAddr SemaphoreAddress() const { | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||||
|                                                      address_low); | ||||
|                     } | ||||
|                 } semaphore_address; | ||||
|  | ||||
|                 u32 semaphore_sequence; | ||||
|                 u32 semaphore_trigger; | ||||
|                 INSERT_PADDING_WORDS_NOINIT(0xC); | ||||
|  | ||||
|                 // The pusher and the puller share the reference counter, the pusher only has read | ||||
|                 // access | ||||
|                 u32 reference_count; | ||||
|                 INSERT_PADDING_WORDS_NOINIT(0x5); | ||||
|  | ||||
|                 u32 semaphore_acquire; | ||||
|                 u32 semaphore_release; | ||||
|                 u32 fence_value; | ||||
|                 FenceAction fence_action; | ||||
|                 INSERT_PADDING_WORDS_NOINIT(0xE2); | ||||
|  | ||||
|                 // Puller state | ||||
|                 u32 acquire_mode; | ||||
|                 u32 acquire_source; | ||||
|                 u32 acquire_active; | ||||
|                 u32 acquire_timeout; | ||||
|                 u32 acquire_value; | ||||
|             }; | ||||
|             std::array<u32, NUM_REGS> reg_array; | ||||
|         }; | ||||
|     } regs{}; | ||||
|  | ||||
|     /// Performs any additional setup necessary in order to begin GPU emulation. | ||||
|     /// This can be used to launch any necessary threads and register any necessary | ||||
|     /// core timing events. | ||||
| @@ -338,104 +258,9 @@ public: | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|  | ||||
| protected: | ||||
|     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const; | ||||
|  | ||||
| private: | ||||
|     void ProcessBindMethod(const MethodCall& method_call); | ||||
|     void ProcessFenceActionMethod(); | ||||
|     void ProcessWaitForInterruptMethod(); | ||||
|     void ProcessSemaphoreTriggerMethod(); | ||||
|     void ProcessSemaphoreRelease(); | ||||
|     void ProcessSemaphoreAcquire(); | ||||
|  | ||||
|     /// Calls a GPU puller method. | ||||
|     void CallPullerMethod(const MethodCall& method_call); | ||||
|  | ||||
|     /// Calls a GPU engine method. | ||||
|     void CallEngineMethod(const MethodCall& method_call); | ||||
|  | ||||
|     /// Calls a GPU engine multivalue method. | ||||
|     void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, | ||||
|                                u32 methods_pending); | ||||
|  | ||||
|     /// Determines where the method should be executed. | ||||
|     [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); | ||||
|  | ||||
| protected: | ||||
|     Core::System& system; | ||||
|     std::unique_ptr<Tegra::MemoryManager> memory_manager; | ||||
|     std::unique_ptr<Tegra::DmaPusher> dma_pusher; | ||||
|     std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | ||||
|     std::unique_ptr<VideoCore::RendererBase> renderer; | ||||
|     VideoCore::RasterizerInterface* rasterizer = nullptr; | ||||
|     const bool use_nvdec; | ||||
|  | ||||
| private: | ||||
|     /// Mapping of command subchannels to their bound engine ids | ||||
|     std::array<EngineID, 8> bound_engines = {}; | ||||
|     /// 3D engine | ||||
|     std::unique_ptr<Engines::Maxwell3D> maxwell_3d; | ||||
|     /// 2D engine | ||||
|     std::unique_ptr<Engines::Fermi2D> fermi_2d; | ||||
|     /// Compute engine | ||||
|     std::unique_ptr<Engines::KeplerCompute> kepler_compute; | ||||
|     /// DMA engine | ||||
|     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | ||||
|     /// Inline memory engine | ||||
|     std::unique_ptr<Engines::KeplerMemory> kepler_memory; | ||||
|     /// Shader build notifier | ||||
|     std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||||
|     /// When true, we are about to shut down emulation session, so terminate outstanding tasks | ||||
|     std::atomic_bool shutting_down{}; | ||||
|  | ||||
|     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | ||||
|  | ||||
|     std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; | ||||
|  | ||||
|     std::mutex sync_mutex; | ||||
|     std::mutex device_mutex; | ||||
|  | ||||
|     std::condition_variable sync_cv; | ||||
|  | ||||
|     struct FlushRequest { | ||||
|         explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) | ||||
|             : fence{fence_}, addr{addr_}, size{size_} {} | ||||
|         u64 fence; | ||||
|         VAddr addr; | ||||
|         std::size_t size; | ||||
|     }; | ||||
|  | ||||
|     std::list<FlushRequest> flush_requests; | ||||
|     std::atomic<u64> current_flush_fence{}; | ||||
|     u64 last_flush_fence{}; | ||||
|     std::mutex flush_request_mutex; | ||||
|  | ||||
|     const bool is_async; | ||||
|  | ||||
|     VideoCommon::GPUThread::ThreadManager gpu_thread; | ||||
|     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; | ||||
|     struct Impl; | ||||
|     std::unique_ptr<Impl> impl; | ||||
| }; | ||||
|  | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|     static_assert(offsetof(GPU::Regs, field_name) == position * 4,                                 \ | ||||
|                   "Field " #field_name " has invalid position") | ||||
|  | ||||
| ASSERT_REG_POSITION(semaphore_address, 0x4); | ||||
| ASSERT_REG_POSITION(semaphore_sequence, 0x6); | ||||
| ASSERT_REG_POSITION(semaphore_trigger, 0x7); | ||||
| ASSERT_REG_POSITION(reference_count, 0x14); | ||||
| ASSERT_REG_POSITION(semaphore_acquire, 0x1A); | ||||
| ASSERT_REG_POSITION(semaphore_release, 0x1B); | ||||
| ASSERT_REG_POSITION(fence_value, 0x1C); | ||||
| ASSERT_REG_POSITION(fence_action, 0x1D); | ||||
|  | ||||
| ASSERT_REG_POSITION(acquire_mode, 0x100); | ||||
| ASSERT_REG_POSITION(acquire_source, 0x101); | ||||
| ASSERT_REG_POSITION(acquire_active, 0x102); | ||||
| ASSERT_REG_POSITION(acquire_timeout, 0x103); | ||||
| ASSERT_REG_POSITION(acquire_value, 0x104); | ||||
|  | ||||
| #undef ASSERT_REG_POSITION | ||||
|  | ||||
| } // namespace Tegra | ||||
|   | ||||
| @@ -130,9 +130,6 @@ public: | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|  | ||||
|     // Stops the GPU execution and waits for the GPU to finish working | ||||
|     void ShutDown(); | ||||
|  | ||||
|     void OnCommandListEnd(); | ||||
|  | ||||
| private: | ||||
|   | ||||
| @@ -8,6 +8,7 @@ | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <iterator> | ||||
| #include <list> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <optional> | ||||
|   | ||||
| @@ -20,6 +20,7 @@ | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/formatter.h" | ||||
| #include "video_core/texture_cache/samples_helper.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
| namespace { | ||||
|   | ||||
| @@ -12,6 +12,7 @@ | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/util_shaders.h" | ||||
| #include "video_core/texture_cache/image_view_base.h" | ||||
| #include "video_core/texture_cache/texture_cache_base.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|   | ||||
| @@ -21,6 +21,7 @@ | ||||
| #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||||
| #include "video_core/texture_cache/formatter.h" | ||||
| #include "video_core/texture_cache/samples_helper.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
|   | ||||
| @@ -4,11 +4,11 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <compare> | ||||
| #include <span> | ||||
|  | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||||
| #include "video_core/texture_cache/image_view_base.h" | ||||
| #include "video_core/texture_cache/texture_cache_base.h" | ||||
| #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||||
| #include "video_core/vulkan_common/vulkan_wrapper.h" | ||||
|   | ||||
| @@ -16,6 +16,7 @@ | ||||
| #include "common/fs/fs.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/shader_environment.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|   | ||||
| @@ -5,13 +5,13 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <filesystem> | ||||
| #include <iosfwd> | ||||
| #include <limits> | ||||
| #include <memory> | ||||
| #include <optional> | ||||
| #include <span> | ||||
| #include <stop_token> | ||||
| #include <type_traits> | ||||
| #include <unordered_map> | ||||
| #include <vector> | ||||
| @@ -19,9 +19,7 @@ | ||||
| #include "common/common_types.h" | ||||
| #include "common/unique_function.h" | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|  | ||||
| namespace Tegra { | ||||
| class Memorymanager; | ||||
|   | ||||
| @@ -8,6 +8,7 @@ | ||||
| #include "video_core/texture_cache/image_view_info.h" | ||||
| #include "video_core/texture_cache/texture_cache_base.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|  | ||||
| namespace VideoCommon { | ||||
|   | ||||
| @@ -4,10 +4,15 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <unordered_set> | ||||
|  | ||||
| #include "common/alignment.h" | ||||
| #include "video_core/dirty_flags.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/texture_cache/image_view_base.h" | ||||
| #include "video_core/texture_cache/samples_helper.h" | ||||
| #include "video_core/texture_cache/texture_cache_base.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
|  | ||||
| namespace VideoCommon { | ||||
|  | ||||
|   | ||||
| @@ -4,13 +4,12 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <array> | ||||
| #include <mutex> | ||||
| #include <span> | ||||
| #include <type_traits> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <vector> | ||||
| #include <queue> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "common/literals.h" | ||||
| @@ -18,10 +17,6 @@ | ||||
| #include "video_core/compatible_formats.h" | ||||
| #include "video_core/delayed_destruction_ring.h" | ||||
| #include "video_core/engines/fermi_2d.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/surface.h" | ||||
| #include "video_core/texture_cache/descriptor_table.h" | ||||
| #include "video_core/texture_cache/image_base.h" | ||||
| @@ -30,7 +25,6 @@ | ||||
| #include "video_core/texture_cache/render_targets.h" | ||||
| #include "video_core/texture_cache/slot_vector.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
| #include "video_core/texture_cache/util.h" | ||||
| #include "video_core/textures/texture.h" | ||||
|  | ||||
| namespace VideoCommon { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Fernando S
					Fernando S