Merge pull request #7780 from lioncash/macro
video_core/macro: Move impl classes into their cpp files
This commit is contained in:
		| @@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) | |||||||
|         ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); |         ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); | ||||||
|  |  | ||||||
|     // Execute the current macro. |     // Execute the current macro. | ||||||
|     macro_engine->Execute(*this, macro_positions[entry], parameters); |     macro_engine->Execute(macro_positions[entry], parameters); | ||||||
|     if (mme_draw.current_mode != MMEDrawMode::Undefined) { |     if (mme_draw.current_mode != MMEDrawMode::Undefined) { | ||||||
|         FlushMMEInlineDraw(); |         FlushMMEInlineDraw(); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -2,12 +2,13 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <cstring> | ||||||
| #include <optional> | #include <optional> | ||||||
|  |  | ||||||
| #include <boost/container_hash/hash.hpp> | #include <boost/container_hash/hash.hpp> | ||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" |  | ||||||
| #include "common/settings.h" | #include "common/settings.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" |  | ||||||
| #include "video_core/macro/macro.h" | #include "video_core/macro/macro.h" | ||||||
| #include "video_core/macro/macro_hle.h" | #include "video_core/macro/macro_hle.h" | ||||||
| #include "video_core/macro/macro_interpreter.h" | #include "video_core/macro/macro_interpreter.h" | ||||||
| @@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) { | |||||||
|     uploaded_macro_code[method].push_back(data); |     uploaded_macro_code[method].push_back(data); | ||||||
| } | } | ||||||
|  |  | ||||||
| void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, | void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | ||||||
|                           const std::vector<u32>& parameters) { |  | ||||||
|     auto compiled_macro = macro_cache.find(method); |     auto compiled_macro = macro_cache.find(method); | ||||||
|     if (compiled_macro != macro_cache.end()) { |     if (compiled_macro != macro_cache.end()) { | ||||||
|         const auto& cache_info = compiled_macro->second; |         const auto& cache_info = compiled_macro->second; | ||||||
| @@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, | |||||||
|             cache_info.lle_program = Compile(code); |             cache_info.lle_program = Compile(code); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); |         if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) { | ||||||
|         if (hle_program.has_value()) { |  | ||||||
|             cache_info.has_hle_program = true; |             cache_info.has_hle_program = true; | ||||||
|             cache_info.hle_program = std::move(hle_program.value()); |             cache_info.hle_program = std::move(hle_program); | ||||||
|             cache_info.hle_program->Execute(parameters, method); |             cache_info.hle_program->Execute(parameters, method); | ||||||
|         } else { |         } else { | ||||||
|             cache_info.lle_program->Execute(parameters, method); |             cache_info.lle_program->Execute(parameters, method); | ||||||
|   | |||||||
| @@ -119,7 +119,7 @@ public: | |||||||
|     void AddCode(u32 method, u32 data); |     void AddCode(u32 method, u32 data); | ||||||
|  |  | ||||||
|     // Compiles the macro if it's not in the cache, and executes the compiled macro |     // Compiles the macro if it's not in the cache, and executes the compiled macro | ||||||
|     void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters); |     void Execute(u32 method, const std::vector<u32>& parameters); | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; |     virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; | ||||||
|   | |||||||
| @@ -5,12 +5,15 @@ | |||||||
| #include <array> | #include <array> | ||||||
| #include <vector> | #include <vector> | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/macro/macro.h" | ||||||
| #include "video_core/macro/macro_hle.h" | #include "video_core/macro/macro_hle.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
|  |  | ||||||
|  | using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); | ||||||
|  |  | ||||||
| // HLE'd functions | // HLE'd functions | ||||||
| void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) { | ||||||
|     const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); |     const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B); | ||||||
| @@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& | |||||||
|     maxwell3d.CallMethodFromMME(0x8e5, 0x0); |     maxwell3d.CallMethodFromMME(0x8e5, 0x0); | ||||||
|     maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; |     maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined; | ||||||
| } | } | ||||||
| } // Anonymous namespace |  | ||||||
|  |  | ||||||
| constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | ||||||
|     {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, |     {0x771BB18C62444DA0, &HLE_771BB18C62444DA0}, | ||||||
| @@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{ | |||||||
|     {0x0217920100488FF7, &HLE_0217920100488FF7}, |     {0x0217920100488FF7, &HLE_0217920100488FF7}, | ||||||
| }}; | }}; | ||||||
|  |  | ||||||
|  | class HLEMacroImpl final : public CachedMacro { | ||||||
|  | public: | ||||||
|  |     explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) | ||||||
|  |         : maxwell3d{maxwell3d_}, func{func_} {} | ||||||
|  |  | ||||||
|  |     void Execute(const std::vector<u32>& parameters, u32 method) override { | ||||||
|  |         func(maxwell3d, parameters); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     Engines::Maxwell3D& maxwell3d; | ||||||
|  |     HLEFunction func; | ||||||
|  | }; | ||||||
|  | } // Anonymous namespace | ||||||
|  |  | ||||||
| HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} | HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} | ||||||
| HLEMacro::~HLEMacro() = default; | HLEMacro::~HLEMacro() = default; | ||||||
|  |  | ||||||
| std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const { | std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const { | ||||||
|     const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), |     const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(), | ||||||
|                                  [hash](const auto& pair) { return pair.first == hash; }); |                                  [hash](const auto& pair) { return pair.first == hash; }); | ||||||
|     if (it == hle_funcs.end()) { |     if (it == hle_funcs.end()) { | ||||||
|         return std::nullopt; |         return nullptr; | ||||||
|     } |     } | ||||||
|     return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); |     return std::make_unique<HLEMacroImpl>(maxwell3d, it->second); | ||||||
| } | } | ||||||
|  |  | ||||||
| HLEMacroImpl::~HLEMacroImpl() = default; |  | ||||||
|  |  | ||||||
| HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_) |  | ||||||
|     : maxwell3d{maxwell3d_}, func{func_} {} |  | ||||||
|  |  | ||||||
| void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) { |  | ||||||
|     func(maxwell3d, parameters); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
| @@ -5,10 +5,7 @@ | |||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <optional> |  | ||||||
| #include <vector> |  | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/macro/macro.h" |  | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  |  | ||||||
| @@ -16,29 +13,17 @@ namespace Engines { | |||||||
| class Maxwell3D; | class Maxwell3D; | ||||||
| } | } | ||||||
|  |  | ||||||
| using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters); |  | ||||||
|  |  | ||||||
| class HLEMacro { | class HLEMacro { | ||||||
| public: | public: | ||||||
|     explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); |     explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); | ||||||
|     ~HLEMacro(); |     ~HLEMacro(); | ||||||
|  |  | ||||||
|     std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const; |     // Allocates and returns a cached macro if the hash matches a known function. | ||||||
|  |     // Returns nullptr otherwise. | ||||||
|  |     [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     Engines::Maxwell3D& maxwell3d; |     Engines::Maxwell3D& maxwell3d; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class HLEMacroImpl : public CachedMacro { |  | ||||||
| public: |  | ||||||
|     explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func); |  | ||||||
|     ~HLEMacroImpl(); |  | ||||||
|  |  | ||||||
|     void Execute(const std::vector<u32>& parameters, u32 method) override; |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     Engines::Maxwell3D& maxwell3d; |  | ||||||
|     HLEFunction func; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
| @@ -2,6 +2,9 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <array> | ||||||
|  | #include <optional> | ||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| @@ -11,16 +14,81 @@ | |||||||
| MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) | namespace { | ||||||
|     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | class MacroInterpreterImpl final : public CachedMacro { | ||||||
|  | public: | ||||||
|  |     explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | ||||||
|  |         : maxwell3d{maxwell3d_}, code{code_} {} | ||||||
|  |  | ||||||
| std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { |     void Execute(const std::vector<u32>& params, u32 method) override; | ||||||
|     return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, | private: | ||||||
|                                            const std::vector<u32>& code_) |     /// Resets the execution engine state, zeroing registers, etc. | ||||||
|     : maxwell3d{maxwell3d_}, code{code_} {} |     void Reset(); | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Executes a single macro instruction located at the current program counter. Returns whether | ||||||
|  |      * the interpreter should keep running. | ||||||
|  |      * | ||||||
|  |      * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | ||||||
|  |      *                      previous instruction. | ||||||
|  |      */ | ||||||
|  |     bool Step(bool is_delay_slot); | ||||||
|  |  | ||||||
|  |     /// Calculates the result of an ALU operation. src_a OP src_b; | ||||||
|  |     u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); | ||||||
|  |  | ||||||
|  |     /// Performs the result operation on the input result and stores it in the specified register | ||||||
|  |     /// (if necessary). | ||||||
|  |     void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); | ||||||
|  |  | ||||||
|  |     /// Evaluates the branch condition and returns whether the branch should be taken or not. | ||||||
|  |     bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; | ||||||
|  |  | ||||||
|  |     /// Reads an opcode at the current program counter location. | ||||||
|  |     Macro::Opcode GetOpcode() const; | ||||||
|  |  | ||||||
|  |     /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | ||||||
|  |     u32 GetRegister(u32 register_id) const; | ||||||
|  |  | ||||||
|  |     /// Sets the register to the input value. | ||||||
|  |     void SetRegister(u32 register_id, u32 value); | ||||||
|  |  | ||||||
|  |     /// Sets the method address to use for the next Send instruction. | ||||||
|  |     void SetMethodAddress(u32 address); | ||||||
|  |  | ||||||
|  |     /// Calls a GPU Engine method with the input parameter. | ||||||
|  |     void Send(u32 value); | ||||||
|  |  | ||||||
|  |     /// Reads a GPU register located at the method address. | ||||||
|  |     u32 Read(u32 method) const; | ||||||
|  |  | ||||||
|  |     /// Returns the next parameter in the parameter queue. | ||||||
|  |     u32 FetchParameter(); | ||||||
|  |  | ||||||
|  |     Engines::Maxwell3D& maxwell3d; | ||||||
|  |  | ||||||
|  |     /// Current program counter | ||||||
|  |     u32 pc{}; | ||||||
|  |     /// Program counter to execute at after the delay slot is executed. | ||||||
|  |     std::optional<u32> delayed_pc; | ||||||
|  |  | ||||||
|  |     /// General purpose macro registers. | ||||||
|  |     std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; | ||||||
|  |  | ||||||
|  |     /// Method address to use for the next Send instruction. | ||||||
|  |     Macro::MethodAddress method_address = {}; | ||||||
|  |  | ||||||
|  |     /// Input parameters of the current macro. | ||||||
|  |     std::unique_ptr<u32[]> parameters; | ||||||
|  |     std::size_t num_parameters = 0; | ||||||
|  |     std::size_t parameters_capacity = 0; | ||||||
|  |     /// Index of the next parameter that will be fetched by the 'parm' instruction. | ||||||
|  |     u32 next_parameter_index = 0; | ||||||
|  |  | ||||||
|  |     bool carry_flag = false; | ||||||
|  |     const std::vector<u32>& code; | ||||||
|  | }; | ||||||
|  |  | ||||||
| void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { | void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) { | ||||||
|     MICROPROFILE_SCOPE(MacroInterp); |     MICROPROFILE_SCOPE(MacroInterp); | ||||||
| @@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() { | |||||||
|     ASSERT(next_parameter_index < num_parameters); |     ASSERT(next_parameter_index < num_parameters); | ||||||
|     return parameters[next_parameter_index++]; |     return parameters[next_parameter_index++]; | ||||||
| } | } | ||||||
|  | } // Anonymous namespace | ||||||
|  |  | ||||||
|  | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) | ||||||
|  |     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||||||
|  |  | ||||||
|  | std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { | ||||||
|  |     return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
| @@ -3,10 +3,9 @@ | |||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
| #include <array> |  | ||||||
| #include <optional> |  | ||||||
| #include <vector> | #include <vector> | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/macro/macro.h" | #include "video_core/macro/macro.h" | ||||||
|  |  | ||||||
| @@ -26,77 +25,4 @@ private: | |||||||
|     Engines::Maxwell3D& maxwell3d; |     Engines::Maxwell3D& maxwell3d; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class MacroInterpreterImpl : public CachedMacro { |  | ||||||
| public: |  | ||||||
|     explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); |  | ||||||
|     void Execute(const std::vector<u32>& params, u32 method) override; |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     /// Resets the execution engine state, zeroing registers, etc. |  | ||||||
|     void Reset(); |  | ||||||
|  |  | ||||||
|     /** |  | ||||||
|      * Executes a single macro instruction located at the current program counter. Returns whether |  | ||||||
|      * the interpreter should keep running. |  | ||||||
|      * |  | ||||||
|      * @param is_delay_slot Whether the current step is being executed due to a delay slot in a |  | ||||||
|      *                      previous instruction. |  | ||||||
|      */ |  | ||||||
|     bool Step(bool is_delay_slot); |  | ||||||
|  |  | ||||||
|     /// Calculates the result of an ALU operation. src_a OP src_b; |  | ||||||
|     u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); |  | ||||||
|  |  | ||||||
|     /// Performs the result operation on the input result and stores it in the specified register |  | ||||||
|     /// (if necessary). |  | ||||||
|     void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); |  | ||||||
|  |  | ||||||
|     /// Evaluates the branch condition and returns whether the branch should be taken or not. |  | ||||||
|     bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; |  | ||||||
|  |  | ||||||
|     /// Reads an opcode at the current program counter location. |  | ||||||
|     Macro::Opcode GetOpcode() const; |  | ||||||
|  |  | ||||||
|     /// Returns the specified register's value. Register 0 is hardcoded to always return 0. |  | ||||||
|     u32 GetRegister(u32 register_id) const; |  | ||||||
|  |  | ||||||
|     /// Sets the register to the input value. |  | ||||||
|     void SetRegister(u32 register_id, u32 value); |  | ||||||
|  |  | ||||||
|     /// Sets the method address to use for the next Send instruction. |  | ||||||
|     void SetMethodAddress(u32 address); |  | ||||||
|  |  | ||||||
|     /// Calls a GPU Engine method with the input parameter. |  | ||||||
|     void Send(u32 value); |  | ||||||
|  |  | ||||||
|     /// Reads a GPU register located at the method address. |  | ||||||
|     u32 Read(u32 method) const; |  | ||||||
|  |  | ||||||
|     /// Returns the next parameter in the parameter queue. |  | ||||||
|     u32 FetchParameter(); |  | ||||||
|  |  | ||||||
|     Engines::Maxwell3D& maxwell3d; |  | ||||||
|  |  | ||||||
|     /// Current program counter |  | ||||||
|     u32 pc; |  | ||||||
|     /// Program counter to execute at after the delay slot is executed. |  | ||||||
|     std::optional<u32> delayed_pc; |  | ||||||
|  |  | ||||||
|     /// General purpose macro registers. |  | ||||||
|     std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; |  | ||||||
|  |  | ||||||
|     /// Method address to use for the next Send instruction. |  | ||||||
|     Macro::MethodAddress method_address = {}; |  | ||||||
|  |  | ||||||
|     /// Input parameters of the current macro. |  | ||||||
|     std::unique_ptr<u32[]> parameters; |  | ||||||
|     std::size_t num_parameters = 0; |  | ||||||
|     std::size_t parameters_capacity = 0; |  | ||||||
|     /// Index of the next parameter that will be fetched by the 'parm' instruction. |  | ||||||
|     u32 next_parameter_index = 0; |  | ||||||
|  |  | ||||||
|     bool carry_flag = false; |  | ||||||
|     const std::vector<u32>& code; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
| @@ -2,9 +2,17 @@ | |||||||
| // Licensed under GPLv2 or any later version | // Licensed under GPLv2 or any later version | ||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <array> | ||||||
|  | #include <bitset> | ||||||
|  | #include <optional> | ||||||
|  |  | ||||||
|  | #include <xbyak/xbyak.h> | ||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
|  | #include "common/x64/xbyak_abi.h" | ||||||
| #include "common/x64/xbyak_util.h" | #include "common/x64/xbyak_util.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/macro/macro_interpreter.h" | #include "video_core/macro/macro_interpreter.h" | ||||||
| @@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 | |||||||
| MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  | namespace { | ||||||
| constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; | constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; | ||||||
| constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; | constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; | ||||||
| constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; | ||||||
| constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | ||||||
| constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | ||||||
|  |  | ||||||
| static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | ||||||
|     STATE, |     STATE, | ||||||
|     RESULT, |     RESULT, | ||||||
|     PARAMETERS, |     PARAMETERS, | ||||||
| @@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | |||||||
|     BRANCH_HOLDER, |     BRANCH_HOLDER, | ||||||
| }); | }); | ||||||
|  |  | ||||||
| MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | // Arbitrarily chosen based on current booting games. | ||||||
|     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | constexpr size_t MAX_CODE_SIZE = 0x10000; | ||||||
|  |  | ||||||
| std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | std::bitset<32> PersistentCallerSavedRegs() { | ||||||
|     return std::make_unique<MacroJITx64Impl>(maxwell3d, code); |     return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | ||||||
| } | } | ||||||
|  |  | ||||||
| MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { | ||||||
|     : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | public: | ||||||
|     Compile(); |     explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_) | ||||||
| } |         : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} { | ||||||
|  |         Compile(); | ||||||
|  |     } | ||||||
|  |  | ||||||
| MacroJITx64Impl::~MacroJITx64Impl() = default; |     void Execute(const std::vector<u32>& parameters, u32 method) override; | ||||||
|  |  | ||||||
|  |     void Compile_ALU(Macro::Opcode opcode); | ||||||
|  |     void Compile_AddImmediate(Macro::Opcode opcode); | ||||||
|  |     void Compile_ExtractInsert(Macro::Opcode opcode); | ||||||
|  |     void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||||||
|  |     void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||||||
|  |     void Compile_Read(Macro::Opcode opcode); | ||||||
|  |     void Compile_Branch(Macro::Opcode opcode); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     void Optimizer_ScanFlags(); | ||||||
|  |  | ||||||
|  |     void Compile(); | ||||||
|  |     bool Compile_NextInstruction(); | ||||||
|  |  | ||||||
|  |     Xbyak::Reg32 Compile_FetchParameter(); | ||||||
|  |     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||||||
|  |  | ||||||
|  |     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||||||
|  |     void Compile_Send(Xbyak::Reg32 value); | ||||||
|  |  | ||||||
|  |     Macro::Opcode GetOpCode() const; | ||||||
|  |  | ||||||
|  |     struct JITState { | ||||||
|  |         Engines::Maxwell3D* maxwell3d{}; | ||||||
|  |         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||||||
|  |         u32 carry_flag{}; | ||||||
|  |     }; | ||||||
|  |     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||||||
|  |     using ProgramType = void (*)(JITState*, const u32*); | ||||||
|  |  | ||||||
|  |     struct OptimizerState { | ||||||
|  |         bool can_skip_carry{}; | ||||||
|  |         bool has_delayed_pc{}; | ||||||
|  |         bool zero_reg_skip{}; | ||||||
|  |         bool skip_dummy_addimmediate{}; | ||||||
|  |         bool optimize_for_method_move{}; | ||||||
|  |         bool enable_asserts{}; | ||||||
|  |     }; | ||||||
|  |     OptimizerState optimizer{}; | ||||||
|  |  | ||||||
|  |     std::optional<Macro::Opcode> next_opcode{}; | ||||||
|  |     ProgramType program{nullptr}; | ||||||
|  |  | ||||||
|  |     std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||||||
|  |     std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||||||
|  |     Xbyak::Label end_of_code{}; | ||||||
|  |  | ||||||
|  |     bool is_delay_slot{}; | ||||||
|  |     u32 pc{}; | ||||||
|  |  | ||||||
|  |     const std::vector<u32>& code; | ||||||
|  |     Engines::Maxwell3D& maxwell3d; | ||||||
|  | }; | ||||||
|  |  | ||||||
| void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | ||||||
|     MICROPROFILE_SCOPE(MacroJitExecute); |     MICROPROFILE_SCOPE(MacroJitExecute); | ||||||
| @@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | |||||||
|     Compile_ProcessResult(opcode.result_operation, opcode.dst); |     Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | ||||||
|     maxwell3d->CallMethodFromMME(method_address.address, value); |     maxwell3d->CallMethodFromMME(method_address.address, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | ||||||
|     Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); |     Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|     mov(Common::X64::ABI_PARAM1, qword[STATE]); |     mov(Common::X64::ABI_PARAM1, qword[STATE]); | ||||||
|     mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); |     mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | ||||||
| @@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | |||||||
|     L(dont_process); |     L(dont_process); | ||||||
| } | } | ||||||
|  |  | ||||||
| void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | ||||||
|     ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |     ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | ||||||
|     const s32 jump_address = |     const s32 jump_address = | ||||||
|         static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); |         static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | ||||||
| @@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | |||||||
|     L(end); |     L(end); | ||||||
| } | } | ||||||
|  |  | ||||||
| void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | void MacroJITx64Impl::Optimizer_ScanFlags() { | ||||||
|     optimizer.can_skip_carry = true; |     optimizer.can_skip_carry = true; | ||||||
|     optimizer.has_delayed_pc = false; |     optimizer.has_delayed_pc = false; | ||||||
|     for (auto raw_op : code) { |     for (auto raw_op : code) { | ||||||
| @@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() { | |||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { | ||||||
|     mov(eax, dword[PARAMETERS]); |     mov(eax, dword[PARAMETERS]); | ||||||
|     add(PARAMETERS, sizeof(u32)); |     add(PARAMETERS, sizeof(u32)); | ||||||
|     return eax; |     return eax; | ||||||
| @@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const { | |||||||
|     ASSERT(pc < code.size()); |     ASSERT(pc < code.size()); | ||||||
|     return {code[pc]}; |     return {code[pc]}; | ||||||
| } | } | ||||||
|  | } // Anonymous namespace | ||||||
|  |  | ||||||
| std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) | ||||||
|     return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; |     : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} | ||||||
|  |  | ||||||
|  | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||||||
|  |     return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
| @@ -4,12 +4,7 @@ | |||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include <array> |  | ||||||
| #include <bitset> |  | ||||||
| #include <xbyak/xbyak.h> |  | ||||||
| #include "common/bit_field.h" |  | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/x64/xbyak_abi.h" |  | ||||||
| #include "video_core/macro/macro.h" | #include "video_core/macro/macro.h" | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| @@ -18,9 +13,6 @@ namespace Engines { | |||||||
| class Maxwell3D; | class Maxwell3D; | ||||||
| } | } | ||||||
|  |  | ||||||
| /// MAX_CODE_SIZE is arbitrarily chosen based on current booting games |  | ||||||
| constexpr size_t MAX_CODE_SIZE = 0x10000; |  | ||||||
|  |  | ||||||
| class MacroJITx64 final : public MacroEngine { | class MacroJITx64 final : public MacroEngine { | ||||||
| public: | public: | ||||||
|     explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); |     explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); | ||||||
| @@ -32,67 +24,4 @@ private: | |||||||
|     Engines::Maxwell3D& maxwell3d; |     Engines::Maxwell3D& maxwell3d; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { |  | ||||||
| public: |  | ||||||
|     explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_); |  | ||||||
|     ~MacroJITx64Impl(); |  | ||||||
|  |  | ||||||
|     void Execute(const std::vector<u32>& parameters, u32 method) override; |  | ||||||
|  |  | ||||||
|     void Compile_ALU(Macro::Opcode opcode); |  | ||||||
|     void Compile_AddImmediate(Macro::Opcode opcode); |  | ||||||
|     void Compile_ExtractInsert(Macro::Opcode opcode); |  | ||||||
|     void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); |  | ||||||
|     void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); |  | ||||||
|     void Compile_Read(Macro::Opcode opcode); |  | ||||||
|     void Compile_Branch(Macro::Opcode opcode); |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     void Optimizer_ScanFlags(); |  | ||||||
|  |  | ||||||
|     void Compile(); |  | ||||||
|     bool Compile_NextInstruction(); |  | ||||||
|  |  | ||||||
|     Xbyak::Reg32 Compile_FetchParameter(); |  | ||||||
|     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); |  | ||||||
|  |  | ||||||
|     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); |  | ||||||
|     void Compile_Send(Xbyak::Reg32 value); |  | ||||||
|  |  | ||||||
|     Macro::Opcode GetOpCode() const; |  | ||||||
|     std::bitset<32> PersistentCallerSavedRegs() const; |  | ||||||
|  |  | ||||||
|     struct JITState { |  | ||||||
|         Engines::Maxwell3D* maxwell3d{}; |  | ||||||
|         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; |  | ||||||
|         u32 carry_flag{}; |  | ||||||
|     }; |  | ||||||
|     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); |  | ||||||
|     using ProgramType = void (*)(JITState*, const u32*); |  | ||||||
|  |  | ||||||
|     struct OptimizerState { |  | ||||||
|         bool can_skip_carry{}; |  | ||||||
|         bool has_delayed_pc{}; |  | ||||||
|         bool zero_reg_skip{}; |  | ||||||
|         bool skip_dummy_addimmediate{}; |  | ||||||
|         bool optimize_for_method_move{}; |  | ||||||
|         bool enable_asserts{}; |  | ||||||
|     }; |  | ||||||
|     OptimizerState optimizer{}; |  | ||||||
|  |  | ||||||
|     std::optional<Macro::Opcode> next_opcode{}; |  | ||||||
|     ProgramType program{nullptr}; |  | ||||||
|  |  | ||||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> labels; |  | ||||||
|     std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; |  | ||||||
|     Xbyak::Label end_of_code{}; |  | ||||||
|  |  | ||||||
|     bool is_delay_slot{}; |  | ||||||
|     u32 pc{}; |  | ||||||
|     std::optional<u32> delayed_pc; |  | ||||||
|  |  | ||||||
|     const std::vector<u32>& code; |  | ||||||
|     Engines::Maxwell3D& maxwell3d; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| } // namespace Tegra | } // namespace Tegra | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei