mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-11-04 00:49:02 -06:00 
			
		
		
		
	Merge pull request #7780 from lioncash/macro
video_core/macro: Move impl classes into their cpp files
This commit is contained in:
		@@ -240,7 +240,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
 | 
			
		||||
        ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
 | 
			
		||||
 | 
			
		||||
    // Execute the current macro.
 | 
			
		||||
    macro_engine->Execute(*this, macro_positions[entry], parameters);
 | 
			
		||||
    macro_engine->Execute(macro_positions[entry], parameters);
 | 
			
		||||
    if (mme_draw.current_mode != MMEDrawMode::Undefined) {
 | 
			
		||||
        FlushMMEInlineDraw();
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -2,12 +2,13 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <optional>
 | 
			
		||||
 | 
			
		||||
#include <boost/container_hash/hash.hpp>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/settings.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/macro/macro.h"
 | 
			
		||||
#include "video_core/macro/macro_hle.h"
 | 
			
		||||
#include "video_core/macro/macro_interpreter.h"
 | 
			
		||||
@@ -24,8 +25,7 @@ void MacroEngine::AddCode(u32 method, u32 data) {
 | 
			
		||||
    uploaded_macro_code[method].push_back(data);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
 | 
			
		||||
                          const std::vector<u32>& parameters) {
 | 
			
		||||
void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
 | 
			
		||||
    auto compiled_macro = macro_cache.find(method);
 | 
			
		||||
    if (compiled_macro != macro_cache.end()) {
 | 
			
		||||
        const auto& cache_info = compiled_macro->second;
 | 
			
		||||
@@ -66,10 +66,9 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
 | 
			
		||||
            cache_info.lle_program = Compile(code);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
 | 
			
		||||
        if (hle_program.has_value()) {
 | 
			
		||||
        if (auto hle_program = hle_macros->GetHLEProgram(cache_info.hash)) {
 | 
			
		||||
            cache_info.has_hle_program = true;
 | 
			
		||||
            cache_info.hle_program = std::move(hle_program.value());
 | 
			
		||||
            cache_info.hle_program = std::move(hle_program);
 | 
			
		||||
            cache_info.hle_program->Execute(parameters, method);
 | 
			
		||||
        } else {
 | 
			
		||||
            cache_info.lle_program->Execute(parameters, method);
 | 
			
		||||
 
 | 
			
		||||
@@ -119,7 +119,7 @@ public:
 | 
			
		||||
    void AddCode(u32 method, u32 data);
 | 
			
		||||
 | 
			
		||||
    // Compiles the macro if its not in the cache, and executes the compiled macro
 | 
			
		||||
    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
 | 
			
		||||
    void Execute(u32 method, const std::vector<u32>& parameters);
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
    virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
 | 
			
		||||
 
 | 
			
		||||
@@ -5,12 +5,15 @@
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/macro/macro.h"
 | 
			
		||||
#include "video_core/macro/macro_hle.h"
 | 
			
		||||
#include "video_core/rasterizer_interface.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
 | 
			
		||||
 | 
			
		||||
// HLE'd functions
 | 
			
		||||
void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters) {
 | 
			
		||||
    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
 | 
			
		||||
@@ -77,7 +80,6 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
 | 
			
		||||
    maxwell3d.CallMethodFromMME(0x8e5, 0x0);
 | 
			
		||||
    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
 | 
			
		||||
}
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
 | 
			
		||||
    {0x771BB18C62444DA0, &HLE_771BB18C62444DA0},
 | 
			
		||||
@@ -85,25 +87,31 @@ constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
 | 
			
		||||
    {0x0217920100488FF7, &HLE_0217920100488FF7},
 | 
			
		||||
}};
 | 
			
		||||
 | 
			
		||||
class HLEMacroImpl final : public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
 | 
			
		||||
        : maxwell3d{maxwell3d_}, func{func_} {}
 | 
			
		||||
 | 
			
		||||
    void Execute(const std::vector<u32>& parameters, u32 method) override {
 | 
			
		||||
        func(maxwell3d, parameters);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
    HLEFunction func;
 | 
			
		||||
};
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {}
 | 
			
		||||
HLEMacro::~HLEMacro() = default;
 | 
			
		||||
 | 
			
		||||
std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
 | 
			
		||||
std::unique_ptr<CachedMacro> HLEMacro::GetHLEProgram(u64 hash) const {
 | 
			
		||||
    const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
 | 
			
		||||
                                 [hash](const auto& pair) { return pair.first == hash; });
 | 
			
		||||
    if (it == hle_funcs.end()) {
 | 
			
		||||
        return std::nullopt;
 | 
			
		||||
        return nullptr;
 | 
			
		||||
    }
 | 
			
		||||
    return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
HLEMacroImpl::~HLEMacroImpl() = default;
 | 
			
		||||
 | 
			
		||||
HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d_, HLEFunction func_)
 | 
			
		||||
    : maxwell3d{maxwell3d_}, func{func_} {}
 | 
			
		||||
 | 
			
		||||
void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
 | 
			
		||||
    func(maxwell3d, parameters);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
@@ -5,10 +5,7 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/macro/macro.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
 | 
			
		||||
@@ -16,29 +13,17 @@ namespace Engines {
 | 
			
		||||
class Maxwell3D;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
 | 
			
		||||
 | 
			
		||||
class HLEMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit HLEMacro(Engines::Maxwell3D& maxwell3d_);
 | 
			
		||||
    ~HLEMacro();
 | 
			
		||||
 | 
			
		||||
    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
 | 
			
		||||
    // Allocates and returns a cached macro if the hash matches a known function.
 | 
			
		||||
    // Returns nullptr otherwise.
 | 
			
		||||
    [[nodiscard]] std::unique_ptr<CachedMacro> GetHLEProgram(u64 hash) const;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class HLEMacroImpl : public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
 | 
			
		||||
    ~HLEMacroImpl();
 | 
			
		||||
 | 
			
		||||
    void Execute(const std::vector<u32>& parameters, u32 method) override;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
    HLEFunction func;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
@@ -2,6 +2,9 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <optional>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
@@ -11,16 +14,81 @@
 | 
			
		||||
MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
 | 
			
		||||
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
 | 
			
		||||
namespace {
 | 
			
		||||
class MacroInterpreterImpl final : public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
 | 
			
		||||
        : maxwell3d{maxwell3d_}, code{code_} {}
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
 | 
			
		||||
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
 | 
			
		||||
}
 | 
			
		||||
    void Execute(const std::vector<u32>& params, u32 method) override;
 | 
			
		||||
 | 
			
		||||
MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_,
 | 
			
		||||
                                           const std::vector<u32>& code_)
 | 
			
		||||
    : maxwell3d{maxwell3d_}, code{code_} {}
 | 
			
		||||
private:
 | 
			
		||||
    /// Resets the execution engine state, zeroing registers, etc.
 | 
			
		||||
    void Reset();
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Executes a single macro instruction located at the current program counter. Returns whether
 | 
			
		||||
     * the interpreter should keep running.
 | 
			
		||||
     *
 | 
			
		||||
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
 | 
			
		||||
     *                      previous instruction.
 | 
			
		||||
     */
 | 
			
		||||
    bool Step(bool is_delay_slot);
 | 
			
		||||
 | 
			
		||||
    /// Calculates the result of an ALU operation. src_a OP src_b;
 | 
			
		||||
    u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
 | 
			
		||||
 | 
			
		||||
    /// Performs the result operation on the input result and stores it in the specified register
 | 
			
		||||
    /// (if necessary).
 | 
			
		||||
    void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
 | 
			
		||||
 | 
			
		||||
    /// Evaluates the branch condition and returns whether the branch should be taken or not.
 | 
			
		||||
    bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
 | 
			
		||||
 | 
			
		||||
    /// Reads an opcode at the current program counter location.
 | 
			
		||||
    Macro::Opcode GetOpcode() const;
 | 
			
		||||
 | 
			
		||||
    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
 | 
			
		||||
    u32 GetRegister(u32 register_id) const;
 | 
			
		||||
 | 
			
		||||
    /// Sets the register to the input value.
 | 
			
		||||
    void SetRegister(u32 register_id, u32 value);
 | 
			
		||||
 | 
			
		||||
    /// Sets the method address to use for the next Send instruction.
 | 
			
		||||
    void SetMethodAddress(u32 address);
 | 
			
		||||
 | 
			
		||||
    /// Calls a GPU Engine method with the input parameter.
 | 
			
		||||
    void Send(u32 value);
 | 
			
		||||
 | 
			
		||||
    /// Reads a GPU register located at the method address.
 | 
			
		||||
    u32 Read(u32 method) const;
 | 
			
		||||
 | 
			
		||||
    /// Returns the next parameter in the parameter queue.
 | 
			
		||||
    u32 FetchParameter();
 | 
			
		||||
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
 | 
			
		||||
    /// Current program counter
 | 
			
		||||
    u32 pc{};
 | 
			
		||||
    /// Program counter to execute at after the delay slot is executed.
 | 
			
		||||
    std::optional<u32> delayed_pc;
 | 
			
		||||
 | 
			
		||||
    /// General purpose macro registers.
 | 
			
		||||
    std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
 | 
			
		||||
 | 
			
		||||
    /// Method address to use for the next Send instruction.
 | 
			
		||||
    Macro::MethodAddress method_address = {};
 | 
			
		||||
 | 
			
		||||
    /// Input parameters of the current macro.
 | 
			
		||||
    std::unique_ptr<u32[]> parameters;
 | 
			
		||||
    std::size_t num_parameters = 0;
 | 
			
		||||
    std::size_t parameters_capacity = 0;
 | 
			
		||||
    /// Index of the next parameter that will be fetched by the 'parm' instruction.
 | 
			
		||||
    u32 next_parameter_index = 0;
 | 
			
		||||
 | 
			
		||||
    bool carry_flag = false;
 | 
			
		||||
    const std::vector<u32>& code;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void MacroInterpreterImpl::Execute(const std::vector<u32>& params, u32 method) {
 | 
			
		||||
    MICROPROFILE_SCOPE(MacroInterp);
 | 
			
		||||
@@ -283,5 +351,13 @@ u32 MacroInterpreterImpl::FetchParameter() {
 | 
			
		||||
    ASSERT(next_parameter_index < num_parameters);
 | 
			
		||||
    return parameters[next_parameter_index++];
 | 
			
		||||
}
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_)
 | 
			
		||||
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
 | 
			
		||||
    return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
@@ -3,10 +3,9 @@
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <optional>
 | 
			
		||||
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "video_core/macro/macro.h"
 | 
			
		||||
 | 
			
		||||
@@ -26,77 +25,4 @@ private:
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class MacroInterpreterImpl : public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
 | 
			
		||||
    void Execute(const std::vector<u32>& params, u32 method) override;
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    /// Resets the execution engine state, zeroing registers, etc.
 | 
			
		||||
    void Reset();
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Executes a single macro instruction located at the current program counter. Returns whether
 | 
			
		||||
     * the interpreter should keep running.
 | 
			
		||||
     *
 | 
			
		||||
     * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
 | 
			
		||||
     *                      previous instruction.
 | 
			
		||||
     */
 | 
			
		||||
    bool Step(bool is_delay_slot);
 | 
			
		||||
 | 
			
		||||
    /// Calculates the result of an ALU operation. src_a OP src_b;
 | 
			
		||||
    u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
 | 
			
		||||
 | 
			
		||||
    /// Performs the result operation on the input result and stores it in the specified register
 | 
			
		||||
    /// (if necessary).
 | 
			
		||||
    void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
 | 
			
		||||
 | 
			
		||||
    /// Evaluates the branch condition and returns whether the branch should be taken or not.
 | 
			
		||||
    bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
 | 
			
		||||
 | 
			
		||||
    /// Reads an opcode at the current program counter location.
 | 
			
		||||
    Macro::Opcode GetOpcode() const;
 | 
			
		||||
 | 
			
		||||
    /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
 | 
			
		||||
    u32 GetRegister(u32 register_id) const;
 | 
			
		||||
 | 
			
		||||
    /// Sets the register to the input value.
 | 
			
		||||
    void SetRegister(u32 register_id, u32 value);
 | 
			
		||||
 | 
			
		||||
    /// Sets the method address to use for the next Send instruction.
 | 
			
		||||
    void SetMethodAddress(u32 address);
 | 
			
		||||
 | 
			
		||||
    /// Calls a GPU Engine method with the input parameter.
 | 
			
		||||
    void Send(u32 value);
 | 
			
		||||
 | 
			
		||||
    /// Reads a GPU register located at the method address.
 | 
			
		||||
    u32 Read(u32 method) const;
 | 
			
		||||
 | 
			
		||||
    /// Returns the next parameter in the parameter queue.
 | 
			
		||||
    u32 FetchParameter();
 | 
			
		||||
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
 | 
			
		||||
    /// Current program counter
 | 
			
		||||
    u32 pc;
 | 
			
		||||
    /// Program counter to execute at after the delay slot is executed.
 | 
			
		||||
    std::optional<u32> delayed_pc;
 | 
			
		||||
 | 
			
		||||
    /// General purpose macro registers.
 | 
			
		||||
    std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
 | 
			
		||||
 | 
			
		||||
    /// Method address to use for the next Send instruction.
 | 
			
		||||
    Macro::MethodAddress method_address = {};
 | 
			
		||||
 | 
			
		||||
    /// Input parameters of the current macro.
 | 
			
		||||
    std::unique_ptr<u32[]> parameters;
 | 
			
		||||
    std::size_t num_parameters = 0;
 | 
			
		||||
    std::size_t parameters_capacity = 0;
 | 
			
		||||
    /// Index of the next parameter that will be fetched by the 'parm' instruction.
 | 
			
		||||
    u32 next_parameter_index = 0;
 | 
			
		||||
 | 
			
		||||
    bool carry_flag = false;
 | 
			
		||||
    const std::vector<u32>& code;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
@@ -2,9 +2,17 @@
 | 
			
		||||
// Licensed under GPLv2 or any later version
 | 
			
		||||
// Refer to the license.txt file included.
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <optional>
 | 
			
		||||
 | 
			
		||||
#include <xbyak/xbyak.h>
 | 
			
		||||
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
#include "common/logging/log.h"
 | 
			
		||||
#include "common/microprofile.h"
 | 
			
		||||
#include "common/x64/xbyak_abi.h"
 | 
			
		||||
#include "common/x64/xbyak_util.h"
 | 
			
		||||
#include "video_core/engines/maxwell_3d.h"
 | 
			
		||||
#include "video_core/macro/macro_interpreter.h"
 | 
			
		||||
@@ -14,13 +22,14 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
 | 
			
		||||
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
namespace {
 | 
			
		||||
constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
 | 
			
		||||
constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
 | 
			
		||||
constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
 | 
			
		||||
constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
 | 
			
		||||
constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 | 
			
		||||
 | 
			
		||||
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
 | 
			
		||||
const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
 | 
			
		||||
    STATE,
 | 
			
		||||
    RESULT,
 | 
			
		||||
    PARAMETERS,
 | 
			
		||||
@@ -28,19 +37,75 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
 | 
			
		||||
    BRANCH_HOLDER,
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
 | 
			
		||||
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
 | 
			
		||||
// Arbitrarily chosen based on current booting games.
 | 
			
		||||
constexpr size_t MAX_CODE_SIZE = 0x10000;
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
 | 
			
		||||
    return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
 | 
			
		||||
std::bitset<32> PersistentCallerSavedRegs() {
 | 
			
		||||
    return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
 | 
			
		||||
    : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
 | 
			
		||||
    Compile();
 | 
			
		||||
}
 | 
			
		||||
class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_)
 | 
			
		||||
        : CodeGenerator{MAX_CODE_SIZE}, code{code_}, maxwell3d{maxwell3d_} {
 | 
			
		||||
        Compile();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
MacroJITx64Impl::~MacroJITx64Impl() = default;
 | 
			
		||||
    void Execute(const std::vector<u32>& parameters, u32 method) override;
 | 
			
		||||
 | 
			
		||||
    void Compile_ALU(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_AddImmediate(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractInsert(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_Read(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_Branch(Macro::Opcode opcode);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void Optimizer_ScanFlags();
 | 
			
		||||
 | 
			
		||||
    void Compile();
 | 
			
		||||
    bool Compile_NextInstruction();
 | 
			
		||||
 | 
			
		||||
    Xbyak::Reg32 Compile_FetchParameter();
 | 
			
		||||
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
 | 
			
		||||
 | 
			
		||||
    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
 | 
			
		||||
    void Compile_Send(Xbyak::Reg32 value);
 | 
			
		||||
 | 
			
		||||
    Macro::Opcode GetOpCode() const;
 | 
			
		||||
 | 
			
		||||
    struct JITState {
 | 
			
		||||
        Engines::Maxwell3D* maxwell3d{};
 | 
			
		||||
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
 | 
			
		||||
        u32 carry_flag{};
 | 
			
		||||
    };
 | 
			
		||||
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
 | 
			
		||||
    using ProgramType = void (*)(JITState*, const u32*);
 | 
			
		||||
 | 
			
		||||
    struct OptimizerState {
 | 
			
		||||
        bool can_skip_carry{};
 | 
			
		||||
        bool has_delayed_pc{};
 | 
			
		||||
        bool zero_reg_skip{};
 | 
			
		||||
        bool skip_dummy_addimmediate{};
 | 
			
		||||
        bool optimize_for_method_move{};
 | 
			
		||||
        bool enable_asserts{};
 | 
			
		||||
    };
 | 
			
		||||
    OptimizerState optimizer{};
 | 
			
		||||
 | 
			
		||||
    std::optional<Macro::Opcode> next_opcode{};
 | 
			
		||||
    ProgramType program{nullptr};
 | 
			
		||||
 | 
			
		||||
    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
 | 
			
		||||
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
 | 
			
		||||
    Xbyak::Label end_of_code{};
 | 
			
		||||
 | 
			
		||||
    bool is_delay_slot{};
 | 
			
		||||
    u32 pc{};
 | 
			
		||||
 | 
			
		||||
    const std::vector<u32>& code;
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
 | 
			
		||||
    MICROPROFILE_SCOPE(MacroJitExecute);
 | 
			
		||||
@@ -307,11 +372,11 @@ void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
 | 
			
		||||
    Compile_ProcessResult(opcode.result_operation, opcode.dst);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
 | 
			
		||||
void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
 | 
			
		||||
    maxwell3d->CallMethodFromMME(method_address.address, value);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
 | 
			
		||||
void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
 | 
			
		||||
    Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
 | 
			
		||||
    mov(Common::X64::ABI_PARAM1, qword[STATE]);
 | 
			
		||||
    mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
 | 
			
		||||
@@ -338,7 +403,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
 | 
			
		||||
    L(dont_process);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
 | 
			
		||||
void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
 | 
			
		||||
    ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
 | 
			
		||||
    const s32 jump_address =
 | 
			
		||||
        static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
 | 
			
		||||
@@ -392,7 +457,7 @@ void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
 | 
			
		||||
    L(end);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
 | 
			
		||||
void MacroJITx64Impl::Optimizer_ScanFlags() {
 | 
			
		||||
    optimizer.can_skip_carry = true;
 | 
			
		||||
    optimizer.has_delayed_pc = false;
 | 
			
		||||
    for (auto raw_op : code) {
 | 
			
		||||
@@ -534,7 +599,7 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
 | 
			
		||||
Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() {
 | 
			
		||||
    mov(eax, dword[PARAMETERS]);
 | 
			
		||||
    add(PARAMETERS, sizeof(u32));
 | 
			
		||||
    return eax;
 | 
			
		||||
@@ -611,9 +676,12 @@ Macro::Opcode MacroJITx64Impl::GetOpCode() const {
 | 
			
		||||
    ASSERT(pc < code.size());
 | 
			
		||||
    return {code[pc]};
 | 
			
		||||
}
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
 | 
			
		||||
    return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
 | 
			
		||||
MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_)
 | 
			
		||||
    : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {}
 | 
			
		||||
 | 
			
		||||
std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
 | 
			
		||||
    return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
@@ -4,12 +4,7 @@
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <xbyak/xbyak.h>
 | 
			
		||||
#include "common/bit_field.h"
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "common/x64/xbyak_abi.h"
 | 
			
		||||
#include "video_core/macro/macro.h"
 | 
			
		||||
 | 
			
		||||
namespace Tegra {
 | 
			
		||||
@@ -18,9 +13,6 @@ namespace Engines {
 | 
			
		||||
class Maxwell3D;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
 | 
			
		||||
constexpr size_t MAX_CODE_SIZE = 0x10000;
 | 
			
		||||
 | 
			
		||||
class MacroJITx64 final : public MacroEngine {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_);
 | 
			
		||||
@@ -32,67 +24,4 @@ private:
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
 | 
			
		||||
public:
 | 
			
		||||
    explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector<u32>& code_);
 | 
			
		||||
    ~MacroJITx64Impl();
 | 
			
		||||
 | 
			
		||||
    void Execute(const std::vector<u32>& parameters, u32 method) override;
 | 
			
		||||
 | 
			
		||||
    void Compile_ALU(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_AddImmediate(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractInsert(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_Read(Macro::Opcode opcode);
 | 
			
		||||
    void Compile_Branch(Macro::Opcode opcode);
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void Optimizer_ScanFlags();
 | 
			
		||||
 | 
			
		||||
    void Compile();
 | 
			
		||||
    bool Compile_NextInstruction();
 | 
			
		||||
 | 
			
		||||
    Xbyak::Reg32 Compile_FetchParameter();
 | 
			
		||||
    Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
 | 
			
		||||
 | 
			
		||||
    void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
 | 
			
		||||
    void Compile_Send(Xbyak::Reg32 value);
 | 
			
		||||
 | 
			
		||||
    Macro::Opcode GetOpCode() const;
 | 
			
		||||
    std::bitset<32> PersistentCallerSavedRegs() const;
 | 
			
		||||
 | 
			
		||||
    struct JITState {
 | 
			
		||||
        Engines::Maxwell3D* maxwell3d{};
 | 
			
		||||
        std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
 | 
			
		||||
        u32 carry_flag{};
 | 
			
		||||
    };
 | 
			
		||||
    static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
 | 
			
		||||
    using ProgramType = void (*)(JITState*, const u32*);
 | 
			
		||||
 | 
			
		||||
    struct OptimizerState {
 | 
			
		||||
        bool can_skip_carry{};
 | 
			
		||||
        bool has_delayed_pc{};
 | 
			
		||||
        bool zero_reg_skip{};
 | 
			
		||||
        bool skip_dummy_addimmediate{};
 | 
			
		||||
        bool optimize_for_method_move{};
 | 
			
		||||
        bool enable_asserts{};
 | 
			
		||||
    };
 | 
			
		||||
    OptimizerState optimizer{};
 | 
			
		||||
 | 
			
		||||
    std::optional<Macro::Opcode> next_opcode{};
 | 
			
		||||
    ProgramType program{nullptr};
 | 
			
		||||
 | 
			
		||||
    std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
 | 
			
		||||
    std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
 | 
			
		||||
    Xbyak::Label end_of_code{};
 | 
			
		||||
 | 
			
		||||
    bool is_delay_slot{};
 | 
			
		||||
    u32 pc{};
 | 
			
		||||
    std::optional<u32> delayed_pc;
 | 
			
		||||
 | 
			
		||||
    const std::vector<u32>& code;
 | 
			
		||||
    Engines::Maxwell3D& maxwell3d;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace Tegra
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user