From a6a43a5ae047404ca0b03aa647ed5b17400ca7b6 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 20:28:30 +0100
Subject: [PATCH 1/7] macro_jit_x64: Remove RESULT_64

This Reg64 codepath has the exact same behaviour as the Reg32 one.
---
 src/video_core/macro/macro_jit_x64.cpp | 18 +++---------------
 src/video_core/macro/macro_jit_x64.h   |  1 -
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 11c1cc3be..9a9d50866 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -19,7 +19,6 @@ static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
 static const Xbyak::Reg64 STATE = Xbyak::util::r11;
 static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
 static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
-static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
 static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
@@ -64,15 +63,15 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
     const bool is_move_operation = !is_a_zero && is_b_zero;
     const bool has_zero_register = is_a_zero || is_b_zero;
 
-    Xbyak::Reg64 src_a;
+    Xbyak::Reg32 src_a;
     Xbyak::Reg32 src_b;
 
     if (!optimizer.zero_reg_skip) {
-        src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+        src_a = Compile_GetRegister(opcode.src_a, RESULT);
         src_b = Compile_GetRegister(opcode.src_b, ebx);
     } else {
         if (!is_a_zero) {
-            src_a = Compile_GetRegister(opcode.src_a, RESULT_64);
+            src_a = Compile_GetRegister(opcode.src_a, RESULT);
         }
         if (!is_b_zero) {
             src_b = Compile_GetRegister(opcode.src_b, ebx);
@@ -553,17 +552,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
     return dst;
 }
 
-Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) {
-    if (index == 0) {
-        // Register 0 is always zero
-        xor_(dst, dst);
-    } else {
-        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
-    }
-
-    return dst;
-}
-
 void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
     Xbyak::Label zero{}, end{};
     xor_(ecx, ecx);
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 21ee157cf..377368086 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,7 +55,6 @@ private:
 
     Xbyak::Reg32 Compile_FetchParameter();
     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-    Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst);
     void Compile_WriteCarry(Xbyak::Reg64 dst);
 
     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);

From 389549b80d7cd7054ec622f4038ff599386e1c04 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 20:51:33 +0100
Subject: [PATCH 2/7] macro_jit_x64: Remove METHOD_ADDRESS_64

Unnecessary variable.
---
 src/video_core/macro/macro_jit_x64.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 9a9d50866..1dcf9957c 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -20,7 +20,6 @@ static const Xbyak::Reg64 STATE = Xbyak::util::r11;
 static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
 static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
@@ -328,7 +327,7 @@ void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
     and_(METHOD_ADDRESS, 0xfff);
     shr(ecx, 12);
     and_(ecx, 0x3f);
-    lea(eax, ptr[rcx + METHOD_ADDRESS_64]);
+    lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
     sal(ecx, 12);
     or_(eax, ecx);
 

From 35db6e1c68f18f401bcae8bd8e8937648c7c67c6 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 20:55:02 +0100
Subject: [PATCH 3/7] macro_jit_x64: Remove JITState::parameters

This can be passed in as an argument instead.
---
 src/video_core/macro/macro_jit_x64.cpp | 6 ++----
 src/video_core/macro/macro_jit_x64.h   | 3 +--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 1dcf9957c..f1d123f51 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -51,8 +51,7 @@ void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
     JITState state{};
     state.maxwell3d = &maxwell3d;
     state.registers = {};
-    state.parameters = parameters.data();
-    program(&state);
+    program(&state, parameters.data());
 }
 
 void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
@@ -422,8 +421,7 @@ void MacroJITx64Impl::Compile() {
     Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
     // JIT state
     mov(STATE, Common::X64::ABI_PARAM1);
-    mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 +
-                          static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]);
+    mov(PARAMETERS, Common::X64::ABI_PARAM2);
     mov(REGISTERS, Common::X64::ABI_PARAM1);
     add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
     xor_(RESULT, RESULT);
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 377368086..9167b2a93 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -66,11 +66,10 @@ private:
     struct JITState {
         Engines::Maxwell3D* maxwell3d{};
         std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
-        const u32* parameters{};
         u32 carry_flag{};
     };
     static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
-    using ProgramType = void (*)(JITState*);
+    using ProgramType = void (*)(JITState*, const u32*);
 
     struct OptimizerState {
         bool can_skip_carry{};

From 79aa7b3aceeecadfb5b15bc25431db7768434f23 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 21:00:59 +0100
Subject: [PATCH 4/7] macro_jit_x64: Remove REGISTERS

Unnecessary since this is just an offset from STATE.
---
 src/video_core/macro/macro_jit_x64.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index f1d123f51..da3b86d3d 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -15,7 +15,6 @@ MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255
 
 namespace Tegra {
 static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
-static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10;
 static const Xbyak::Reg64 STATE = Xbyak::util::r11;
 static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
 static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
@@ -24,7 +23,6 @@ static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
     PARAMETERS,
-    REGISTERS,
     STATE,
     NEXT_PARAMETER,
     RESULT,
@@ -422,14 +420,12 @@ void MacroJITx64Impl::Compile() {
     // JIT state
     mov(STATE, Common::X64::ABI_PARAM1);
     mov(PARAMETERS, Common::X64::ABI_PARAM2);
-    mov(REGISTERS, Common::X64::ABI_PARAM1);
-    add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers)));
     xor_(RESULT, RESULT);
     xor_(METHOD_ADDRESS, METHOD_ADDRESS);
     xor_(NEXT_PARAMETER, NEXT_PARAMETER);
     xor_(BRANCH_HOLDER, BRANCH_HOLDER);
 
-    mov(dword[REGISTERS + 4], Compile_FetchParameter());
+    mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
 
     // Track get register for zero registers and mark it as no-op
     optimizer.zero_reg_skip = true;
@@ -543,7 +539,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
         // Register 0 is always zero
         xor_(dst, dst);
     } else {
-        mov(dst, dword[REGISTERS + index * sizeof(u32)]);
+        mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
     }
 
     return dst;
@@ -564,7 +560,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
         if (reg == 0) {
             return;
         }
-        mov(dword[REGISTERS + reg * sizeof(u32)], result);
+        mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
     };
     auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
 

From c09a9e5cc7f53280218cdfbfd7d7ff056f1c2ff5 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 21:12:53 +0100
Subject: [PATCH 5/7] macro_jit_x64: Select better registers

All registers are now callee-save registers.

RBX and RBP are selected for STATE and RESULT because these are the most commonly accessed registers; using them avoids the REX prefix.
RBP is not used for STATE because it has some SIB restrictions, so RBX emits smaller code.
---
 src/video_core/macro/macro_jit_x64.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index da3b86d3d..1e7b05ac9 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,18 +14,18 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
 
 namespace Tegra {
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9;
-static const Xbyak::Reg64 STATE = Xbyak::util::r11;
-static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12;
-static const Xbyak::Reg32 RESULT = Xbyak::util::r13d;
+static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
+static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
 static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
-    PARAMETERS,
     STATE,
-    NEXT_PARAMETER,
     RESULT,
+    PARAMETERS,
+    NEXT_PARAMETER,
     METHOD_ADDRESS,
     BRANCH_HOLDER,
 });
@@ -64,13 +64,13 @@ void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
 
     if (!optimizer.zero_reg_skip) {
         src_a = Compile_GetRegister(opcode.src_a, RESULT);
-        src_b = Compile_GetRegister(opcode.src_b, ebx);
+        src_b = Compile_GetRegister(opcode.src_b, eax);
     } else {
         if (!is_a_zero) {
             src_a = Compile_GetRegister(opcode.src_a, RESULT);
         }
         if (!is_b_zero) {
-            src_b = Compile_GetRegister(opcode.src_b, ebx);
+            src_b = Compile_GetRegister(opcode.src_b, eax);
         }
     }
     Xbyak::Label skip_carry{};

From 1799f4e7743557c8e41c15201c42431f8d6d6dde Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 21:14:10 +0100
Subject: [PATCH 6/7] macro_jit_x64: Remove unused function Compile_WriteCarry

---
 src/video_core/macro/macro_jit_x64.cpp | 8 --------
 src/video_core/macro/macro_jit_x64.h   | 1 -
 2 files changed, 9 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 1e7b05ac9..b703daad9 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -545,14 +545,6 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
     return dst;
 }
 
-void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) {
-    Xbyak::Label zero{}, end{};
-    xor_(ecx, ecx);
-    shr(dst, 32);
-    setne(cl);
-    mov(dword[STATE + offsetof(JITState, carry_flag)], ecx);
-}
-
 void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
     auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
         // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
index 9167b2a93..a05d8df15 100644
--- a/src/video_core/macro/macro_jit_x64.h
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -55,7 +55,6 @@ private:
 
     Xbyak::Reg32 Compile_FetchParameter();
     Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
-    void Compile_WriteCarry(Xbyak::Reg64 dst);
 
     void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
     void Compile_Send(Xbyak::Reg32 value);

From cf0aad7d6a22024362c7adf04b605108141453f6 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Mon, 15 Jun 2020 21:16:47 +0100
Subject: [PATCH 7/7] macro_jit_x64: Remove NEXT_PARAMETER

Not required, as PARAMETERS can just be incremented directly.
---
 src/video_core/macro/macro_jit_x64.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index b703daad9..2eb98173d 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -17,7 +17,6 @@ namespace Tegra {
 static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
 static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
 static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
-static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r13;
 static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
 static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
 
@@ -25,7 +24,6 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
     STATE,
     RESULT,
     PARAMETERS,
-    NEXT_PARAMETER,
     METHOD_ADDRESS,
     BRANCH_HOLDER,
 });
@@ -422,7 +420,6 @@ void MacroJITx64Impl::Compile() {
     mov(PARAMETERS, Common::X64::ABI_PARAM2);
     xor_(RESULT, RESULT);
     xor_(METHOD_ADDRESS, METHOD_ADDRESS);
-    xor_(NEXT_PARAMETER, NEXT_PARAMETER);
     xor_(BRANCH_HOLDER, BRANCH_HOLDER);
 
     mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
@@ -529,8 +526,8 @@ bool MacroJITx64Impl::Compile_NextInstruction() {
 }
 
 Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
-    mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]);
-    inc(NEXT_PARAMETER);
+    mov(eax, dword[PARAMETERS]);
+    add(PARAMETERS, sizeof(u32));
     return eax;
 }