From c9d10de644078a29e2310791ee221f3bc916e923 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sun, 20 Mar 2016 00:37:05 -0400
Subject: [PATCH] shader_jit_x64: Allocate each program independently and
 persist for emu session.

---
 src/video_core/shader/shader.cpp         | 29 ++++++++----------------
 src/video_core/shader/shader_jit_x64.cpp | 17 +++++++-------
 src/video_core/shader/shader_jit_x64.h   | 20 ++++++++--------
 3 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 78d295c76..e17368a4a 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -28,15 +28,8 @@ namespace Pica {
 namespace Shader {
 
 #ifdef ARCHITECTURE_x86_64
-static std::unordered_map<u64, CompiledShader*> shader_map;
-static JitCompiler jit;
-static CompiledShader* jit_shader;
-
-static void ClearCache() {
-    shader_map.clear();
-    jit.Clear();
-    LOG_INFO(HW_GPU, "Shader JIT cache cleared");
-}
+static std::unordered_map<u64, std::unique_ptr<JitCompiler>> shader_map;
+static const JitCompiler* jit_shader;
 #endif // ARCHITECTURE_x86_64
 
 void Setup(UnitState<false>& state) {
@@ -48,16 +41,12 @@ void Setup(UnitState<false>& state) {
 
         auto iter = shader_map.find(cache_key);
         if (iter != shader_map.end()) {
-            jit_shader = iter->second;
+            jit_shader = iter->second.get();
         } else {
-            // Check if remaining JIT code space is enough for at least one more (massive) shader
-            if (jit.GetSpaceLeft() < jit_shader_size) {
-                // If not, clear the cache of all previously compiled shaders
-                ClearCache();
-            }
-
-            jit_shader = jit.Compile();
-            shader_map.emplace(cache_key, jit_shader);
+            auto shader = std::make_unique<JitCompiler>();
+            shader->Compile();
+            jit_shader = shader.get();
+            shader_map[cache_key] = std::move(shader);
         }
     }
 #endif // ARCHITECTURE_x86_64
@@ -65,7 +54,7 @@ void Setup(UnitState<false>& state) {
 
 void Shutdown() {
 #ifdef ARCHITECTURE_x86_64
-    ClearCache();
+    shader_map.clear();
 #endif // ARCHITECTURE_x86_64
 }
 
@@ -109,7 +98,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
 
 #ifdef ARCHITECTURE_x86_64
     if (VideoCore::g_shader_jit_enabled)
-        jit_shader(&state.registers);
+        jit_shader->Run(&state.registers);
     else
         RunInterpreter(state);
 #else
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c798992ec..3da4e51fa 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -589,7 +589,7 @@ void JitCompiler::Compile_CALL(Instruction instr) {
     fixup_branches.push_back({ b, instr.flow_control.dest_offset });
 
     // Make sure that if the above code changes, SKIP gets updated
-    ASSERT(reinterpret_cast<uintptr_t>(GetCodePtr()) - start == SKIP);
+    ASSERT(reinterpret_cast<ptrdiff_t>(GetCodePtr()) - start == SKIP);
 }
 
 void JitCompiler::Compile_CALLC(Instruction instr) {
@@ -803,8 +803,8 @@ void JitCompiler::FindReturnOffsets() {
     }
 }
 
-CompiledShader* JitCompiler::Compile() {
-    const u8* start = GetCodePtr();
+void JitCompiler::Compile() {
+    program = (CompiledShader*)GetCodePtr();
 
     // The stack pointer is 8 modulo 16 at the entry of a procedure
     ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
@@ -850,15 +850,14 @@ CompiledShader* JitCompiler::Compile() {
         SetJumpTarget(branch.first, code_ptr[branch.second]);
     }
 
-    return (CompiledShader*)start;
+    uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program);
+    ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
+
+    LOG_DEBUG(HW_GPU, "Compiled shader size=%zu", static_cast<size_t>(size));
 }
 
 JitCompiler::JitCompiler() {
-    AllocCodeSpace(jit_cache_size);
-}
-
-void JitCompiler::Clear() {
-    ClearCodeSpace();
+    AllocCodeSpace(MAX_SHADER_SIZE);
 }
 
 } // namespace Shader
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index d6f03892d..19f9bdb56 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -22,10 +22,8 @@ namespace Pica {
 
 namespace Shader {
 
-/// Memory needed to be available to compile the next shader (otherwise, clear the cache)
-constexpr size_t jit_shader_size = 1024 * 512;
-/// Memory allocated for the JIT code space cache
-constexpr size_t jit_cache_size = 1024 * 1024 * 8;
+/// Memory allocated for each compiled shader (64 KiB)
+constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
 
 using CompiledShader = void(void* registers);
 
@@ -37,9 +35,11 @@ class JitCompiler : public Gen::XCodeBlock {
 public:
     JitCompiler();
 
-    CompiledShader* Compile();
+    void Run(void* registers) const {
+        program(registers);
+    }
 
-    void Clear();
+    void Compile();
 
     void Compile_ADD(Instruction instr);
     void Compile_DP3(Instruction instr);
@@ -104,12 +104,14 @@ private:
     /// Offsets in code where a return needs to be inserted
     std::set<unsigned> return_offsets;
 
-    unsigned last_program_counter;  ///< Offset of the most recent instruction decoded
-    unsigned program_counter;       ///< Offset of the next instruction to decode
-    bool looping = false;           ///< True if compiling a loop, used to check for nested loops
+    unsigned last_program_counter = 0;  ///< Offset of the most recent instruction decoded
+    unsigned program_counter = 0;       ///< Offset of the next instruction to decode
+    bool looping = false;               ///< True if compiling a loop, used to check for nested loops
 
     /// Branches that need to be fixed up once the entire shader program is compiled
     std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
+
+    CompiledShader* program = nullptr;
 };
 
 } // Shader