From 846c994cc9ff3b53d0d3fa3cb3b8fe0418c462c6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 27 Nov 2021 16:26:48 +0100
Subject: [PATCH] Core: Reimplement Core Timing.

---
 src/core/core_timing.cpp       | 130 +++++++++++++++++++++------------
 src/core/core_timing.h         |  21 +++---
 src/tests/core/core_timing.cpp |   1 -
 3 files changed, 95 insertions(+), 57 deletions(-)

diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 29e7dba9b..918502929 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -7,6 +7,7 @@
 #include <tuple>
 
 #include "common/microprofile.h"
+#include "common/thread.h"
 #include "core/core_timing.h"
 #include "core/core_timing_util.h"
 #include "core/hardware_properties.h"
@@ -59,68 +60,96 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
     const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {};
     ev_lost = CreateEvent("_lost_event", empty_timed_callback);
     if (is_multicore) {
-        timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
+        const auto hardware_concurrency = std::thread::hardware_concurrency();
+        worker_threads.emplace_back(ThreadEntry, std::ref(*this));
+        if (hardware_concurrency > 8) {
+            worker_threads.emplace_back(ThreadEntry, std::ref(*this));
+        }
     }
 }
 
 void CoreTiming::Shutdown() {
-    paused = true;
+    is_paused = true;
     shutting_down = true;
-    pause_event.Set();
-    event.Set();
-    if (timer_thread) {
-        timer_thread->join();
+    {
+        std::unique_lock<std::mutex> main_lock(event_mutex);
+        event_cv.notify_all();
+        wait_pause_cv.notify_all();
     }
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    worker_threads.clear();
     ClearPendingEvents();
-    timer_thread.reset();
     has_started = false;
 }
 
-void CoreTiming::Pause(bool is_paused) {
-    paused = is_paused;
-    pause_event.Set();
-}
-
-void CoreTiming::SyncPause(bool is_paused) {
-    if (is_paused == paused && paused_set == paused) {
+void CoreTiming::Pause(bool is_paused_) {
+    std::unique_lock<std::mutex> main_lock(event_mutex);
+    if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
         return;
     }
-    Pause(is_paused);
-    if (timer_thread) {
-        if (!is_paused) {
-            pause_event.Set();
+    if (is_multicore) {
+        is_paused = is_paused_;
+        event_cv.notify_all();
+        if (!is_paused_) {
+            wait_pause_cv.notify_all();
+        }
+    }
+    paused_state.store(is_paused_, std::memory_order_relaxed);
+}
+
+void CoreTiming::SyncPause(bool is_paused_) {
+    std::unique_lock<std::mutex> main_lock(event_mutex);
+    if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
+        return;
+    }
+
+    if (is_multicore) {
+        is_paused = is_paused_;
+        event_cv.notify_all();
+        if (!is_paused_) {
+            wait_pause_cv.notify_all();
+        }
+    }
+    paused_state.store(is_paused_, std::memory_order_relaxed);
+    if (is_multicore) {
+        if (is_paused_) {
+            wait_signal_cv.wait(main_lock, [this] { return pause_count == worker_threads.size(); });
+        } else {
+            wait_signal_cv.wait(main_lock, [this] { return pause_count == 0; });
         }
-        event.Set();
-        while (paused_set != is_paused)
-            ;
     }
 }
 
 bool CoreTiming::IsRunning() const {
-    return !paused_set;
+    return !paused_state.load(std::memory_order_acquire);
 }
 
 bool CoreTiming::HasPendingEvents() const {
-    return !(wait_set && event_queue.empty());
+    std::unique_lock<std::mutex> main_lock(event_mutex);
+    return !event_queue.empty();
 }
 
 void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future,
                                const std::shared_ptr<EventType>& event_type,
                                std::uintptr_t user_data) {
-    {
-        std::scoped_lock scope{basic_lock};
-        const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count());
 
-        event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type});
+    std::unique_lock<std::mutex> main_lock(event_mutex);
+    const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count());
 
-        std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+    event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type});
+
+    std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+
+    if (is_multicore) {
+        event_cv.notify_one();
     }
-    event.Set();
 }
 
 void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
                                  std::uintptr_t user_data) {
-    std::scoped_lock scope{basic_lock};
+    std::unique_lock<std::mutex> main_lock(event_mutex);
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get() && e.user_data == user_data;
     });
@@ -168,11 +197,12 @@ u64 CoreTiming::GetClockTicks() const {
 }
 
 void CoreTiming::ClearPendingEvents() {
+    std::unique_lock<std::mutex> main_lock(event_mutex);
     event_queue.clear();
 }
 
 void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
-    std::scoped_lock lock{basic_lock};
+    std::unique_lock<std::mutex> main_lock(event_mutex);
 
     const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
         return e.type.lock().get() == event_type.get();
@@ -186,21 +216,21 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
 }
 
 std::optional<s64> CoreTiming::Advance() {
-    std::scoped_lock lock{advance_lock, basic_lock};
     global_timer = GetGlobalTimeNs().count();
 
+    std::unique_lock<std::mutex> main_lock(event_mutex);
     while (!event_queue.empty() && event_queue.front().time <= global_timer) {
         Event evt = std::move(event_queue.front());
         std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
         event_queue.pop_back();
-        basic_lock.unlock();
+        event_mutex.unlock();
 
         if (const auto event_type{evt.type.lock()}) {
-            event_type->callback(
-                evt.user_data, std::chrono::nanoseconds{static_cast<s64>(global_timer - evt.time)});
+            event_type->callback(evt.user_data, std::chrono::nanoseconds{static_cast<s64>(
+                                                    GetGlobalTimeNs().count() - evt.time)});
         }
 
-        basic_lock.lock();
+        event_mutex.lock();
         global_timer = GetGlobalTimeNs().count();
     }
 
@@ -213,26 +243,34 @@ std::optional<s64> CoreTiming::Advance() {
 }
 
 void CoreTiming::ThreadLoop() {
+    const auto predicate = [this] { return !event_queue.empty() || is_paused; };
     has_started = true;
     while (!shutting_down) {
-        while (!paused) {
-            paused_set = false;
+        while (!is_paused && !shutting_down) {
             const auto next_time = Advance();
             if (next_time) {
                 if (*next_time > 0) {
                     std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
-                    event.WaitFor(next_time_ns);
+                    std::unique_lock<std::mutex> main_lock(event_mutex);
+                    event_cv.wait_for(main_lock, next_time_ns, predicate);
                 }
             } else {
-                wait_set = true;
-                event.Wait();
+                std::unique_lock<std::mutex> main_lock(event_mutex);
+                event_cv.wait(main_lock, predicate);
             }
-            wait_set = false;
         }
-        paused_set = true;
-        clock->Pause(true);
-        pause_event.Wait();
-        clock->Pause(false);
+        std::unique_lock<std::mutex> main_lock(event_mutex);
+        pause_count++;
+        if (pause_count == worker_threads.size()) {
+            clock->Pause(true);
+            wait_signal_cv.notify_all();
+        }
+        wait_pause_cv.wait(main_lock, [this] { return !is_paused || shutting_down; });
+        pause_count--;
+        if (pause_count == 0) {
+            clock->Pause(false);
+            wait_signal_cv.notify_all();
+        }
     }
 }
 
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index d27773009..5c9ee2902 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -14,7 +14,6 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "common/thread.h"
 #include "common/wall_clock.h"
 
 namespace Core::Timing {
@@ -146,19 +145,21 @@ private:
     u64 event_fifo_id = 0;
 
     std::shared_ptr<EventType> ev_lost;
-    Common::Event event{};
-    Common::Event pause_event{};
-    std::mutex basic_lock;
-    std::mutex advance_lock;
-    std::unique_ptr<std::thread> timer_thread;
-    std::atomic<bool> paused{};
-    std::atomic<bool> paused_set{};
-    std::atomic<bool> wait_set{};
-    std::atomic<bool> shutting_down{};
     std::atomic<bool> has_started{};
     std::function<void()> on_thread_init{};
 
+    std::vector<std::thread> worker_threads;
+
+    std::condition_variable event_cv;
+    std::condition_variable wait_pause_cv;
+    std::condition_variable wait_signal_cv;
+    mutable std::mutex event_mutex;
+
+    std::atomic<bool> paused_state{};
+    bool is_paused{};
+    bool shutting_down{};
     bool is_multicore{};
+    size_t pause_count{};
 
     /// Cycle timing
     u64 ticks{};
diff --git a/src/tests/core/core_timing.cpp b/src/tests/core/core_timing.cpp
index 8358d36b5..62eb43753 100644
--- a/src/tests/core/core_timing.cpp
+++ b/src/tests/core/core_timing.cpp
@@ -27,7 +27,6 @@ void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_
     static_assert(IDX < CB_IDS.size(), "IDX out of range");
     callbacks_ran_flags.set(IDX);
     REQUIRE(CB_IDS[IDX] == user_data);
-    REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
     delays[IDX] = ns_late.count();
     ++expected_callback;
 }