From 836ec9176aee5558c69764df46aa8347fca2e3d2 Mon Sep 17 00:00:00 2001
From: MerryMage <MerryMage@users.noreply.github.com>
Date: Sun, 16 Aug 2020 13:19:55 +0100
Subject: [PATCH] dynarmic: Add unsafe optimizations

---
 externals/dynarmic                        |  2 +-
 src/core/arm/dynarmic/arm_dynarmic_32.cpp | 13 +++++-
 src/core/arm/dynarmic/arm_dynarmic_64.cpp | 13 +++++-
 src/core/settings.h                       |  6 ++-
 src/yuzu/configuration/config.cpp         | 10 +++++
 src/yuzu/configuration/configure_cpu.cpp  | 17 +++++++-
 src/yuzu/configuration/configure_cpu.h    |  1 +
 src/yuzu/configuration/configure_cpu.ui   | 52 +++++++++++++++++++++++
 8 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/externals/dynarmic b/externals/dynarmic
index 82417da78..0e1112b7d 160000
--- a/externals/dynarmic
+++ b/externals/dynarmic
@@ -1 +1 @@
-Subproject commit 82417da7803e2cf18efc28a1cd3f3d0a4b6045ae
+Subproject commit 0e1112b7df77ae55a62a51622940d5c8f9e8c84c
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 443ca72eb..b5f28a86e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -143,7 +143,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
     config.wall_clock_cntpct = uses_wall_clock;
 
     // Safe optimizations
-    if (Settings::values.cpu_accuracy != Settings::CPUAccuracy::Accurate) {
+    if (Settings::values.cpu_accuracy == Settings::CPUAccuracy::DebugMode) {
         if (!Settings::values.cpuopt_page_tables) {
             config.page_table = nullptr;
         }
@@ -170,6 +170,17 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
         }
     }
 
+    // Unsafe optimizations
+    if (Settings::values.cpu_accuracy == Settings::CPUAccuracy::Unsafe) {
+        config.unsafe_optimizations = true;
+        if (Settings::values.cpuopt_unsafe_unfuse_fma) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
+        }
+        if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
+        }
+    }
+
     return std::make_unique<Dynarmic::A32::Jit>(config);
 }
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index a63a04a25..ce9968724 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -195,7 +195,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
     config.wall_clock_cntpct = uses_wall_clock;
 
     // Safe optimizations
-    if (Settings::values.cpu_accuracy != Settings::CPUAccuracy::Accurate) {
+    if (Settings::values.cpu_accuracy == Settings::CPUAccuracy::DebugMode) {
         if (!Settings::values.cpuopt_page_tables) {
             config.page_table = nullptr;
         }
@@ -222,6 +222,17 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable&
         }
     }
 
+    // Unsafe optimizations
+    if (Settings::values.cpu_accuracy == Settings::CPUAccuracy::Unsafe) {
+        config.unsafe_optimizations = true;
+        if (Settings::values.cpuopt_unsafe_unfuse_fma) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
+        }
+        if (Settings::values.cpuopt_unsafe_reduce_fp_error) {
+            config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_ReducedErrorFP;
+        }
+    }
+
     return std::make_shared<Dynarmic::A64::Jit>(config);
 }
 
diff --git a/src/core/settings.h b/src/core/settings.h
index bb145f193..3681b5e9d 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -359,7 +359,8 @@ enum class GPUAccuracy : u32 {
 
 enum class CPUAccuracy {
     Accurate = 0,
-    DebugMode = 1,
+    Unsafe = 1,    // Enables dynarmic's unsafe optimizations (see MakeJit)
+    DebugMode = 2, // Gates the safe-optimization toggles; values mirror the combo box order
 };
 
 extern bool configuring_global;
@@ -419,6 +420,9 @@ struct Values {
     bool cpuopt_misc_ir;
     bool cpuopt_reduce_misalign_checks;
 
+    bool cpuopt_unsafe_unfuse_fma;
+    bool cpuopt_unsafe_reduce_fp_error;
+
     // Renderer
     Setting<RendererBackend> renderer_backend;
     bool renderer_debug;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index cb71b8d11..d8bb86421 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -638,6 +638,11 @@ void Config::ReadCpuValues() {
             ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool();
         Settings::values.cpuopt_reduce_misalign_checks =
             ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool();
+
+        Settings::values.cpuopt_unsafe_unfuse_fma =
+            ReadSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"), true).toBool();
+        Settings::values.cpuopt_unsafe_reduce_fp_error =
+            ReadSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true).toBool();
     }
 
     qt_config->endGroup();
@@ -1135,6 +1140,11 @@ void Config::SaveCpuValues() {
         WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true);
         WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"),
                      Settings::values.cpuopt_reduce_misalign_checks, true);
+
+        WriteSetting(QStringLiteral("cpuopt_unsafe_unfuse_fma"),
+                     Settings::values.cpuopt_unsafe_unfuse_fma, true);
+        WriteSetting(QStringLiteral("cpuopt_unsafe_reduce_fp_error"),
+                     Settings::values.cpuopt_unsafe_reduce_fp_error, true);
     }
 
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_cpu.cpp b/src/yuzu/configuration/configure_cpu.cpp
index 7493e5ffb..37fcd6adc 100644
--- a/src/yuzu/configuration/configure_cpu.cpp
+++ b/src/yuzu/configuration/configure_cpu.cpp
@@ -19,6 +19,8 @@ ConfigureCpu::ConfigureCpu(QWidget* parent) : QWidget(parent), ui(new Ui::Config
 
     connect(ui->accuracy, qOverload<int>(&QComboBox::activated), this,
             &ConfigureCpu::AccuracyUpdated);
+    connect(ui->accuracy, qOverload<int>(&QComboBox::currentIndexChanged), this,
+            &ConfigureCpu::UpdateGroup);
 }
 
 ConfigureCpu::~ConfigureCpu() = default;
@@ -28,6 +30,12 @@ void ConfigureCpu::SetConfiguration() {
 
     ui->accuracy->setEnabled(runtime_lock);
     ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy));
+    UpdateGroup(static_cast<int>(Settings::values.cpu_accuracy));
+
+    ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma);
+    ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
+    ui->cpuopt_unsafe_reduce_fp_error->setChecked(Settings::values.cpuopt_unsafe_reduce_fp_error);
 }
 
 void ConfigureCpu::AccuracyUpdated(int index) {
@@ -38,14 +46,21 @@ void ConfigureCpu::AccuracyUpdated(int index) {
                                                  QMessageBox::Yes | QMessageBox::No);
         if (result == QMessageBox::No) {
             ui->accuracy->setCurrentIndex(static_cast<int>(Settings::CPUAccuracy::Accurate));
-            return;
+            UpdateGroup(static_cast<int>(Settings::CPUAccuracy::Accurate));
         }
     }
 }
 
+// Shows the unsafe-options group only while the Unsafe accuracy entry is selected.
+void ConfigureCpu::UpdateGroup(int index) {
+    ui->unsafe_group->setVisible(index == static_cast<int>(Settings::CPUAccuracy::Unsafe));
+}
+
 void ConfigureCpu::ApplyConfiguration() {
     Settings::values.cpu_accuracy =
         static_cast<Settings::CPUAccuracy>(ui->accuracy->currentIndex());
+    Settings::values.cpuopt_unsafe_unfuse_fma = ui->cpuopt_unsafe_unfuse_fma->isChecked();
+    Settings::values.cpuopt_unsafe_reduce_fp_error = ui->cpuopt_unsafe_reduce_fp_error->isChecked();
 }
 
 void ConfigureCpu::changeEvent(QEvent* event) {
diff --git a/src/yuzu/configuration/configure_cpu.h b/src/yuzu/configuration/configure_cpu.h
index e4741d3a4..3c5683d81 100644
--- a/src/yuzu/configuration/configure_cpu.h
+++ b/src/yuzu/configuration/configure_cpu.h
@@ -26,6 +26,7 @@ private:
     void RetranslateUI();
 
     void AccuracyUpdated(int index);
+    void UpdateGroup(int index);
 
     void SetConfiguration();
 
diff --git a/src/yuzu/configuration/configure_cpu.ui b/src/yuzu/configuration/configure_cpu.ui
index bf6ea79bb..ebdd2e6e9 100644
--- a/src/yuzu/configuration/configure_cpu.ui
+++ b/src/yuzu/configuration/configure_cpu.ui
@@ -38,6 +38,11 @@
               <string>Accurate</string>
              </property>
             </item>
+            <item>
+             <property name="text">
+              <string>Unsafe</string>
+             </property>
+            </item>
             <item>
              <property name="text">
               <string>Enable Debug Mode</string>
@@ -62,6 +67,53 @@
      </item>
     </layout>
    </item>
+   <item>
+    <layout class="QVBoxLayout">
+     <item>
+      <widget class="QGroupBox" name="unsafe_group">
+       <property name="title">
+        <string>Unsafe CPU Optimization Settings</string>
+       </property>
+       <layout class="QVBoxLayout">
+        <item>
+         <widget class="QLabel">
+          <property name="wordWrap">
+           <bool>true</bool>
+          </property>
+          <property name="text">
+           <string>These settings reduce accuracy for speed.</string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_unsafe_unfuse_fma">
+          <property name="text">
+           <string>Unfuse FMA (improve performance on CPUs without FMA)</string>
+          </property>
+          <property name="toolTip">
+           <string>
+            &lt;div&gt;This option improves speed by reducing accuracy of fused-multiply-add instructions on CPUs without native FMA support.&lt;/div&gt;
+           </string>
+          </property>
+         </widget>
+        </item>
+        <item>
+         <widget class="QCheckBox" name="cpuopt_unsafe_reduce_fp_error">
+          <property name="text">
+           <string>Faster FRSQRTE and FRECPE</string>
+          </property>
+          <property name="toolTip">
+           <string>
+            &lt;div&gt;This option improves the speed of some approximate floating-point functions by using less accurate native approximations.&lt;/div&gt;
+           </string>
+          </property>
+         </widget>
+        </item>
+       </layout>
+      </widget>
+     </item>
+    </layout>
+   </item>
    <item>
     <spacer name="verticalSpacer">
      <property name="orientation">