From dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 Mon Sep 17 00:00:00 2001
From: Marshall Mohror <mohror64@gmail.com>
Date: Fri, 22 Oct 2021 23:09:29 -0500
Subject: [PATCH] Presentation: Only use FP16 in scaling shaders on supported
 devices in Vulkan

---
 externals/CMakeLists.txt                      |   3 +
 src/video_core/CMakeLists.txt                 |   1 +
 src/video_core/host_shaders/CMakeLists.txt    |  10 +-
 .../host_shaders/fidelityfx_fsr.comp          | 106 +++++++++---------
 ...ce.frag => opengl_present_scaleforce.frag} |  84 ++++++++------
 ...p => vulkan_fidelityfx_fsr_easu_fp16.comp} |   4 +-
 .../vulkan_fidelityfx_fsr_easu_fp32.comp      |  10 ++
 ...p => vulkan_fidelityfx_fsr_rcas_fp16.comp} |   4 +-
 .../vulkan_fidelityfx_fsr_rcas_fp32.comp      |  10 ++
 .../vulkan_present_scaleforce_fp16.frag       |   7 ++
 .../vulkan_present_scaleforce_fp32.frag       |   5 +
 .../renderer_opengl/renderer_opengl.cpp       |   5 +-
 .../renderer_vulkan/vk_blit_screen.cpp        |  17 ++-
 src/video_core/renderer_vulkan/vk_fsr.cpp     |  46 +++++---
 src/video_core/renderer_vulkan/vk_fsr.h       |   2 +-
 15 files changed, 199 insertions(+), 115 deletions(-)
 rename src/video_core/host_shaders/{present_scaleforce.frag => opengl_present_scaleforce.frag} (56%)
 rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_easu.comp => vulkan_fidelityfx_fsr_easu_fp16.comp} (67%)
 create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
 rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_rcas.comp => vulkan_fidelityfx_fsr_rcas_fp16.comp} (67%)
 create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
 create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
 create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag

diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index a76a3d800..7ff2ccc24 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -25,6 +25,9 @@ if (ARCHITECTURE_x86_64)
     add_subdirectory(dynarmic)
 endif()
 
+add_library(ffx-fsr INTERFACE) # Header-only target exposing the FidelityFX FSR headers
+target_include_directories(ffx-fsr SYSTEM INTERFACE FidelityFX-FSR/ffx-fsr)
+
 # getopt
 if (MSVC)
     add_subdirectory(getopt)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 91a30fef7..07b94dcc8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
 target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
 target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
 
+target_link_libraries(video_core PRIVATE ffx-fsr)
 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 6b5ea649a..d779a967a 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -18,16 +18,20 @@ set(SHADER_FILES
     opengl_copy_bc4.comp
     opengl_present.frag
     opengl_present.vert
+    opengl_present_scaleforce.frag
     pitch_unswizzle.comp
-    present_scaleforce.frag
     present_bicubic.frag
     present_gaussian.frag
     vulkan_blit_color_float.frag
     vulkan_blit_depth_stencil.frag
-    vulkan_fidelityfx_fsr_easu.comp
-    vulkan_fidelityfx_fsr_rcas.comp
+    vulkan_fidelityfx_fsr_easu_fp16.comp
+    vulkan_fidelityfx_fsr_easu_fp32.comp
+    vulkan_fidelityfx_fsr_rcas_fp16.comp
+    vulkan_fidelityfx_fsr_rcas_fp32.comp
     vulkan_present.frag
     vulkan_present.vert
+    vulkan_present_scaleforce_fp16.frag
+    vulkan_present_scaleforce_fp32.frag
     vulkan_quad_indexed.comp
     vulkan_uint8.comp
 )
diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp
index cbb601580..6b97f789d 100644
--- a/src/video_core/host_shaders/fidelityfx_fsr.comp
+++ b/src/video_core/host_shaders/fidelityfx_fsr.comp
@@ -28,80 +28,82 @@
 // THE SOFTWARE.
 
 layout( push_constant ) uniform constants {
-    u32vec2 input_size;
+    uvec4 Const0;
+    uvec4 Const1;
+    uvec4 Const2;
+    uvec4 Const3;
 };
 
-uvec4 Const0;
-uvec4 Const1;
-uvec4 Const2;
-uvec4 Const3;
+layout(set=0,binding=0) uniform sampler2D InputTexture;
+layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
 
 #define A_GPU 1
 #define A_GLSL 1
-#define A_HALF
 
-#include "ffx_a.h"
+#ifndef YUZU_USE_FP16 // FP32 path: A_HALF stays undefined, *F callbacks are provided
+    #include "ffx_a.h"
 
-f16vec4 LinearToSRGB(f16vec4 linear) {
-    bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063));
-    f16vec4 low = linear * float16_t(12.92);
-    f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055);
-    return mix(low, high, selector);
-}
+    #if USE_EASU
+        #define FSR_EASU_F 1
+        AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; }
+        AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; }
+        AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; }
+    #endif // USE_EASU
+    #if USE_RCAS
+        #define FSR_RCAS_F 1
+        AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); }
+        void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} // Leaves r, g, b unchanged
+    #endif // USE_RCAS
+#else // YUZU_USE_FP16: FP16 path, A_HALF is defined and *H callbacks are provided
+    #define A_HALF
+    #include "ffx_a.h"
 
-f16vec4 SRGBToLinear(f16vec4 srgb) {
-    bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082));
-    f16vec4 low = srgb * float16_t(1.0 / 12.92);
-    f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4));
-    return mix(low, high, selector);
-}
-
-#if USE_EASU
-    #define FSR_EASU_H 1
-    f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; }
-    f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; }
-    f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; }
-#endif
-#if USE_RCAS
-    #define FSR_RCAS_H 1
-    f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); }
-    void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {}
+    #if USE_EASU
+        #define FSR_EASU_H 1
+        AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; }
+        AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; }
+        AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; }
+    #endif // USE_EASU
+    #if USE_RCAS
+        #define FSR_RCAS_H 1
+        AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); }
+        void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {} // Leaves r, g, b unchanged
+    #endif // USE_RCAS
 #endif
 
 #include "ffx_fsr1.h"
 
-void CurrFilter(u32vec2 pos) {
-    // For debugging
+void CurrFilter(AU2 pos) { // Runs the compile-time selected filter for texel pos and stores it
 #if USE_BILINEAR
-    vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw);
-    imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0));
+    AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw);
+    imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0));
 #endif
 #if USE_EASU
-    f16vec3 c;
-    FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
-    imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1));
+    #ifndef YUZU_USE_FP16 // FP32 EASU entry point
+        AF3 c;
+        FsrEasuF(c, pos, Const0, Const1, Const2, Const3);
+        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+    #else // YUZU_USE_FP16: FP16 EASU entry point
+        AH3 c;
+        FsrEasuH(c, pos, Const0, Const1, Const2, Const3);
+        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+    #endif // YUZU_USE_FP16
 #endif
 #if USE_RCAS
-    f16vec3 c;
-    FsrRcasH(c.r, c.g, c.b, pos, Const0);
-    imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1));
+    #ifndef YUZU_USE_FP16 // FP32 RCAS entry point
+        AF3 c;
+        FsrRcasF(c.r, c.g, c.b, pos, Const0);
+        imageStore(OutputTexture, ASU2(pos), AF4(c, 1));
+    #else // YUZU_USE_FP16: FP16 RCAS entry point
+        AH3 c;
+        FsrRcasH(c.r, c.g, c.b, pos, Const0);
+        imageStore(OutputTexture, ASU2(pos), AH4(c, 1));
+    #endif // YUZU_USE_FP16
 #endif
-
 }
 
 layout(local_size_x=64) in;
 void main() {
-
-#if USE_EASU || USE_BILINEAR
-    vec2 ires = vec2(input_size);
-    vec2 tres = textureSize(InputTexture, 0);
-    vec2 ores = imageSize(OutputTexture);
-    FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y);
-#endif
-#if USE_RCAS
-    FsrRcasCon(Const0, 0.25f);
-#endif
-
     // Do remapping of local xy in workgroup for a more PS-like swizzle pattern.
     AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u);
     CurrFilter(gxy);
diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag
similarity index 56%
rename from src/video_core/host_shaders/present_scaleforce.frag
rename to src/video_core/host_shaders/opengl_present_scaleforce.frag
index ebc0d9b90..71ff9e1e3 100644
--- a/src/video_core/host_shaders/present_scaleforce.frag
+++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag
@@ -22,11 +22,29 @@
 
 // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce
 
-#version 460
+//! #version 460
+
+#extension GL_ARB_separate_shader_objects : enable
+
+#ifdef YUZU_USE_FP16
 
 #extension GL_AMD_gpu_shader_half_float : enable
 #extension GL_NV_gpu_shader5 : enable
 
+#define lfloat float16_t
+#define lvec2 f16vec2
+#define lvec3 f16vec3
+#define lvec4 f16vec4
+
+#else
+
+#define lfloat float
+#define lvec2 vec2
+#define lvec3 vec3
+#define lvec4 vec4
+
+#endif
+
 #ifdef VULKAN
 
 #define BINDING_COLOR_TEXTURE 1
@@ -45,25 +63,25 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture;
 
 const bool ignore_alpha = true;
 
-float16_t ColorDist1(f16vec4 a, f16vec4 b) {
+lfloat ColorDist1(lvec4 a, lvec4 b) {
     // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion
-    const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593);
-    const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b);
-    const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r);
-    f16vec4 diff = a - b;
-    float16_t Y = dot(diff.rgb, K);
-    float16_t Cb = scaleB * (diff.b - Y);
-    float16_t Cr = scaleR * (diff.r - Y);
-    f16vec3 YCbCr = f16vec3(Y, Cb, Cr);
-    float16_t d = length(YCbCr);
+    const lvec3 K = lvec3(0.2627, 0.6780, 0.0593);
+    const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b);
+    const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r);
+    lvec4 diff = a - b;
+    lfloat Y = dot(diff.rgb, K);
+    lfloat Cb = scaleB * (diff.b - Y);
+    lfloat Cr = scaleR * (diff.r - Y);
+    lvec3 YCbCr = lvec3(Y, Cb, Cr);
+    lfloat d = length(YCbCr);
     if (ignore_alpha) {
         return d;
     }
     return sqrt(a.a * b.a * d * d + diff.a * diff.a);
 }
 
-f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) {
-    return f16vec4(
+lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) {
+    return lvec4(
             ColorDist1(ref, A),
             ColorDist1(ref, B),
             ColorDist1(ref, C),
@@ -72,36 +90,36 @@ f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) {
 }
 
 vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
-    f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
-    f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
-    f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
-    f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
-    f16vec4 cc = f16vec4(texture(tex, tex_coord));
-    f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
-    f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
-    f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
-    f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
+    lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1)));
+    lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1)));
+    lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1)));
+    lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0)));
+    lvec4 cc = lvec4(texture(tex, tex_coord));
+    lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0)));
+    lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1)));
+    lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1)));
+    lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1)));
 
-    f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr);
-    f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl);
+    lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr);
+    lvec4 offset_br = ColorDist(cc, br, bc, bl, cl);
 
     // Calculate how different cc is from the texels around it
-    const float16_t plus_weight = float16_t(1.5);
-    const float16_t cross_weight = float16_t(1.5);
-    float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight));
+    const lfloat plus_weight = lfloat(1.5);
+    const lfloat cross_weight = lfloat(1.5);
+    lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight));
 
-    if (total_dist == float16_t(0.0)) {
+    if (total_dist == lfloat(0.0)) {
         return cc;
     } else {
         // Add together all the distances with direction taken into account
-        f16vec4 tmp = offset_tl - offset_br;
-        f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight;
+        lvec4 tmp = offset_tl - offset_br;
+        lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight;
 
         // When the image has thin points, they tend to split apart.
         // This is because the texels all around are different and total_offset reaches into clear areas.
         // This works pretty well to keep the offset in bounds for these cases.
-        float16_t clamp_val = length(total_offset) / total_dist;
-        f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0));
+        lfloat clamp_val = length(total_offset) / total_dist;
+        vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0);
 
         return texture(tex, tex_coord - final_offset);
     }
@@ -109,4 +127,4 @@ vec4 Scaleforce(sampler2D tex, vec2 tex_coord) {
 
 void main() {
     frag_color = Scaleforce(input_texture, tex_coord);
-}
\ No newline at end of file
+}
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
similarity index 67%
rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp
rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
index 6525eeeb5..1c96a7905 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp
@@ -5,9 +5,7 @@
 #version 460 core
 #extension GL_GOOGLE_include_directive : enable
 
-layout(set=0,binding=0) uniform sampler2D InputTexture;
-layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
-
+#define YUZU_USE_FP16
 #define USE_EASU 1
 
 #include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
new file mode 100644
index 000000000..f4daff739
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_EASU 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
similarity index 67%
rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp
rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
index 9463ed842..6b6796dd1 100644
--- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp
@@ -5,9 +5,7 @@
 #version 460 core
 #extension GL_GOOGLE_include_directive : enable
 
-layout(set=0,binding=0) uniform sampler2D InputTexture;
-layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture;
-
+#define YUZU_USE_FP16
 #define USE_RCAS 1
 
 #include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
new file mode 100644
index 000000000..f785eebf3
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#version 460 core
+#extension GL_GOOGLE_include_directive : enable
+
+#define USE_RCAS 1
+
+#include "fidelityfx_fsr.comp"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
new file mode 100644
index 000000000..924c03060
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag
@@ -0,0 +1,7 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#define YUZU_USE_FP16
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
new file mode 100644
index 000000000..a594b83ca
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag
@@ -0,0 +1,5 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : enable
+
+#include "opengl_present_scaleforce.frag"
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e63f0bdd8..28daacd82 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -24,10 +24,10 @@
 #include "video_core/host_shaders/fxaa_frag.h"
 #include "video_core/host_shaders/fxaa_vert.h"
 #include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_scaleforce_frag.h"
 #include "video_core/host_shaders/opengl_present_vert.h"
 #include "video_core/host_shaders/present_bicubic_frag.h"
 #include "video_core/host_shaders/present_gaussian_frag.h"
-#include "video_core/host_shaders/present_scaleforce_frag.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_shader_util.h"
@@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() {
     present_gaussian_fragment =
         CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER);
     present_scaleforce_fragment =
-        CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER);
+        CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG),
+                      GL_FRAGMENT_SHADER);
 
     // Generate presentation sampler
     present_sampler.Create();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2bed4f3c5..9dfc508bc 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -21,8 +21,9 @@
 #include "video_core/host_shaders/fxaa_vert_spv.h"
 #include "video_core/host_shaders/present_bicubic_frag_spv.h"
 #include "video_core/host_shaders/present_gaussian_frag_spv.h"
-#include "video_core/host_shaders/present_scaleforce_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h"
+#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h"
 #include "video_core/host_shaders/vulkan_present_vert_spv.h"
 #include "video_core/renderer_vulkan/renderer_vulkan.h"
 #include "video_core/renderer_vulkan/vk_blit_screen.h"
@@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
                 blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
                 blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
 
-                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ,
+                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier);
             }
         });
@@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
             crop_rect.bottom = framebuffer.height;
         }
         crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor);
+        VkExtent2D fsr_input_size{
+            .width = Settings::values.resolution_info.ScaleUp(framebuffer.width),
+            .height = Settings::values.resolution_info.ScaleUp(framebuffer.height),
+        };
         VkImageView fsr_image_view =
-            fsr->Draw(scheduler, image_index, source_image_view, crop_rect);
+            fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
         UpdateDescriptorSet(image_index, fsr_image_view, true);
     } else {
         const bool is_nn =
@@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() {
     bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
     bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV);
     gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV);
-    scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV);
+    if (device.IsFloat16Supported()) {
+        scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV);
+    } else {
+        scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV);
+    }
 }
 
 void VKBlitScreen::CreateSemaphores() {
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index 1f60974be..9288aa7c2 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -4,13 +4,19 @@
 
 #include "common/common_types.h"
 #include "common/div_ceil.h"
-#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h"
-#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h"
+#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h"
 #include "video_core/renderer_vulkan/vk_fsr.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 
+#define A_CPU
+#include <ffx_a.h>
+#include <ffx_fsr1.h>
+
 namespace Vulkan {
 
 FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_,
@@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image
 }
 
 VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
-                      const Common::Rectangle<int>& crop_rect) {
+                      VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) {
 
     UpdateDescriptorSet(image_index, image_view);
 
-    scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) {
+    scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) {
         const VkImageMemoryBarrier base_barrier{
             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
             .pNext = nullptr,
@@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
                 },
         };
 
-        // TODO: Support clear color
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline);
-        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
-                             VkExtent2D{
-                                 .width = static_cast<u32>(crop_rect.GetWidth()),
-                                 .height = static_cast<u32>(crop_rect.GetHeight()),
-                             });
+
+        std::array<AU1, 4 * 4> push_constants;
+        FsrEasuConOffset(
+            push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8,
+            push_constants.data() + 12,
+
+            static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()),
+            static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height),
+            static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height),
+            static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top));
+        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
 
         {
             VkImageMemoryBarrier fsr_write_barrier = base_barrier;
@@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im
                         Common::DivCeil(output_size.height, 16u), 1);
 
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline);
-        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size);
+
+        FsrRcasCon(push_constants.data(), 0.25f);
+        cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants);
 
         {
             std::array<VkImageMemoryBarrier, 2> barriers;
@@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() {
     VkPushConstantRange push_const{
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
         .offset = 0,
-        .size = sizeof(std::array<u32, 2>),
+        .size = sizeof(std::array<u32, 4 * 4>),
     };
     VkPipelineLayoutCreateInfo ci{
         .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@@ -344,8 +357,13 @@ void FSR::CreateSampler() {
 }
 
 void FSR::CreateShaders() {
-    easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV);
-    rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV);
+    if (device.IsFloat16Supported()) { // FP16 variants only when the device reports support
+        easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV);
+        rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV);
+    } else { // Otherwise build the FP32 variants of both passes
+        easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV);
+        rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV);
+    }
 }
 
 void FSR::CreatePipeline() {
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 8391e2e58..6bbec3d36 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -18,7 +18,7 @@ public:
     explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count,
                  VkExtent2D output_size);
     VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view,
-                     const Common::Rectangle<int>& crop_rect);
+                     VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect);
 
 private:
     void CreateDescriptorPool();