From dcc5b4f6b005a2c89bb4e77bca4cfe8705734021 Mon Sep 17 00:00:00 2001 From: Marshall Mohror <mohror64@gmail.com> Date: Fri, 22 Oct 2021 23:09:29 -0500 Subject: [PATCH] Presentation: Only use FP16 in scaling shaders on supported devices in Vulkan --- externals/CMakeLists.txt | 3 + src/video_core/CMakeLists.txt | 1 + src/video_core/host_shaders/CMakeLists.txt | 10 +- .../host_shaders/fidelityfx_fsr.comp | 106 +++++++++--------- ...ce.frag => opengl_present_scaleforce.frag} | 84 ++++++++------ ...p => vulkan_fidelityfx_fsr_easu_fp16.comp} | 4 +- .../vulkan_fidelityfx_fsr_easu_fp32.comp | 10 ++ ...p => vulkan_fidelityfx_fsr_rcas_fp16.comp} | 4 +- .../vulkan_fidelityfx_fsr_rcas_fp32.comp | 10 ++ .../vulkan_present_scaleforce_fp16.frag | 7 ++ .../vulkan_present_scaleforce_fp32.frag | 5 + .../renderer_opengl/renderer_opengl.cpp | 5 +- .../renderer_vulkan/vk_blit_screen.cpp | 17 ++- src/video_core/renderer_vulkan/vk_fsr.cpp | 46 +++++--- src/video_core/renderer_vulkan/vk_fsr.h | 2 +- 15 files changed, 199 insertions(+), 115 deletions(-) rename src/video_core/host_shaders/{present_scaleforce.frag => opengl_present_scaleforce.frag} (56%) rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_easu.comp => vulkan_fidelityfx_fsr_easu_fp16.comp} (67%) create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp rename src/video_core/host_shaders/{vulkan_fidelityfx_fsr_rcas.comp => vulkan_fidelityfx_fsr_rcas_fp16.comp} (67%) create mode 100644 src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag create mode 100644 src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index a76a3d800..7ff2ccc24 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -25,6 +25,9 @@ if (ARCHITECTURE_x86_64) add_subdirectory(dynarmic) endif() +add_library(ffx-fsr INTERFACE) +target_include_directories(ffx-fsr INTERFACE FidelityFX-FSR/ffx-fsr) + # getopt if (MSVC) add_subdirectory(getopt) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91a30fef7..07b94dcc8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -237,6 +237,7 @@ target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) +target_link_libraries(video_core PRIVATE ffx-fsr) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 6b5ea649a..d779a967a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,16 +18,20 @@ set(SHADER_FILES opengl_copy_bc4.comp opengl_present.frag opengl_present.vert + opengl_present_scaleforce.frag pitch_unswizzle.comp - present_scaleforce.frag present_bicubic.frag present_gaussian.frag vulkan_blit_color_float.frag vulkan_blit_depth_stencil.frag - vulkan_fidelityfx_fsr_easu.comp - vulkan_fidelityfx_fsr_rcas.comp + vulkan_fidelityfx_fsr_easu_fp16.comp + vulkan_fidelityfx_fsr_easu_fp32.comp + vulkan_fidelityfx_fsr_rcas_fp16.comp + vulkan_fidelityfx_fsr_rcas_fp32.comp vulkan_present.frag vulkan_present.vert + vulkan_present_scaleforce_fp16.frag + vulkan_present_scaleforce_fp32.frag vulkan_quad_indexed.comp vulkan_uint8.comp ) diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.comp index cbb601580..6b97f789d 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.comp @@ -28,80 +28,82 @@ // THE SOFTWARE. layout( push_constant ) uniform constants { - u32vec2 input_size; + uvec4 Const0; + uvec4 Const1; + uvec4 Const2; + uvec4 Const3; }; -uvec4 Const0; -uvec4 Const1; -uvec4 Const2; -uvec4 Const3; +layout(set=0,binding=0) uniform sampler2D InputTexture; +layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 -#define A_HALF -#include "ffx_a.h" +#ifndef YUZU_USE_FP16 + #include "ffx_a.h" -f16vec4 LinearToSRGB(f16vec4 linear) { - bvec4 selector = greaterThan(linear, f16vec4(0.00313066844250063)); - f16vec4 low = linear * float16_t(12.92); - f16vec4 high = float16_t(1.055) * pow(linear, f16vec4(1 / 2.4)) - float16_t(0.055); - return mix(low, high, selector); -} + #if USE_EASU + #define FSR_EASU_F 1 + AF4 FsrEasuRF(AF2 p) { AF4 res = textureGather(InputTexture, p, 0); return res; } + AF4 FsrEasuGF(AF2 p) { AF4 res = textureGather(InputTexture, p, 1); return res; } + AF4 FsrEasuBF(AF2 p) { AF4 res = textureGather(InputTexture, p, 2); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_F 1 + AF4 FsrRcasLoadF(ASU2 p) { return texelFetch(InputTexture, ASU2(p), 0); } + void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} + #endif +#else + #define A_HALF + #include "ffx_a.h" -f16vec4 SRGBToLinear(f16vec4 srgb) { - bvec4 selector = greaterThan(srgb, f16vec4(0.0404482362771082)); - f16vec4 low = srgb * float16_t(1.0 / 12.92); - f16vec4 high = pow((srgb + float16_t(0.055)) * float16_t(1.0 / 1.055), f16vec4(2.4)); - return mix(low, high, selector); -} - -#if USE_EASU - #define FSR_EASU_H 1 - f16vec4 FsrEasuRH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 0)); return res; } - f16vec4 FsrEasuGH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 1)); return res; } - f16vec4 FsrEasuBH(vec2 p) { f16vec4 res = f16vec4(textureGather(InputTexture, p, 2)); return res; } -#endif -#if USE_RCAS - #define FSR_RCAS_H 1 - f16vec4 FsrRcasLoadH(ASW2 p) { return f16vec4(texelFetch(InputTexture, ASU2(p), 0)); } - void FsrRcasInputH(inout float16_t r, inout float16_t g, inout float16_t b) {} + #if USE_EASU + #define FSR_EASU_H 1 + AH4 FsrEasuRH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 0)); return res; } + AH4 FsrEasuGH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 1)); return res; } + AH4 FsrEasuBH(AF2 p) { AH4 res = AH4(textureGather(InputTexture, p, 2)); return res; } + #endif + #if USE_RCAS + #define FSR_RCAS_H 1 + AH4 FsrRcasLoadH(ASW2 p) { return AH4(texelFetch(InputTexture, ASU2(p), 0)); } + void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b){} + #endif #endif #include "ffx_fsr1.h" -void CurrFilter(u32vec2 pos) { - // For debugging +void CurrFilter(AU2 pos) { #if USE_BILINEAR - vec2 pp = (vec2(pos) * vec2_AU2(Const0.xy) + vec2_AU2(Const0.zw)) * vec2_AU2(Const1.xy) + vec2(0.5, -0.5) * vec2_AU2(Const1.zw); - imageStore(OutputTexture, ivec2(pos), textureLod(InputTexture, pp, 0.0)); + AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); + imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); #endif #if USE_EASU - f16vec3 c; - FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrEasuF(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrEasuH(c, pos, Const0, Const1, Const2, Const3); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif #if USE_RCAS - f16vec3 c; - FsrRcasH(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ivec2(pos), f16vec4(c, 1)); + #ifndef YUZU_USE_FP16 + AF3 c; + FsrRcasF(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + #else + AH3 c; + FsrRcasH(c.r, c.g, c.b, pos, Const0); + imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + #endif #endif - } layout(local_size_x=64) in; void main() { - -#if USE_EASU || USE_BILINEAR - vec2 ires = vec2(input_size); - vec2 tres = textureSize(InputTexture, 0); - vec2 ores = imageSize(OutputTexture); - FsrEasuCon(Const0, Const1, Const2, Const3, ires.x, ires.y, tres.x, tres.y, ores.x, ores.y); -#endif -#if USE_RCAS - FsrRcasCon(Const0, 0.25f); -#endif - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); CurrFilter(gxy); diff --git a/src/video_core/host_shaders/present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag similarity index 56% rename from src/video_core/host_shaders/present_scaleforce.frag rename to src/video_core/host_shaders/opengl_present_scaleforce.frag index ebc0d9b90..71ff9e1e3 100644 --- a/src/video_core/host_shaders/present_scaleforce.frag +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -22,11 +22,29 @@ // Adapted from https://github.com/BreadFish64/ScaleFish/tree/master/scaleforce -#version 460 +//! #version 460 + +#extension GL_ARB_separate_shader_objects : enable + +#ifdef YUZU_USE_FP16 #extension GL_AMD_gpu_shader_half_float : enable #extension GL_NV_gpu_shader5 : enable +#define lfloat float16_t +#define lvec2 f16vec2 +#define lvec3 f16vec3 +#define lvec4 f16vec4 + +#else + +#define lfloat float +#define lvec2 vec2 +#define lvec3 vec3 +#define lvec4 vec4 + +#endif + #ifdef VULKAN #define BINDING_COLOR_TEXTURE 1 @@ -45,25 +63,25 @@ layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; const bool ignore_alpha = true; -float16_t ColorDist1(f16vec4 a, f16vec4 b) { +lfloat ColorDist1(lvec4 a, lvec4 b) { // https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.2020_conversion - const f16vec3 K = f16vec3(0.2627, 0.6780, 0.0593); - const float16_t scaleB = float16_t(0.5) / (float16_t(1.0) - K.b); - const float16_t scaleR = float16_t(0.5) / (float16_t(1.0) - K.r); - f16vec4 diff = a - b; - float16_t Y = dot(diff.rgb, K); - float16_t Cb = scaleB * (diff.b - Y); - float16_t Cr = scaleR * (diff.r - Y); - f16vec3 YCbCr = f16vec3(Y, Cb, Cr); - float16_t d = length(YCbCr); + const lvec3 K = lvec3(0.2627, 0.6780, 0.0593); + const lfloat scaleB = lfloat(0.5) / (lfloat(1.0) - K.b); + const lfloat scaleR = lfloat(0.5) / (lfloat(1.0) - K.r); + lvec4 diff = a - b; + lfloat Y = dot(diff.rgb, K); + lfloat Cb = scaleB * (diff.b - Y); + lfloat Cr = scaleR * (diff.r - Y); + lvec3 YCbCr = lvec3(Y, Cb, Cr); + lfloat d = length(YCbCr); if (ignore_alpha) { return d; } return sqrt(a.a * b.a * d * d + diff.a * diff.a); } -f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { - return f16vec4( +lvec4 ColorDist(lvec4 ref, lvec4 A, lvec4 B, lvec4 C, lvec4 D) { + return lvec4( ColorDist1(ref, A), ColorDist1(ref, B), ColorDist1(ref, C), @@ -72,36 +90,36 @@ f16vec4 ColorDist(f16vec4 ref, f16vec4 A, f16vec4 B, f16vec4 C, f16vec4 D) { } vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { - f16vec4 bl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); - f16vec4 bc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, -1))); - f16vec4 br = f16vec4(textureOffset(tex, tex_coord, ivec2(1, -1))); - f16vec4 cl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); - f16vec4 cc = f16vec4(texture(tex, tex_coord)); - f16vec4 cr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 0))); - f16vec4 tl = f16vec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); - f16vec4 tc = f16vec4(textureOffset(tex, tex_coord, ivec2(0, 1))); - f16vec4 tr = f16vec4(textureOffset(tex, tex_coord, ivec2(1, 1))); + lvec4 bl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, -1))); + lvec4 bc = lvec4(textureOffset(tex, tex_coord, ivec2(0, -1))); + lvec4 br = lvec4(textureOffset(tex, tex_coord, ivec2(1, -1))); + lvec4 cl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 0))); + lvec4 cc = lvec4(texture(tex, tex_coord)); + lvec4 cr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 0))); + lvec4 tl = lvec4(textureOffset(tex, tex_coord, ivec2(-1, 1))); + lvec4 tc = lvec4(textureOffset(tex, tex_coord, ivec2(0, 1))); + lvec4 tr = lvec4(textureOffset(tex, tex_coord, ivec2(1, 1))); - f16vec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); - f16vec4 offset_br = ColorDist(cc, br, bc, bl, cl); + lvec4 offset_tl = ColorDist(cc, tl, tc, tr, cr); + lvec4 offset_br = ColorDist(cc, br, bc, bl, cl); // Calculate how different cc is from the texels around it - const float16_t plus_weight = float16_t(1.5); - const float16_t cross_weight = float16_t(1.5); - float16_t total_dist = dot(offset_tl + offset_br, f16vec4(cross_weight, plus_weight, cross_weight, plus_weight)); + const lfloat plus_weight = lfloat(1.5); + const lfloat cross_weight = lfloat(1.5); + lfloat total_dist = dot(offset_tl + offset_br, lvec4(cross_weight, plus_weight, cross_weight, plus_weight)); - if (total_dist == float16_t(0.0)) { + if (total_dist == lfloat(0.0)) { return cc; } else { // Add together all the distances with direction taken into account - f16vec4 tmp = offset_tl - offset_br; - f16vec2 total_offset = tmp.wy * plus_weight + (tmp.zz + f16vec2(-tmp.x, tmp.x)) * cross_weight; + lvec4 tmp = offset_tl - offset_br; + lvec2 total_offset = tmp.wy * plus_weight + (tmp.zz + lvec2(-tmp.x, tmp.x)) * cross_weight; // When the image has thin points, they tend to split apart. // This is because the texels all around are different and total_offset reaches into clear areas. // This works pretty well to keep the offset in bounds for these cases. - float16_t clamp_val = length(total_offset) / total_dist; - f16vec2 final_offset = clamp(total_offset, -clamp_val, clamp_val) / f16vec2(textureSize(tex, 0)); + lfloat clamp_val = length(total_offset) / total_dist; + vec2 final_offset = vec2(clamp(total_offset, -clamp_val, clamp_val)) / textureSize(tex, 0); return texture(tex, tex_coord - final_offset); } @@ -109,4 +127,4 @@ vec4 Scaleforce(sampler2D tex, vec2 tex_coord) { void main() { frag_color = Scaleforce(input_texture, tex_coord); -} \ No newline at end of file +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp similarity index 67% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp index 6525eeeb5..1c96a7905 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp @@ -5,9 +5,7 @@ #version 460 core #extension GL_GOOGLE_include_directive : enable -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - +#define YUZU_USE_FP16 #define USE_EASU 1 #include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp new file mode 100644 index 000000000..f4daff739 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_EASU 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp similarity index 67% rename from src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp rename to src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp index 9463ed842..6b6796dd1 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp @@ -5,9 +5,7 @@ #version 460 core #extension GL_GOOGLE_include_directive : enable -layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; - +#define YUZU_USE_FP16 #define USE_RCAS 1 #include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp new file mode 100644 index 000000000..f785eebf3 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp @@ -0,0 +1,10 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +#extension GL_GOOGLE_include_directive : enable + +#define USE_RCAS 1 + +#include "fidelityfx_fsr.comp" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag new file mode 100644 index 000000000..924c03060 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -0,0 +1,7 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#define YUZU_USE_FP16 + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag new file mode 100644 index 000000000..a594b83ca --- /dev/null +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -0,0 +1,5 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : enable + +#include "opengl_present_scaleforce.frag" diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e63f0bdd8..28daacd82 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -24,10 +24,10 @@ #include "video_core/host_shaders/fxaa_frag.h" #include "video_core/host_shaders/fxaa_vert.h" #include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" #include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/host_shaders/present_bicubic_frag.h" #include "video_core/host_shaders/present_gaussian_frag.h" -#include "video_core/host_shaders/present_scaleforce_frag.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" @@ -266,7 +266,8 @@ void RendererOpenGL::InitOpenGLObjects() { present_gaussian_fragment = CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); present_scaleforce_fragment = - CreateProgram(HostShaders::PRESENT_SCALEFORCE_FRAG, GL_FRAGMENT_SHADER); + CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), + GL_FRAGMENT_SHADER); // Generate presentation sampler present_sampler.Create(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2bed4f3c5..9dfc508bc 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -21,8 +21,9 @@ #include "video_core/host_shaders/fxaa_vert_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h" -#include "video_core/host_shaders/present_scaleforce_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" #include "video_core/host_shaders/vulkan_present_vert_spv.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" @@ -328,7 +329,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT , + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); } }); @@ -344,8 +345,12 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, crop_rect.bottom = framebuffer.height; } crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); + VkExtent2D fsr_input_size{ + .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), + .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), + }; VkImageView fsr_image_view = - fsr->Draw(scheduler, image_index, source_image_view, crop_rect); + fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); UpdateDescriptorSet(image_index, fsr_image_view, true); } else { const bool is_nn = @@ -500,7 +505,11 @@ void VKBlitScreen::CreateShaders() { bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); - scaleforce_fragment_shader = BuildShader(device, PRESENT_SCALEFORCE_FRAG_SPV); + if (device.IsFloat16Supported()) { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); + } else { + scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + } } void VKBlitScreen::CreateSemaphores() { diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index 1f60974be..9288aa7c2 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -4,13 +4,19 @@ #include "common/common_types.h" #include "common/div_ceil.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" #include "video_core/renderer_vulkan/vk_fsr.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" +#define A_CPU +#include <ffx_a.h> +#include <ffx_fsr1.h> + namespace Vulkan { FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, @@ -29,11 +35,11 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image } VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, - const Common::Rectangle<int>& crop_rect) { + VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { UpdateDescriptorSet(image_index, image_view); - scheduler.Record([this, image_index, crop_rect](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier base_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, @@ -54,13 +60,18 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im }, }; - // TODO: Support clear color cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, - VkExtent2D{ - .width = static_cast<u32>(crop_rect.GetWidth()), - .height = static_cast<u32>(crop_rect.GetHeight()), - }); + + std::array<AU1, 4 * 4> push_constants; + FsrEasuConOffset( + push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, + push_constants.data() + 12, + + static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()), + static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height), + static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height), + static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top)); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { VkImageMemoryBarrier fsr_write_barrier = base_barrier; @@ -77,7 +88,9 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im Common::DivCeil(output_size.height, 16u), 1); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, output_size); + + FsrRcasCon(push_constants.data(), 0.25f); + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { std::array<VkImageMemoryBarrier, 2> barriers; @@ -247,7 +260,7 @@ void FSR::CreatePipelineLayout() { VkPushConstantRange push_const{ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, .offset = 0, - .size = sizeof(std::array<u32, 2>), + .size = sizeof(std::array<u32, 4 * 4>), }; VkPipelineLayoutCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, @@ -344,8 +357,13 @@ void FSR::CreateSampler() { } void FSR::CreateShaders() { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_COMP_SPV); + if (device.IsFloat16Supported()) { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); + } else { + easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); + rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); + } } void FSR::CreatePipeline() { diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 8391e2e58..6bbec3d36 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -18,7 +18,7 @@ public: explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, VkExtent2D output_size); VkImageView Draw(VKScheduler& scheduler, size_t image_index, VkImageView image_view, - const Common::Rectangle<int>& crop_rect); + VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); private: void CreateDescriptorPool();