Prepare frontend for multiple graphics APIs (#6347)

* externals: Update dynarmic

* settings: Introduce GraphicsAPI enum

* For now it's OpenGL only but will be expanded upon later

* citra_qt: Introduce backend agnostic context management

* Mostly a direct port from yuzu

* core: Simplify context acquire

* settings: Add option to create debug contexts

* renderer_opengl: Abstract initialization to Driver

* This commit also updates glad and adds some useful extensions which we will use in part 2

* Rasterizer construction is moved to the specific renderer instead of RendererBase.
  Software rendering has been disabled to achieve this but will be brought back in the next commit.

* video_core: Remove Init/Shutdown methods from renderer

* The constructor and destructor can do the same job

* In addition move opengl function loading to Qt since SDL already does this. Also remove ErrorVideoCore which is never reached

* citra_qt: Decouple software renderer from opengl part 1

* citra: Decouple software renderer from opengl part 2

* android: Decouple software renderer from opengl part 3

* swrasterizer: Decouple software renderer from opengl part 4

* This commit simply enforces the renderer naming conventions in the software renderer

* video_core: Move RendererBase to VideoCore

* video_core: De-globalize screenshot state

* video_core: Pass system to the renderers

* video_core: Commonize shader uniform data

* video_core: Abstract backend agnostic rasterizer operations

* bootmanager: Remove references to OpenGL for macOS

OpenGL macOS headers definitions clash heavily with each other

* citra_qt: Proper title for api settings

* video_core: Reduce boost usage

* bootmanager: Fix hide mouse option

Remove event handlers from RenderWidget for events that are
already handled by the parent GRenderWindow.
Also enable mouse tracking on the RenderWidget.

* android: Remove software from graphics api list

* code: Address review comments

* citra: Port per-game settings read

* Having to update the default value for all backends is a pain so let's centralize it

* android: Rename to OpenGLES

---------

Co-authored-by: MerryMage <MerryMage@users.noreply.github.com>
Co-authored-by: Vitor Kiguchi <vitor-kiguchi@hotmail.com>
This commit is contained in:
GPUCode
2023-03-27 14:29:17 +03:00
committed by GitHub
parent 9ef42040af
commit b5d6f645bd
99 changed files with 3165 additions and 4501 deletions

View File

@@ -13,6 +13,8 @@ add_library(video_core STATIC
precompiled_headers.h
primitive_assembly.cpp
primitive_assembly.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
rasterizer_interface.h
regs.cpp
regs.h
@@ -39,6 +41,8 @@ add_library(video_core STATIC
rasterizer_cache/texture_runtime.h
renderer_opengl/frame_dumper_opengl.cpp
renderer_opengl/frame_dumper_opengl.h
renderer_opengl/gl_driver.cpp
renderer_opengl/gl_driver.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp
@@ -82,6 +86,22 @@ add_library(video_core STATIC
#temporary, move these back in alphabetical order before merging
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_software/rasterizer.cpp
renderer_software/rasterizer.h
renderer_software/renderer_software.cpp
renderer_software/renderer_software.h
renderer_software/sw_clipper.cpp
renderer_software/sw_clipper.h
renderer_software/sw_framebuffer.cpp
renderer_software/sw_framebuffer.h
renderer_software/sw_lighting.cpp
renderer_software/sw_lighting.h
renderer_software/sw_proctex.cpp
renderer_software/sw_proctex.h
renderer_software/sw_rasterizer.cpp
renderer_software/sw_rasterizer.h
renderer_software/sw_texturing.cpp
renderer_software/sw_texturing.h
shader/debug_data.h
shader/shader.cpp
shader/shader.h
@@ -91,20 +111,8 @@ add_library(video_core STATIC
shader/shader_jit_x64_compiler.cpp
shader/shader_jit_x64.h
shader/shader_jit_x64_compiler.h
swrasterizer/clipper.cpp
swrasterizer/clipper.h
swrasterizer/framebuffer.cpp
swrasterizer/framebuffer.h
swrasterizer/lighting.cpp
swrasterizer/lighting.h
swrasterizer/proctex.cpp
swrasterizer/proctex.h
swrasterizer/rasterizer.cpp
swrasterizer/rasterizer.h
swrasterizer/swrasterizer.cpp
swrasterizer/swrasterizer.h
swrasterizer/texturing.cpp
swrasterizer/texturing.h
shader/shader_uniforms.cpp
shader/shader_uniforms.h
texture/etc1.cpp
texture/etc1.h
texture/texture_decode.cpp

View File

@@ -0,0 +1,832 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>
#include "common/alignment.h"
#include "core/memory.h"
#include "video_core/pica_state.h"
#include "video_core/rasterizer_accelerated.h"
namespace VideoCore {
/// Splits a packed 32-bit color into its four bytes (lowest byte first) and returns them as a
/// float vector with every component divided by 255.
static Common::Vec4f ColorRGBA8(const u32 color) {
    const u32 r = (color >> 0) & 0xFF;
    const u32 g = (color >> 8) & 0xFF;
    const u32 b = (color >> 16) & 0xFF;
    const u32 a = (color >> 24) & 0xFF;
    return Common::Vec4u{r, g, b, a} / 255.0f;
}
/// Converts a PICA light color register (r, g, b fields) into a float vector with every
/// component divided by 255.
static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
    const Common::Vec3u channels{color.r, color.g, color.b};
    return channels / 255.0f;
}
/**
 * Builds a hardware vertex from a shader output vertex by converting every attribute
 * component to a 32-bit float.
 * @param v Shader output vertex to convert
 * @param flip_quaternion When true the converted normal quaternion is negated
 */
RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
                                                      bool flip_quaternion) {
    // Small local shorthand for the per-component conversion
    const auto f32 = [](const auto& component) { return component.ToFloat32(); };

    position[0] = f32(v.pos.x);
    position[1] = f32(v.pos.y);
    position[2] = f32(v.pos.z);
    position[3] = f32(v.pos.w);

    color[0] = f32(v.color.x);
    color[1] = f32(v.color.y);
    color[2] = f32(v.color.z);
    color[3] = f32(v.color.w);

    tex_coord0[0] = f32(v.tc0.x);
    tex_coord0[1] = f32(v.tc0.y);
    tex_coord1[0] = f32(v.tc1.x);
    tex_coord1[1] = f32(v.tc1.y);
    tex_coord2[0] = f32(v.tc2.x);
    tex_coord2[1] = f32(v.tc2.y);
    tex_coord0_w = f32(v.tc0_w);

    normquat[0] = f32(v.quat.x);
    normquat[1] = f32(v.quat.y);
    normquat[2] = f32(v.quat.z);
    normquat[3] = f32(v.quat.w);

    view[0] = f32(v.view.x);
    view[1] = f32(v.view.y);
    view[2] = f32(v.view.z);

    // Negate the whole quaternion when the caller asked for it
    if (flip_quaternion) {
        normquat = -normquat;
    }
}
/// Stores the memory system and the global PICA register block, then flags every lighting
/// LUT as dirty.
RasterizerAccelerated::RasterizerAccelerated(Memory::MemorySystem& memory_)
    : memory(memory_), regs(Pica::g_state.regs) {
    for (auto& lut_dirty : uniform_block_data.lighting_lut_dirty) {
        lut_dirty = true;
    }
}
/**
 * Helper that detects the "opposite quaternion" problem during interpolation. Explanation
 * (originally by yuriks):
 *
 * Every rotation is represented by two quaternions, Q and -Q. Interpolating between two
 * quaternions that are opposite takes the longest path between the two rotations instead of
 * the shortest one. Two quaternions are opposite when Dot(Q1, Q2) < 0; flipping either of
 * them makes Dot(Q1, -Q2) positive and fixes the path.
 *
 * The check is applied per-vertex before the quaternions are handed to the graphics API. This
 * is right in most cases but can still go the long way around occasionally. Doing
 * `lerp(lerp(Q1, Q2), Q3)` (properly weighted) with the dot product check at each step would
 * cover those cases, at the cost of a more complex implementation.
 *
 * Fortunately the 3DS hardware happens to use this exact same logic, so this simple approach
 * is actually the more hardware-accurate one.
 *
 * @return true when the dot product of the two quaternions is negative
 */
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
    const auto to_vec4f = [](const Common::Vec4<Pica::float24>& q) {
        return Common::Vec4f{q.x.ToFloat32(), q.y.ToFloat32(), q.z.ToFloat32(), q.w.ToFloat32()};
    };
    const Common::Vec4f lhs = to_vec4f(qa);
    const Common::Vec4f rhs = to_vec4f(qb);
    return Common::Dot(lhs, rhs) < 0.f;
}
/// Appends the three vertices of a triangle to the vertex batch. The first vertex is the
/// reference: the quaternion of each other vertex is flipped when it is opposite to v0's.
void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0,
                                        const Pica::Shader::OutputVertex& v1,
                                        const Pica::Shader::OutputVertex& v2) {
    const bool flip_v1 = AreQuaternionsOpposite(v0.quat, v1.quat);
    const bool flip_v2 = AreQuaternionsOpposite(v0.quat, v2.quat);

    vertex_batch.emplace_back(v0, false);
    vertex_batch.emplace_back(v1, flip_v1);
    vertex_batch.emplace_back(v2, flip_v2);
}
/**
 * Determines the range of vertex indices referenced by the current draw and the total size of
 * the vertex shader input data for that range.
 * @param is_indexed Whether the draw reads its vertices through the index array
 * @param stride_alignment Alignment applied to the byte count of every attribute loader
 * @return The minimum vertex index, the maximum vertex index and the input size
 */
RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(
    bool is_indexed, u32 stride_alignment) {
    const auto& pipeline = regs.pipeline;
    const auto& attributes = pipeline.vertex_attributes;

    u32 vertex_min;
    u32 vertex_max;
    if (!is_indexed) {
        vertex_min = pipeline.vertex_offset;
        vertex_max = pipeline.vertex_offset + pipeline.num_vertices - 1;
    } else {
        const auto& index_info = pipeline.index_array;
        const PAddr address = attributes.GetPhysicalBaseAddress() + index_info.offset;
        const u8* indices_u8 = memory.GetPhysicalPointer(address);
        const u16* indices_u16 = reinterpret_cast<const u16*>(indices_u8);
        const bool index_u16 = index_info.format != 0;

        // Make sure the index data in memory is up to date before scanning it
        const u32 index_size = index_u16 ? 2 : 1;
        FlushRegion(address, pipeline.num_vertices * index_size);

        // NOTE(review): with zero vertices the range stays at min=0xFFFF, max=0 and the
        // vertex count below wraps around - confirm callers never hit this case.
        vertex_min = 0xFFFF;
        vertex_max = 0;
        for (u32 i = 0; i < pipeline.num_vertices; ++i) {
            u32 vertex;
            if (index_u16) {
                vertex = indices_u16[i];
            } else {
                vertex = indices_u8[i];
            }
            vertex_min = std::min(vertex_min, vertex);
            vertex_max = std::max(vertex_max, vertex);
        }
    }

    const u32 vertex_num = vertex_max - vertex_min + 1;

    u32 vs_input_size = 0;
    for (const auto& loader : attributes.attribute_loaders) {
        if (loader.component_count == 0) {
            continue;
        }
        const u32 stride = Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment);
        vs_input_size += Common::AlignUp(stride * vertex_num, 4);
    }

    return {vertex_min, vertex_max, vs_input_size};
}
void RasterizerAccelerated::SyncEntireState() {
// Sync renderer-specific fixed-function state
SyncFixedState();
// Sync uniforms
SyncClipCoef();
SyncDepthScale();
SyncDepthOffset();
SyncAlphaTest();
SyncCombinerColor();
auto& tev_stages = regs.texturing.GetTevStages();
for (std::size_t index = 0; index < tev_stages.size(); ++index) {
SyncTevConstColor(index, tev_stages[index]);
}
SyncGlobalAmbient();
for (unsigned light_index = 0; light_index < 8; light_index++) {
SyncLightSpecular0(light_index);
SyncLightSpecular1(light_index);
SyncLightDiffuse(light_index);
SyncLightAmbient(light_index);
SyncLightPosition(light_index);
SyncLightDistanceAttenuationBias(light_index);
SyncLightDistanceAttenuationScale(light_index);
}
SyncFogColor();
SyncProcTexNoise();
SyncProcTexBias();
SyncShadowBias();
SyncShadowTextureBias();
for (unsigned tex_index = 0; tex_index < 3; tex_index++) {
SyncTextureLodBias(tex_index);
}
}
/**
 * Reacts to a write to the PICA register with index `id`.
 *
 * Registers backing a shader uniform call the matching Sync* helper, registers that select
 * shader behaviour set `shader_dirty`, and LUT data registers set the corresponding dirty flag
 * in `uniform_block_data`. Any register not listed here is forwarded to
 * NotifyFixedFunctionPicaRegisterChanged.
 */
void RasterizerAccelerated::NotifyPicaRegisterChanged(u32 id) {
switch (id) {
// Depth modifiers
case PICA_REG_INDEX(rasterizer.viewport_depth_range):
SyncDepthScale();
break;
case PICA_REG_INDEX(rasterizer.viewport_depth_near_plane):
SyncDepthOffset();
break;
// Depth buffering
case PICA_REG_INDEX(rasterizer.depthmap_enable):
shader_dirty = true;
break;
// Shadow texture
case PICA_REG_INDEX(texturing.shadow):
SyncShadowTextureBias();
break;
// Fog state
case PICA_REG_INDEX(texturing.fog_color):
SyncFogColor();
break;
case PICA_REG_INDEX(texturing.fog_lut_data[0]):
case PICA_REG_INDEX(texturing.fog_lut_data[1]):
case PICA_REG_INDEX(texturing.fog_lut_data[2]):
case PICA_REG_INDEX(texturing.fog_lut_data[3]):
case PICA_REG_INDEX(texturing.fog_lut_data[4]):
case PICA_REG_INDEX(texturing.fog_lut_data[5]):
case PICA_REG_INDEX(texturing.fog_lut_data[6]):
case PICA_REG_INDEX(texturing.fog_lut_data[7]):
uniform_block_data.fog_lut_dirty = true;
break;
// ProcTex state
case PICA_REG_INDEX(texturing.proctex):
case PICA_REG_INDEX(texturing.proctex_lut):
case PICA_REG_INDEX(texturing.proctex_lut_offset):
SyncProcTexBias();
shader_dirty = true;
break;
case PICA_REG_INDEX(texturing.proctex_noise_u):
case PICA_REG_INDEX(texturing.proctex_noise_v):
case PICA_REG_INDEX(texturing.proctex_noise_frequency):
SyncProcTexNoise();
break;
case PICA_REG_INDEX(texturing.proctex_lut_data[0]):
case PICA_REG_INDEX(texturing.proctex_lut_data[1]):
case PICA_REG_INDEX(texturing.proctex_lut_data[2]):
case PICA_REG_INDEX(texturing.proctex_lut_data[3]):
case PICA_REG_INDEX(texturing.proctex_lut_data[4]):
case PICA_REG_INDEX(texturing.proctex_lut_data[5]):
case PICA_REG_INDEX(texturing.proctex_lut_data[6]):
case PICA_REG_INDEX(texturing.proctex_lut_data[7]):
// Which table gets marked dirty depends on the table currently selected by
// proctex_lut_config.ref_table
using Pica::TexturingRegs;
switch (regs.texturing.proctex_lut_config.ref_table.Value()) {
case TexturingRegs::ProcTexLutTable::Noise:
uniform_block_data.proctex_noise_lut_dirty = true;
break;
case TexturingRegs::ProcTexLutTable::ColorMap:
uniform_block_data.proctex_color_map_dirty = true;
break;
case TexturingRegs::ProcTexLutTable::AlphaMap:
uniform_block_data.proctex_alpha_map_dirty = true;
break;
case TexturingRegs::ProcTexLutTable::Color:
uniform_block_data.proctex_lut_dirty = true;
break;
case TexturingRegs::ProcTexLutTable::ColorDiff:
uniform_block_data.proctex_diff_lut_dirty = true;
break;
}
break;
// Alpha test
case PICA_REG_INDEX(framebuffer.output_merger.alpha_test):
SyncAlphaTest();
shader_dirty = true;
break;
case PICA_REG_INDEX(framebuffer.shadow):
SyncShadowBias();
break;
// Scissor test
case PICA_REG_INDEX(rasterizer.scissor_test.mode):
shader_dirty = true;
break;
case PICA_REG_INDEX(texturing.main_config):
shader_dirty = true;
break;
// Texture 0 type
case PICA_REG_INDEX(texturing.texture0.type):
shader_dirty = true;
break;
// TEV stages
// (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input)
case PICA_REG_INDEX(texturing.tev_stage0.color_source1):
case PICA_REG_INDEX(texturing.tev_stage0.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage0.color_op):
case PICA_REG_INDEX(texturing.tev_stage0.color_scale):
case PICA_REG_INDEX(texturing.tev_stage1.color_source1):
case PICA_REG_INDEX(texturing.tev_stage1.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage1.color_op):
case PICA_REG_INDEX(texturing.tev_stage1.color_scale):
case PICA_REG_INDEX(texturing.tev_stage2.color_source1):
case PICA_REG_INDEX(texturing.tev_stage2.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage2.color_op):
case PICA_REG_INDEX(texturing.tev_stage2.color_scale):
case PICA_REG_INDEX(texturing.tev_stage3.color_source1):
case PICA_REG_INDEX(texturing.tev_stage3.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage3.color_op):
case PICA_REG_INDEX(texturing.tev_stage3.color_scale):
case PICA_REG_INDEX(texturing.tev_stage4.color_source1):
case PICA_REG_INDEX(texturing.tev_stage4.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage4.color_op):
case PICA_REG_INDEX(texturing.tev_stage4.color_scale):
case PICA_REG_INDEX(texturing.tev_stage5.color_source1):
case PICA_REG_INDEX(texturing.tev_stage5.color_modifier1):
case PICA_REG_INDEX(texturing.tev_stage5.color_op):
case PICA_REG_INDEX(texturing.tev_stage5.color_scale):
case PICA_REG_INDEX(texturing.tev_combiner_buffer_input):
shader_dirty = true;
break;
case PICA_REG_INDEX(texturing.tev_stage0.const_r):
SyncTevConstColor(0, regs.texturing.tev_stage0);
break;
case PICA_REG_INDEX(texturing.tev_stage1.const_r):
SyncTevConstColor(1, regs.texturing.tev_stage1);
break;
case PICA_REG_INDEX(texturing.tev_stage2.const_r):
SyncTevConstColor(2, regs.texturing.tev_stage2);
break;
case PICA_REG_INDEX(texturing.tev_stage3.const_r):
SyncTevConstColor(3, regs.texturing.tev_stage3);
break;
case PICA_REG_INDEX(texturing.tev_stage4.const_r):
SyncTevConstColor(4, regs.texturing.tev_stage4);
break;
case PICA_REG_INDEX(texturing.tev_stage5.const_r):
SyncTevConstColor(5, regs.texturing.tev_stage5);
break;
// TEV combiner buffer color
case PICA_REG_INDEX(texturing.tev_combiner_buffer_color):
SyncCombinerColor();
break;
// Fragment lighting switches
// These registers are recognised but trigger no action here, and because they are listed
// explicitly they are not forwarded to the backend through the default case either.
// NOTE(review): the per-light config registers below set shader_dirty; confirm whether
// these switches are meant to do the same or are picked up elsewhere.
case PICA_REG_INDEX(lighting.disable):
case PICA_REG_INDEX(lighting.max_light_index):
case PICA_REG_INDEX(lighting.config0):
case PICA_REG_INDEX(lighting.config1):
case PICA_REG_INDEX(lighting.abs_lut_input):
case PICA_REG_INDEX(lighting.lut_input):
case PICA_REG_INDEX(lighting.lut_scale):
case PICA_REG_INDEX(lighting.light_enable):
break;
// Fragment lighting specular 0 color
case PICA_REG_INDEX(lighting.light[0].specular_0):
SyncLightSpecular0(0);
break;
case PICA_REG_INDEX(lighting.light[1].specular_0):
SyncLightSpecular0(1);
break;
case PICA_REG_INDEX(lighting.light[2].specular_0):
SyncLightSpecular0(2);
break;
case PICA_REG_INDEX(lighting.light[3].specular_0):
SyncLightSpecular0(3);
break;
case PICA_REG_INDEX(lighting.light[4].specular_0):
SyncLightSpecular0(4);
break;
case PICA_REG_INDEX(lighting.light[5].specular_0):
SyncLightSpecular0(5);
break;
case PICA_REG_INDEX(lighting.light[6].specular_0):
SyncLightSpecular0(6);
break;
case PICA_REG_INDEX(lighting.light[7].specular_0):
SyncLightSpecular0(7);
break;
// Fragment lighting specular 1 color
case PICA_REG_INDEX(lighting.light[0].specular_1):
SyncLightSpecular1(0);
break;
case PICA_REG_INDEX(lighting.light[1].specular_1):
SyncLightSpecular1(1);
break;
case PICA_REG_INDEX(lighting.light[2].specular_1):
SyncLightSpecular1(2);
break;
case PICA_REG_INDEX(lighting.light[3].specular_1):
SyncLightSpecular1(3);
break;
case PICA_REG_INDEX(lighting.light[4].specular_1):
SyncLightSpecular1(4);
break;
case PICA_REG_INDEX(lighting.light[5].specular_1):
SyncLightSpecular1(5);
break;
case PICA_REG_INDEX(lighting.light[6].specular_1):
SyncLightSpecular1(6);
break;
case PICA_REG_INDEX(lighting.light[7].specular_1):
SyncLightSpecular1(7);
break;
// Fragment lighting diffuse color
case PICA_REG_INDEX(lighting.light[0].diffuse):
SyncLightDiffuse(0);
break;
case PICA_REG_INDEX(lighting.light[1].diffuse):
SyncLightDiffuse(1);
break;
case PICA_REG_INDEX(lighting.light[2].diffuse):
SyncLightDiffuse(2);
break;
case PICA_REG_INDEX(lighting.light[3].diffuse):
SyncLightDiffuse(3);
break;
case PICA_REG_INDEX(lighting.light[4].diffuse):
SyncLightDiffuse(4);
break;
case PICA_REG_INDEX(lighting.light[5].diffuse):
SyncLightDiffuse(5);
break;
case PICA_REG_INDEX(lighting.light[6].diffuse):
SyncLightDiffuse(6);
break;
case PICA_REG_INDEX(lighting.light[7].diffuse):
SyncLightDiffuse(7);
break;
// Fragment lighting ambient color
case PICA_REG_INDEX(lighting.light[0].ambient):
SyncLightAmbient(0);
break;
case PICA_REG_INDEX(lighting.light[1].ambient):
SyncLightAmbient(1);
break;
case PICA_REG_INDEX(lighting.light[2].ambient):
SyncLightAmbient(2);
break;
case PICA_REG_INDEX(lighting.light[3].ambient):
SyncLightAmbient(3);
break;
case PICA_REG_INDEX(lighting.light[4].ambient):
SyncLightAmbient(4);
break;
case PICA_REG_INDEX(lighting.light[5].ambient):
SyncLightAmbient(5);
break;
case PICA_REG_INDEX(lighting.light[6].ambient):
SyncLightAmbient(6);
break;
case PICA_REG_INDEX(lighting.light[7].ambient):
SyncLightAmbient(7);
break;
// Fragment lighting position
// Only the x and z fields are listed; presumably y shares a register word with x, which
// would make a separate case label a duplicate - TODO confirm against the register layout.
case PICA_REG_INDEX(lighting.light[0].x):
case PICA_REG_INDEX(lighting.light[0].z):
SyncLightPosition(0);
break;
case PICA_REG_INDEX(lighting.light[1].x):
case PICA_REG_INDEX(lighting.light[1].z):
SyncLightPosition(1);
break;
case PICA_REG_INDEX(lighting.light[2].x):
case PICA_REG_INDEX(lighting.light[2].z):
SyncLightPosition(2);
break;
case PICA_REG_INDEX(lighting.light[3].x):
case PICA_REG_INDEX(lighting.light[3].z):
SyncLightPosition(3);
break;
case PICA_REG_INDEX(lighting.light[4].x):
case PICA_REG_INDEX(lighting.light[4].z):
SyncLightPosition(4);
break;
case PICA_REG_INDEX(lighting.light[5].x):
case PICA_REG_INDEX(lighting.light[5].z):
SyncLightPosition(5);
break;
case PICA_REG_INDEX(lighting.light[6].x):
case PICA_REG_INDEX(lighting.light[6].z):
SyncLightPosition(6);
break;
case PICA_REG_INDEX(lighting.light[7].x):
case PICA_REG_INDEX(lighting.light[7].z):
SyncLightPosition(7);
break;
// Fragment spot lighting direction
// As with the position, only spot_x and spot_z are listed (presumably for the same reason).
case PICA_REG_INDEX(lighting.light[0].spot_x):
case PICA_REG_INDEX(lighting.light[0].spot_z):
SyncLightSpotDirection(0);
break;
case PICA_REG_INDEX(lighting.light[1].spot_x):
case PICA_REG_INDEX(lighting.light[1].spot_z):
SyncLightSpotDirection(1);
break;
case PICA_REG_INDEX(lighting.light[2].spot_x):
case PICA_REG_INDEX(lighting.light[2].spot_z):
SyncLightSpotDirection(2);
break;
case PICA_REG_INDEX(lighting.light[3].spot_x):
case PICA_REG_INDEX(lighting.light[3].spot_z):
SyncLightSpotDirection(3);
break;
case PICA_REG_INDEX(lighting.light[4].spot_x):
case PICA_REG_INDEX(lighting.light[4].spot_z):
SyncLightSpotDirection(4);
break;
case PICA_REG_INDEX(lighting.light[5].spot_x):
case PICA_REG_INDEX(lighting.light[5].spot_z):
SyncLightSpotDirection(5);
break;
case PICA_REG_INDEX(lighting.light[6].spot_x):
case PICA_REG_INDEX(lighting.light[6].spot_z):
SyncLightSpotDirection(6);
break;
case PICA_REG_INDEX(lighting.light[7].spot_x):
case PICA_REG_INDEX(lighting.light[7].spot_z):
SyncLightSpotDirection(7);
break;
// Fragment lighting light source config
case PICA_REG_INDEX(lighting.light[0].config):
case PICA_REG_INDEX(lighting.light[1].config):
case PICA_REG_INDEX(lighting.light[2].config):
case PICA_REG_INDEX(lighting.light[3].config):
case PICA_REG_INDEX(lighting.light[4].config):
case PICA_REG_INDEX(lighting.light[5].config):
case PICA_REG_INDEX(lighting.light[6].config):
case PICA_REG_INDEX(lighting.light[7].config):
shader_dirty = true;
break;
// Fragment lighting distance attenuation bias
case PICA_REG_INDEX(lighting.light[0].dist_atten_bias):
SyncLightDistanceAttenuationBias(0);
break;
case PICA_REG_INDEX(lighting.light[1].dist_atten_bias):
SyncLightDistanceAttenuationBias(1);
break;
case PICA_REG_INDEX(lighting.light[2].dist_atten_bias):
SyncLightDistanceAttenuationBias(2);
break;
case PICA_REG_INDEX(lighting.light[3].dist_atten_bias):
SyncLightDistanceAttenuationBias(3);
break;
case PICA_REG_INDEX(lighting.light[4].dist_atten_bias):
SyncLightDistanceAttenuationBias(4);
break;
case PICA_REG_INDEX(lighting.light[5].dist_atten_bias):
SyncLightDistanceAttenuationBias(5);
break;
case PICA_REG_INDEX(lighting.light[6].dist_atten_bias):
SyncLightDistanceAttenuationBias(6);
break;
case PICA_REG_INDEX(lighting.light[7].dist_atten_bias):
SyncLightDistanceAttenuationBias(7);
break;
// Fragment lighting distance attenuation scale
case PICA_REG_INDEX(lighting.light[0].dist_atten_scale):
SyncLightDistanceAttenuationScale(0);
break;
case PICA_REG_INDEX(lighting.light[1].dist_atten_scale):
SyncLightDistanceAttenuationScale(1);
break;
case PICA_REG_INDEX(lighting.light[2].dist_atten_scale):
SyncLightDistanceAttenuationScale(2);
break;
case PICA_REG_INDEX(lighting.light[3].dist_atten_scale):
SyncLightDistanceAttenuationScale(3);
break;
case PICA_REG_INDEX(lighting.light[4].dist_atten_scale):
SyncLightDistanceAttenuationScale(4);
break;
case PICA_REG_INDEX(lighting.light[5].dist_atten_scale):
SyncLightDistanceAttenuationScale(5);
break;
case PICA_REG_INDEX(lighting.light[6].dist_atten_scale):
SyncLightDistanceAttenuationScale(6);
break;
case PICA_REG_INDEX(lighting.light[7].dist_atten_scale):
SyncLightDistanceAttenuationScale(7);
break;
// Fragment lighting global ambient color (emission + ambient * ambient)
case PICA_REG_INDEX(lighting.global_ambient):
SyncGlobalAmbient();
break;
// Fragment lighting lookup tables
case PICA_REG_INDEX(lighting.lut_data[0]):
case PICA_REG_INDEX(lighting.lut_data[1]):
case PICA_REG_INDEX(lighting.lut_data[2]):
case PICA_REG_INDEX(lighting.lut_data[3]):
case PICA_REG_INDEX(lighting.lut_data[4]):
case PICA_REG_INDEX(lighting.lut_data[5]):
case PICA_REG_INDEX(lighting.lut_data[6]):
case PICA_REG_INDEX(lighting.lut_data[7]): {
// Mark the table selected by lut_config.type as dirty, plus the global "any" flag
const auto& lut_config = regs.lighting.lut_config;
uniform_block_data.lighting_lut_dirty[lut_config.type] = true;
uniform_block_data.lighting_lut_dirty_any = true;
break;
}
// Texture LOD biases
case PICA_REG_INDEX(texturing.texture0.lod.bias):
SyncTextureLodBias(0);
break;
case PICA_REG_INDEX(texturing.texture1.lod.bias):
SyncTextureLodBias(1);
break;
case PICA_REG_INDEX(texturing.texture2.lod.bias):
SyncTextureLodBias(2);
break;
// Clipping plane
case PICA_REG_INDEX(rasterizer.clip_coef[0]):
case PICA_REG_INDEX(rasterizer.clip_coef[1]):
case PICA_REG_INDEX(rasterizer.clip_coef[2]):
case PICA_REG_INDEX(rasterizer.clip_coef[3]):
SyncClipCoef();
break;
default:
// Forward registers that map to fixed function API features to the video backend
NotifyFixedFunctionPicaRegisterChanged(id);
}
}
/// Updates the depth scale uniform from the viewport depth range register and marks the
/// uniform block dirty when the value changed.
void RasterizerAccelerated::SyncDepthScale() {
    auto& uniforms = uniform_block_data.data;
    const float new_scale =
        Pica::float24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32();
    if (new_scale == uniforms.depth_scale) {
        return;
    }
    uniforms.depth_scale = new_scale;
    uniform_block_data.dirty = true;
}
/// Updates the depth offset uniform from the viewport near plane register and marks the
/// uniform block dirty when the value changed.
void RasterizerAccelerated::SyncDepthOffset() {
    auto& uniforms = uniform_block_data.data;
    const float new_offset =
        Pica::float24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32();
    if (new_offset == uniforms.depth_offset) {
        return;
    }
    uniforms.depth_offset = new_offset;
    uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncFogColor() {
const auto& fog_color_regs = regs.texturing.fog_color;
const Common::Vec3f fog_color = {
fog_color_regs.r.Value() / 255.0f,
fog_color_regs.g.Value() / 255.0f,
fog_color_regs.b.Value() / 255.0f,
};
if (fog_color != uniform_block_data.data.fog_color) {
uniform_block_data.data.fog_color = fog_color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncProcTexNoise() {
const Common::Vec2f proctex_noise_f = {
Pica::float16::FromRaw(regs.texturing.proctex_noise_frequency.u).ToFloat32(),
Pica::float16::FromRaw(regs.texturing.proctex_noise_frequency.v).ToFloat32(),
};
const Common::Vec2f proctex_noise_a = {
regs.texturing.proctex_noise_u.amplitude / 4095.0f,
regs.texturing.proctex_noise_v.amplitude / 4095.0f,
};
const Common::Vec2f proctex_noise_p = {
Pica::float16::FromRaw(regs.texturing.proctex_noise_u.phase).ToFloat32(),
Pica::float16::FromRaw(regs.texturing.proctex_noise_v.phase).ToFloat32(),
};
if (proctex_noise_f != uniform_block_data.data.proctex_noise_f ||
proctex_noise_a != uniform_block_data.data.proctex_noise_a ||
proctex_noise_p != uniform_block_data.data.proctex_noise_p) {
uniform_block_data.data.proctex_noise_f = proctex_noise_f;
uniform_block_data.data.proctex_noise_a = proctex_noise_a;
uniform_block_data.data.proctex_noise_p = proctex_noise_p;
uniform_block_data.dirty = true;
}
}
/// Updates the procedural texture bias uniform. The raw value is assembled from the low bits
/// in the proctex register and the high bits (shifted left by 8) in the proctex LUT register.
void RasterizerAccelerated::SyncProcTexBias() {
    const auto raw_bias =
        regs.texturing.proctex.bias_low | (regs.texturing.proctex_lut.bias_high << 8);
    const auto new_bias = Pica::float16::FromRaw(raw_bias).ToFloat32();

    auto& uniforms = uniform_block_data.data;
    if (new_bias != uniforms.proctex_bias) {
        uniforms.proctex_bias = new_bias;
        uniform_block_data.dirty = true;
    }
}
/// Updates the alpha test reference uniform from the output merger register and marks the
/// uniform block dirty when it changed.
void RasterizerAccelerated::SyncAlphaTest() {
    const auto& alpha_test = regs.framebuffer.output_merger.alpha_test;
    auto& uniforms = uniform_block_data.data;
    if (alpha_test.ref != uniforms.alphatest_ref) {
        uniforms.alphatest_ref = alpha_test.ref;
        uniform_block_data.dirty = true;
    }
}
/// Updates the TEV combiner buffer color uniform from its register and marks the uniform
/// block dirty when it changed.
void RasterizerAccelerated::SyncCombinerColor() {
    const auto new_color = ColorRGBA8(regs.texturing.tev_combiner_buffer_color.raw);
    auto& uniforms = uniform_block_data.data;
    if (new_color == uniforms.tev_combiner_buffer_color) {
        return;
    }
    uniforms.tev_combiner_buffer_color = new_color;
    uniform_block_data.dirty = true;
}
/// Updates the constant color uniform of the given TEV stage from the stage configuration
/// and marks the uniform block dirty when it changed.
void RasterizerAccelerated::SyncTevConstColor(
    std::size_t stage_index, const Pica::TexturingRegs::TevStageConfig& tev_stage) {
    const auto new_color = ColorRGBA8(tev_stage.const_color);
    auto& uniforms = uniform_block_data.data;
    if (new_color != uniforms.const_color[stage_index]) {
        uniforms.const_color[stage_index] = new_color;
        uniform_block_data.dirty = true;
    }
}
/// Updates the global ambient lighting uniform from its register and marks the uniform block
/// dirty when it changed.
void RasterizerAccelerated::SyncGlobalAmbient() {
    const auto ambient = LightColor(regs.lighting.global_ambient);
    auto& uniforms = uniform_block_data.data;
    if (ambient != uniforms.lighting_global_ambient) {
        uniforms.lighting_global_ambient = ambient;
        uniform_block_data.dirty = true;
    }
}
/// Updates the specular 0 color uniform of the given light and marks the uniform block dirty
/// when it changed.
void RasterizerAccelerated::SyncLightSpecular0(int light_index) {
    const auto specular = LightColor(regs.lighting.light[light_index].specular_0);
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (specular != light_uniform.specular_0) {
        light_uniform.specular_0 = specular;
        uniform_block_data.dirty = true;
    }
}
/// Updates the specular 1 color uniform of the given light and marks the uniform block dirty
/// when it changed.
void RasterizerAccelerated::SyncLightSpecular1(int light_index) {
    const auto specular = LightColor(regs.lighting.light[light_index].specular_1);
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (specular != light_uniform.specular_1) {
        light_uniform.specular_1 = specular;
        uniform_block_data.dirty = true;
    }
}
/// Updates the diffuse color uniform of the given light and marks the uniform block dirty
/// when it changed.
void RasterizerAccelerated::SyncLightDiffuse(int light_index) {
    const auto diffuse = LightColor(regs.lighting.light[light_index].diffuse);
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (diffuse != light_uniform.diffuse) {
        light_uniform.diffuse = diffuse;
        uniform_block_data.dirty = true;
    }
}
/// Updates the ambient color uniform of the given light and marks the uniform block dirty
/// when it changed.
void RasterizerAccelerated::SyncLightAmbient(int light_index) {
    const auto ambient = LightColor(regs.lighting.light[light_index].ambient);
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (ambient != light_uniform.ambient) {
        light_uniform.ambient = ambient;
        uniform_block_data.dirty = true;
    }
}
void RasterizerAccelerated::SyncLightPosition(int light_index) {
const Common::Vec3f position = {
Pica::float16::FromRaw(regs.lighting.light[light_index].x).ToFloat32(),
Pica::float16::FromRaw(regs.lighting.light[light_index].y).ToFloat32(),
Pica::float16::FromRaw(regs.lighting.light[light_index].z).ToFloat32(),
};
if (position != uniform_block_data.data.light_src[light_index].position) {
uniform_block_data.data.light_src[light_index].position = position;
uniform_block_data.dirty = true;
}
}
/// Updates the spot direction uniform of the given light (each component divided by 2047)
/// and marks the uniform block dirty when it changed.
void RasterizerAccelerated::SyncLightSpotDirection(int light_index) {
    const auto& light = regs.lighting.light[light_index];
    const float dir_x = light.spot_x / 2047.0f;
    const float dir_y = light.spot_y / 2047.0f;
    const float dir_z = light.spot_z / 2047.0f;
    const auto new_direction = Common::Vec3f{dir_x, dir_y, dir_z};

    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (new_direction != light_uniform.spot_direction) {
        light_uniform.spot_direction = new_direction;
        uniform_block_data.dirty = true;
    }
}
/// Updates the distance attenuation bias uniform of the given light and marks the uniform
/// block dirty when it changed.
void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) {
    const float new_bias =
        Pica::float20::FromRaw(regs.lighting.light[light_index].dist_atten_bias).ToFloat32();
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (new_bias == light_uniform.dist_atten_bias) {
        return;
    }
    light_uniform.dist_atten_bias = new_bias;
    uniform_block_data.dirty = true;
}
/// Updates the distance attenuation scale uniform of the given light and marks the uniform
/// block dirty when it changed.
void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) {
    const float new_scale =
        Pica::float20::FromRaw(regs.lighting.light[light_index].dist_atten_scale).ToFloat32();
    auto& light_uniform = uniform_block_data.data.light_src[light_index];
    if (new_scale == light_uniform.dist_atten_scale) {
        return;
    }
    light_uniform.dist_atten_scale = new_scale;
    uniform_block_data.dirty = true;
}
/// Updates the constant and linear shadow bias uniforms from the framebuffer shadow register
/// and marks the uniform block dirty when either changed.
void RasterizerAccelerated::SyncShadowBias() {
    const auto& shadow_regs = regs.framebuffer.shadow;
    const float bias_constant = Pica::float16::FromRaw(shadow_regs.constant).ToFloat32();
    const float bias_linear = Pica::float16::FromRaw(shadow_regs.linear).ToFloat32();

    auto& uniforms = uniform_block_data.data;
    const bool unchanged = bias_constant == uniforms.shadow_bias_constant &&
                           bias_linear == uniforms.shadow_bias_linear;
    if (unchanged) {
        return;
    }
    uniforms.shadow_bias_constant = bias_constant;
    uniforms.shadow_bias_linear = bias_linear;
    uniform_block_data.dirty = true;
}
/// Updates the shadow texture bias uniform (register value shifted left by one) and marks the
/// uniform block dirty when it changed.
void RasterizerAccelerated::SyncShadowTextureBias() {
    const int new_bias = regs.texturing.shadow.bias << 1;
    auto& uniforms = uniform_block_data.data;
    if (new_bias == uniforms.shadow_texture_bias) {
        return;
    }
    uniforms.shadow_texture_bias = new_bias;
    uniform_block_data.dirty = true;
}
/// Updates the LOD bias uniform of the given texture unit (register value divided by 256) and
/// marks the uniform block dirty when it changed.
void RasterizerAccelerated::SyncTextureLodBias(int tex_index) {
    const auto textures = regs.texturing.GetTextures();
    const float new_bias = textures[tex_index].config.lod.bias / 256.0f;

    auto& uniforms = uniform_block_data.data;
    if (new_bias == uniforms.tex_lod_bias[tex_index]) {
        return;
    }
    uniforms.tex_lod_bias[tex_index] = new_bias;
    uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncClipCoef() {
const auto raw_clip_coef = regs.rasterizer.GetClipCoef();
const Common::Vec4f new_clip_coef = {raw_clip_coef.x.ToFloat32(), raw_clip_coef.y.ToFloat32(),
raw_clip_coef.z.ToFloat32(), raw_clip_coef.w.ToFloat32()};
if (new_clip_coef != uniform_block_data.data.clip_coef) {
uniform_block_data.data.clip_coef = new_clip_coef;
uniform_block_data.dirty = true;
}
}
} // namespace VideoCore

View File

@@ -0,0 +1,159 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader_uniforms.h"
namespace Memory {
class MemorySystem;
}
namespace Pica {
struct Regs;
}
namespace VideoCore {
/**
 * Rasterizer base class that sits between RasterizerInterface and a concrete video backend.
 * It declares the Sync* helpers that copy PICA register state into the shared uniform data,
 * and owns the vertex batch, dirty flags and lookup-table storage that those helpers and the
 * derived backend operate on. Backends implement SyncFixedState() and
 * NotifyFixedFunctionPicaRegisterChanged() for their own fixed-function state.
 */
class RasterizerAccelerated : public RasterizerInterface {
public:
/// Stores a reference to the memory system; it must outlive the rasterizer
RasterizerAccelerated(Memory::MemorySystem& memory);
virtual ~RasterizerAccelerated() = default;
// RasterizerInterface overrides implemented by this class
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void NotifyPicaRegisterChanged(u32 id) override;
void SyncEntireState() override;
protected:
/// Sync fixed-function pipeline state
virtual void SyncFixedState() = 0;
/// Notifies that a fixed function PICA register changed to the video backend
virtual void NotifyFixedFunctionPicaRegisterChanged(u32 id) = 0;
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs the texture LOD bias to match the PICA register
void SyncTextureLodBias(int tex_index);
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
protected:
/// Structure that keeps track of the uniform state
struct UniformBlockData {
/// Uniform values written by the Sync* helpers
Pica::Shader::UniformData data{};
/// Per-sampler dirty flags for the lighting lookup tables
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
// The remaining flags start out true, so a freshly constructed block is reported as dirty
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
/// Set by the Sync* helpers whenever a value in `data` changes
bool dirty = true;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
/// Builds a vertex from shader output; `flip_quaternion` presumably negates the normal
/// quaternion — confirm against the definition in the source file
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
// NOTE(review): member order presumably defines the vertex layout consumed by the backend;
// do not reorder without checking the backend's vertex attribute setup
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
/// Result of AnalyzeVertexArray: index range and size of the vertex shader input
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed, u32 stride_alignment = 1);
protected:
/// Memory system passed to the constructor
Memory::MemorySystem& memory;
/// PICA register state read by the Sync* helpers
Pica::Regs& regs;
/// Vertices accumulated for drawing (presumably filled by AddTriangle — confirm)
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
/// Uniform values plus the dirty flags that track them
UniformBlockData uniform_block_data{};
// Storage for the lighting, fog and procedural texture lookup tables
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace VideoCore

View File

@@ -2,15 +2,17 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <memory>
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/swrasterizer/swrasterizer.h"
#include "video_core/video_core.h"
RendererBase::RendererBase(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window_)
: render_window{window}, secondary_window{secondary_window_} {}
namespace VideoCore {
/// Stores references to the system and the primary render window, plus the (possibly null)
/// pointer to the secondary window. No other work is done here.
RendererBase::RendererBase(Core::System& system_, Frontend::EmuWindow& window,
Frontend::EmuWindow* secondary_window_)
: system{system_}, render_window{window}, secondary_window{secondary_window_} {}
RendererBase::~RendererBase() = default;
@@ -25,19 +27,35 @@ void RendererBase::UpdateCurrentFramebufferLayout(bool is_portrait_mode) {
}
}
void RendererBase::RefreshRasterizerSetting() {
bool hw_renderer_enabled = VideoCore::g_hw_renderer_enabled;
if (rasterizer == nullptr || opengl_rasterizer_active != hw_renderer_enabled) {
opengl_rasterizer_active = hw_renderer_enabled;
void RendererBase::EndFrame() {
current_frame++;
if (hw_renderer_enabled) {
rasterizer = std::make_unique<OpenGL::RasterizerOpenGL>(render_window);
} else {
rasterizer = std::make_unique<VideoCore::SWRasterizer>();
}
system.perf_stats->EndSystemFrame();
render_window.PollEvents();
system.frame_limiter.DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
system.perf_stats->BeginSystemFrame();
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
Pica::g_debug_context->recorder->FrameFinished();
}
}
void RendererBase::Sync() {
rasterizer->SyncEntireState();
bool RendererBase::IsScreenshotPending() const {
return renderer_settings.screenshot_requested;
}
/// Records a screenshot request in the renderer settings. If a request is already pending the
/// new one is dropped and an error is logged.
void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
                                     const Layout::FramebufferLayout& layout) {
    auto& settings = renderer_settings;
    if (!settings.screenshot_requested) {
        settings.screenshot_bits = data;
        settings.screenshot_complete_callback = callback;
        settings.screenshot_framebuffer_layout = layout;
        // The flag is raised only after the request parameters above have been stored
        settings.screenshot_requested = true;
        return;
    }

    LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
}
} // namespace VideoCore

View File

@@ -4,25 +4,36 @@
#pragma once
#include <memory>
#include "common/common_types.h"
#include "core/frontend/framebuffer_layout.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/video_core.h"
namespace Frontend {
class EmuWindow;
}
namespace Core {
class System;
}
namespace VideoCore {
/// State shared with the renderer; at present it only carries the pending screenshot request
/// filled in by RendererBase::RequestScreenshot.
struct RendererSettings {
// Screenshot
/// True while a screenshot request is outstanding
std::atomic_bool screenshot_requested{false};
/// Pointer supplied by the requester (presumably the destination pixel buffer — TODO confirm)
void* screenshot_bits{};
/// Callback supplied by the requester (presumably run once the screenshot is done — confirm
/// in the renderer implementation)
std::function<void()> screenshot_complete_callback;
/// Framebuffer layout supplied by the requester
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
class RendererBase : NonCopyable {
public:
explicit RendererBase(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window);
explicit RendererBase(Core::System& system, Frontend::EmuWindow& window,
Frontend::EmuWindow* secondary_window);
virtual ~RendererBase();
/// Initialize the renderer
virtual VideoCore::ResultStatus Init() = 0;
/// Shutdown the renderer
virtual void ShutDown() = 0;
/// Returns the rasterizer owned by the renderer
virtual VideoCore::RasterizerInterface* Rasterizer() const = 0;
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers() = 0;
@@ -35,27 +46,29 @@ public:
}
/// Prepares for video dumping (e.g. create necessary buffers, etc)
virtual void PrepareVideoDumping() = 0;
virtual void PrepareVideoDumping() {}
/// Cleans up after video dumping is ended
virtual void CleanupVideoDumping() = 0;
virtual void CleanupVideoDumping() {}
/// Synchronizes fixed function renderer state
virtual void Sync() {}
/// Updates the framebuffer layout of the contained render window handle.
void UpdateCurrentFramebufferLayout(bool is_portrait_mode = {});
/// Ends the current frame
void EndFrame();
// Getter/setter functions:
// ------------------------
f32 GetCurrentFPS() const {
return m_current_fps;
return current_fps;
}
int GetCurrentFrame() const {
return m_current_frame;
}
VideoCore::RasterizerInterface* Rasterizer() const {
return rasterizer.get();
return current_frame;
}
Frontend::EmuWindow& GetRenderWindow() {
@@ -66,16 +79,28 @@ public:
return render_window;
}
void RefreshRasterizerSetting();
void Sync();
[[nodiscard]] RendererSettings& Settings() {
return renderer_settings;
}
[[nodiscard]] const RendererSettings& Settings() const {
return renderer_settings;
}
/// Returns true if a screenshot is being processed
[[nodiscard]] bool IsScreenshotPending() const;
/// Request a screenshot of the next frame
void RequestScreenshot(void* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
protected:
Core::System& system;
RendererSettings renderer_settings;
Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
Frontend::EmuWindow* secondary_window; ///< Reference to the secondary render window handle.
std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
private:
bool opengl_rasterizer_active = false;
f32 current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int current_frame = 0; ///< Current frame, should be set by the renderer
};
} // namespace VideoCore

View File

@@ -4,7 +4,6 @@
#include <glad/glad.h>
#include "core/frontend/emu_window.h"
#include "core/frontend/scope_acquire_context.h"
#include "video_core/renderer_opengl/frame_dumper_opengl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -39,7 +38,7 @@ void FrameDumperOpenGL::StopDumping() {
}
void FrameDumperOpenGL::PresentLoop() {
Frontend::ScopeAcquireContext scope{*context};
const auto scope = context->Acquire();
InitializeOpenGLObjects();
const auto& layout = GetLayout();

View File

@@ -0,0 +1,156 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/assert.h"
#include "common/settings.h"
#include "core/telemetry_session.h"
#include "video_core/renderer_opengl/gl_driver.h"
namespace OpenGL {
// Provides the operators that let DriverBug values be combined and tested as flags
// (this file relies on `|`, `|=` and `&` for DriverBug)
DECLARE_ENUM_FLAG_OPERATORS(DriverBug);
/// Returns the name of a GL_DEBUG_SOURCE_* value with the prefix stripped (e.g. "API").
/// Values outside the listed set hit UNREACHABLE(); an empty view is returned after that.
inline std::string_view GetSource(GLenum source) {
    switch (source) {
    case GL_DEBUG_SOURCE_API:
        return "API";
    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
        return "WINDOW_SYSTEM";
    case GL_DEBUG_SOURCE_SHADER_COMPILER:
        return "SHADER_COMPILER";
    case GL_DEBUG_SOURCE_THIRD_PARTY:
        return "THIRD_PARTY";
    case GL_DEBUG_SOURCE_APPLICATION:
        return "APPLICATION";
    case GL_DEBUG_SOURCE_OTHER:
        return "OTHER";
    default:
        UNREACHABLE();
    }

    return std::string_view{};
}
/// Returns the name of a GL_DEBUG_TYPE_* value with the prefix stripped (e.g. "ERROR").
/// All debug message types are covered, including PUSH_GROUP and POP_GROUP, which the debug
/// callback can receive when debug groups are in use (for instance when a graphics debugger
/// injects them). Any other value hits UNREACHABLE(); an empty view is returned after that.
inline std::string_view GetType(GLenum type) {
// Token pasting/stringizing keeps the case label and the returned name in sync
#define RET(t)                                                                                     \
    case GL_DEBUG_TYPE_##t:                                                                        \
        return #t

    switch (type) {
        RET(ERROR);
        RET(DEPRECATED_BEHAVIOR);
        RET(UNDEFINED_BEHAVIOR);
        RET(PORTABILITY);
        RET(PERFORMANCE);
        RET(OTHER);
        RET(MARKER);
        RET(PUSH_GROUP);
        RET(POP_GROUP);
    default:
        UNREACHABLE();
    }

#undef RET

    return std::string_view{};
}
/// OpenGL debug message callback. Maps the message severity onto a log level and forwards the
/// source, type, id and message text to the logger. `length` and `user_param` are unused.
static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
                                  GLsizei length, const GLchar* message, const void* user_param) {
    const Log::Level level = [severity] {
        switch (severity) {
        case GL_DEBUG_SEVERITY_HIGH:
            return Log::Level::Critical;
        case GL_DEBUG_SEVERITY_MEDIUM:
            return Log::Level::Warning;
        case GL_DEBUG_SEVERITY_LOW:
        case GL_DEBUG_SEVERITY_NOTIFICATION:
            return Log::Level::Debug;
        default:
            // Unrecognised severities are logged at Info level
            return Log::Level::Info;
        }
    }();

    LOG_GENERIC(Log::Class::Render_OpenGL, level, "{} {} {}: {}", GetSource(source), GetType(type),
                id, message);
}
/// Optionally installs the GL debug callback, then gathers driver information.
/// The four query steps run in a fixed order: DeduceVendor() reads the vendor string stored by
/// ReportDriverInfo(), and FindBugs() reads the vendor chosen by DeduceVendor().
Driver::Driver(Core::TelemetrySession& telemetry_session_) : telemetry_session{telemetry_session_} {
    // Debug output is only enabled when the renderer_debug setting is on
    if (Settings::values.renderer_debug.GetValue()) {
        glEnable(GL_DEBUG_OUTPUT);
        glDebugMessageCallback(DebugHandler, nullptr);
    }

    ReportDriverInfo();
    DeduceVendor();
    CheckExtensionSupport();
    FindBugs();
}
// Defaulted out of line; the class has no resources of its own to release
Driver::~Driver() = default;
/// Reports whether `bug` is among the flags recorded in `bugs` by FindBugs().
bool Driver::HasBug(DriverBug bug) const {
    const auto matching_flags = bugs & bug;
    return True(matching_flags);
}
void Driver::ReportDriverInfo() {
// Report the context version and the vendor string
gl_version = std::string_view{reinterpret_cast<const char*>(glGetString(GL_VERSION))};
gpu_vendor = std::string_view{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
gpu_model = std::string_view{reinterpret_cast<const char*>(glGetString(GL_RENDERER))};
LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
// Add the information to the telemetry system
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", std::string{gpu_vendor});
telemetry_session.AddField(user_system, "GPU_Model", std::string{gpu_model});
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string{gl_version});
}
void Driver::DeduceVendor() {
if (gpu_vendor.find("NVIDIA") != gpu_vendor.npos) {
vendor = Vendor::Nvidia;
} else if ((gpu_vendor.find("ATI") != gpu_vendor.npos) ||
(gpu_vendor.find("AMD") != gpu_vendor.npos) ||
(gpu_vendor.find("Advanced Micro Devices") != gpu_vendor.npos)) {
vendor = Vendor::AMD;
} else if (gpu_vendor.find("Intel") != gpu_vendor.npos) {
vendor = Vendor::Intel;
} else if (gpu_vendor.find("ARM") != gpu_vendor.npos) {
vendor = Vendor::ARM;
} else if (gpu_vendor.find("Qualcomm") != gpu_vendor.npos) {
vendor = Vendor::Qualcomm;
} else if (gpu_vendor.find("Samsung") != gpu_vendor.npos) {
vendor = Vendor::Samsung;
} else if (gpu_vendor.find("GDI Generic") != gpu_vendor.npos) {
vendor = Vendor::Generic;
}
}
/// Caches the extension and version flags reported by the GL loader.
void Driver::CheckExtensionSupport() {
    // The implementation is considered suitable with desktop GL 4.3 or GLES 3.1
    is_suitable = (GLAD_GL_VERSION_4_3 != 0) || (GLAD_GL_ES_VERSION_3_1 != 0);

    arb_buffer_storage = GLAD_GL_ARB_buffer_storage != 0;
    ext_buffer_storage = GLAD_GL_EXT_buffer_storage != 0;
    arb_clear_texture = GLAD_GL_ARB_clear_texture != 0;
    arb_get_texture_sub_image = GLAD_GL_ARB_get_texture_sub_image != 0;
    ext_clip_cull_distance = GLAD_GL_EXT_clip_cull_distance != 0;
}
/// Records the known driver bugs that apply to the deduced vendor (and platform) in `bugs`.
void Driver::FindBugs() {
#ifdef __unix__
    constexpr bool is_linux = true;
#else
    constexpr bool is_linux = false;
#endif

    const bool is_amd = vendor == Vendor::AMD;
    const bool is_intel_off_linux = vendor == Vendor::Intel && !is_linux;

    // TODO: Check if these have been fixed in the newer driver
    if (is_amd) {
        bugs |= DriverBug::ShaderStageChangeFreeze | DriverBug::VertexArrayOutOfBound;
    }

    if (is_amd || is_intel_off_linux) {
        bugs |= DriverBug::BrokenTextureView;
    }
}
} // namespace OpenGL

View File

@@ -0,0 +1,114 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string_view>
#include "common/common_types.h"
namespace Core {
class TelemetrySession;
}
namespace OpenGL {
/// GPU vendor as deduced by Driver from the GL vendor string
enum class Vendor {
Unknown = 0, ///< Default value; kept when no known vendor substring matches
AMD = 1,
Nvidia = 2,
Intel = 3,
ARM = 4,
Qualcomm = 5,
Samsung = 6,
Generic = 7, ///< Selected when the vendor string contains "GDI Generic"
};
/// Known driver bugs. Each value is a distinct bit so several bugs can be combined in one value.
enum class DriverBug {
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the crash.
VertexArrayOutOfBound = 1 << 1,
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
// NOTE(review): Driver::FindBugs sets this flag for AMD on every platform and for Intel
// whenever __unix__ is not defined, which is broader than "on Windows" — confirm intent.
BrokenTextureView = 1 << 2,
};
/**
 * Utility class that reports information about the graphics device and driver used:
 * version/vendor/renderer strings, the deduced vendor, supported extensions and known
 * driver bugs. All of it is gathered once in the constructor, so a current OpenGL context
 * with loaded function pointers is required at construction time.
 */
class Driver {
public:
/// Gathers the driver information; the telemetry session receives the GPU strings
Driver(Core::TelemetrySession& telemetry_session);
~Driver();
/// Returns true if the driver has a particular bug stated in the DriverBug enum
bool HasBug(DriverBug bug) const;
/// Returns the vendor of the currently selected physical device
Vendor GetVendor() const {
return vendor;
}
/// Returns the gpu vendor string returned by the driver
std::string_view GetVendorString() const {
return gpu_vendor;
}
/// Returns true if the implementation is suitable for emulation
bool IsSuitable() const {
return is_suitable;
}
/// Returns true if the implementation supports ARB_buffer_storage
bool HasArbBufferStorage() const {
return arb_buffer_storage;
}
/// Returns true if the implementation supports EXT_buffer_storage
bool HasExtBufferStorage() const {
return ext_buffer_storage;
}
/// Returns true if the implementation supports ARB_clear_texture
bool HasArbClearTexture() const {
return arb_clear_texture;
}
/// Returns true if the implementation supports ARB_get_texture_sub_image
bool HasArbGetTextureSubImage() const {
return arb_get_texture_sub_image;
}
/// Returns true if the implementation supports EXT_clip_cull_distance
bool HasExtClipCullDistance() const {
return ext_clip_cull_distance;
}
private:
/// Queries, logs and reports to telemetry the GL version, vendor and renderer strings
void ReportDriverInfo();
/// Sets `vendor` from the vendor string
void DeduceVendor();
/// Caches the extension/version support flags
void CheckExtensionSupport();
/// Fills `bugs` based on the deduced vendor
void FindBugs();
private:
Core::TelemetrySession& telemetry_session;
Vendor vendor = Vendor::Unknown;
/// Combination of DriverBug flags; empty until FindBugs() runs
DriverBug bugs{};
bool is_suitable{};
bool ext_buffer_storage{};
bool arb_buffer_storage{};
bool arb_clear_texture{};
bool arb_get_texture_sub_image{};
bool ext_clip_cull_distance{};
// Non-owning views of the strings returned by glGetString
std::string_view gl_version{};
std::string_view gpu_vendor{};
std::string_view gpu_model{};
};
} // namespace OpenGL

File diff suppressed because it is too large Load Diff

View File

@@ -3,12 +3,11 @@
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -20,20 +19,20 @@ class EmuWindow;
}
namespace OpenGL {
class Driver;
class ShaderProgramManager;
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Frontend::EmuWindow& emu_window);
explicit RasterizerOpenGL(Memory::MemorySystem& memory, Frontend::EmuWindow& emu_window,
Driver& driver);
~RasterizerOpenGL() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
void FlushRegion(PAddr addr, u32 size) override;
void InvalidateRegion(PAddr addr, u32 size) override;
@@ -46,10 +45,10 @@ public:
u32 pixel_stride, ScreenInfo& screen_info) override;
bool AccelerateDrawBatch(bool is_indexed) override;
/// Syncs entire status to match PICA registers
void SyncEntireState() override;
private:
void SyncFixedState() override;
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@@ -76,66 +75,15 @@ private:
bool supress_mipmap_for_cube = false;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
normquat = -normquat;
}
}
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@@ -145,18 +93,6 @@ private:
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
@@ -175,46 +111,6 @@ private:
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direcition to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
@@ -228,15 +124,6 @@ private:
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
GLuint vs_input_index_max);
@@ -247,38 +134,13 @@ private:
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
bool is_amd;
private:
Driver& driver;
OpenGLState state;
GLuint default_texture;
RasterizerCacheOpenGL res_cache;
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
struct {
UniformData data;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
bool lighting_lut_dirty_any;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
bool proctex_alpha_map_dirty;
bool proctex_lut_dirty;
bool proctex_diff_lut_dirty;
bool dirty;
} uniform_block_data = {};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
@@ -299,15 +161,6 @@ private:
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace OpenGL

View File

@@ -12,6 +12,7 @@
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/shader/shader_uniforms.h"
#include "video_core/video_core.h"
using Pica::FramebufferRegs;
@@ -23,53 +24,7 @@ using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
namespace OpenGL {
constexpr std::string_view UniformBlockDef = R"(
#define NUM_TEV_STAGES 6
#define NUM_LIGHTS 8
#define NUM_LIGHTING_SAMPLERS 24
struct LightSrc {
vec3 specular_0;
vec3 specular_1;
vec3 diffuse;
vec3 ambient;
vec3 position;
vec3 spot_direction;
float dist_atten_bias;
float dist_atten_scale;
};
layout (std140) uniform shader_data {
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
vec3 fog_color;
vec2 proctex_noise_f;
vec2 proctex_noise_a;
vec2 proctex_noise_p;
vec3 lighting_global_ambient;
LightSrc light_src[NUM_LIGHTS];
vec4 const_color[NUM_TEV_STAGES];
vec4 tev_combiner_buffer_color;
vec4 clip_coef;
};
)";
const std::string UniformBlockDef = Pica::Shader::BuildShaderUniformDefinitions();
static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_shader) {
std::string out;

View File

@@ -6,13 +6,13 @@
#include <set>
#include <thread>
#include <unordered_map>
#include <boost/variant.hpp>
#include "core/frontend/scope_acquire_context.h"
#include <variant>
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/shader/shader_uniforms.h"
#include "video_core/video_core.h"
namespace OpenGL {
@@ -85,7 +85,8 @@ static std::tuple<PicaVSConfig, Pica::Shader::ShaderSetup> BuildVSConfigFromRaw(
return {PicaVSConfig{raw.GetRawShaderConfig().vs, setup}, setup};
}
static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding,
static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
Pica::Shader::UniformBindings binding,
std::size_t expected_size) {
const GLuint ub_index = glGetUniformBlockIndex(shader, name);
if (ub_index == GL_INVALID_INDEX) {
@@ -100,9 +101,10 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name, Unifor
}
static void SetShaderUniformBlockBindings(GLuint shader) {
SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common,
sizeof(UniformData));
SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, sizeof(VSUniformData));
SetShaderUniformBlockBinding(shader, "shader_data", Pica::Shader::UniformBindings::Common,
sizeof(Pica::Shader::UniformData));
SetShaderUniformBlockBinding(shader, "vs_config", Pica::Shader::UniformBindings::VS,
sizeof(Pica::Shader::VSUniformData));
}
static void SetShaderSamplerBinding(GLuint shader, const char* name,
@@ -148,21 +150,6 @@ static void SetShaderSamplerBindings(GLuint shader) {
cur_state.Apply();
}
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs,
const Pica::Shader::ShaderSetup& setup) {
std::transform(std::begin(setup.uniforms.b), std::end(setup.uniforms.b), std::begin(bools),
[](bool value) -> BoolAligned { return {value ? GL_TRUE : GL_FALSE}; });
std::transform(std::begin(regs.int_uniforms), std::end(regs.int_uniforms), std::begin(i),
[](const auto& value) -> Common::Vec4u {
return {value.x.Value(), value.y.Value(), value.z.Value(), value.w.Value()};
});
std::transform(std::begin(setup.uniforms.f), std::end(setup.uniforms.f), std::begin(f),
[](const auto& value) -> Common::Vec4f {
return {value.x.ToFloat32(), value.y.ToFloat32(), value.z.ToFloat32(),
value.w.ToFloat32()};
});
}
/**
* An object representing a shader program staging. It can be either a shader object or a program
* object, depending on whether separable program is used.
@@ -178,12 +165,12 @@ public:
}
void Create(const char* source, GLenum type) {
if (shader_or_program.which() == 0) {
boost::get<OGLShader>(shader_or_program).Create(source, type);
if (shader_or_program.index() == 0) {
std::get<OGLShader>(shader_or_program).Create(source, type);
} else {
OGLShader shader;
shader.Create(source, type);
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
OGLProgram& program = std::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle);
@@ -194,10 +181,10 @@ public:
}
GLuint GetHandle() const {
if (shader_or_program.which() == 0) {
return boost::get<OGLShader>(shader_or_program).handle;
if (shader_or_program.index() == 0) {
return std::get<OGLShader>(shader_or_program).handle;
} else {
return boost::get<OGLProgram>(shader_or_program).handle;
return std::get<OGLProgram>(shader_or_program).handle;
}
}
@@ -208,7 +195,7 @@ public:
}
private:
boost::variant<OGLShader, OGLProgram> shader_or_program;
std::variant<OGLShader, OGLProgram> shader_or_program;
};
class TrivialVertexShader {
@@ -329,8 +316,8 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
class ShaderProgramManager::Impl {
public:
explicit Impl(bool separable, bool is_amd)
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
explicit Impl(bool separable)
: separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) {
if (separable)
@@ -363,7 +350,6 @@ public:
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
"ShaderTuple layout changed!");
bool is_amd;
bool separable;
ShaderTuple current;
@@ -379,9 +365,9 @@ public:
ShaderDiskCache disk_cache;
};
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
bool is_amd)
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, const Driver& driver_,
bool separable)
: impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver_} {}
ShaderProgramManager::~ShaderProgramManager() = default;
@@ -443,10 +429,7 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) {
if (impl->is_amd) {
// Without this reset, AMD sometimes freezes when one stage is changed but not
// the others. On the other hand, including this reset seems to introduce a memory
// leak on Intel Graphics.
if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
glUseProgramStages(
impl->pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);
@@ -641,7 +624,7 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
const auto LoadRawSepareble = [&](Frontend::GraphicsContext* context, std::size_t begin,
std::size_t end) {
Frontend::ScopeAcquireContext scope(*context);
const auto scope = context->Acquire();
for (std::size_t i = begin; i < end; ++i) {
if (stop_loading || compilation_failed) {
return;

View File

@@ -5,13 +5,7 @@
#pragma once
#include <memory>
#include "common/vector_math.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_lighting.h"
namespace Core {
class System;
}
namespace Frontend {
class EmuWindow;
@@ -19,8 +13,7 @@ class EmuWindow;
namespace Pica {
struct Regs;
struct ShaderRegs;
} // namespace Pica
}
namespace Pica::Shader {
struct ShaderSetup;
@@ -28,87 +21,13 @@ struct ShaderSetup;
namespace OpenGL {
enum class UniformBindings : u32 { Common, VS, GS };
struct LightSrc {
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
// Not following that rule will cause problems on some AMD drivers.
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
alignas(16) Common::Vec4i lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
alignas(16) Common::Vec3f fog_color;
alignas(8) Common::Vec2f proctex_noise_f;
alignas(8) Common::Vec2f proctex_noise_a;
alignas(8) Common::Vec2f proctex_noise_p;
alignas(16) Common::Vec3f lighting_global_ambient;
LightSrc light_src[8];
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
alignas(16) Common::Vec4f clip_coef;
};
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
struct BoolAligned {
alignas(16) int b;
};
std::array<BoolAligned, 16> bools;
alignas(16) std::array<Common::Vec4u, 4> i;
alignas(16) std::array<Common::Vec4f, 96> f;
};
struct VSUniformData {
PicaUniformsData uniforms;
};
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData does not match the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
class Driver;
class OpenGLState;
/// A class that manage different shader stages and configures them with given config data.
class ShaderProgramManager {
public:
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
ShaderProgramManager(Frontend::EmuWindow& emu_window, const Driver& driver, bool separable);
~ShaderProgramManager();
void LoadDiskCache(const std::atomic_bool& stop_loading,
@@ -131,5 +50,6 @@ private:
std::unique_ptr<Impl> impl;
Frontend::EmuWindow& emu_window;
const Driver& driver;
};
} // namespace OpenGL

View File

@@ -5,6 +5,7 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -12,19 +13,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent)
: gl_target(target), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);
GLsizeiptr allocate_size = size;
if (array_buffer_for_amd) {
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
// crash.
if (driver.HasBug(DriverBug::VertexArrayOutOfBound) && target == GL_ARRAY_BUFFER) {
allocate_size *= 2;
}

View File

@@ -3,14 +3,17 @@
// Refer to the license.txt file included.
#pragma once
#include <tuple>
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Driver;
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent = false);
~OGLStreamBuffer();

View File

@@ -13,8 +13,6 @@
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/memory.h"
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -352,14 +350,17 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix;
}
RendererOpenGL::RendererOpenGL(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window)
: RendererBase{window, secondary_window},
frame_dumper(Core::System::GetInstance().VideoDumper(), window) {
RendererOpenGL::RendererOpenGL(Core::System& system, Frontend::EmuWindow& window,
Frontend::EmuWindow* secondary_window)
: VideoCore::RendererBase{system, window, secondary_window}, driver{system.TelemetrySession()},
frame_dumper{system.VideoDumper(), window} {
window.mailbox = std::make_unique<OGLTextureMailbox>();
if (secondary_window) {
secondary_window->mailbox = std::make_unique<OGLTextureMailbox>();
}
frame_dumper.mailbox = std::make_unique<OGLVideoDumpingMailbox>();
InitOpenGLObjects();
rasterizer = std::make_unique<RasterizerOpenGL>(system.Memory(), render_window, driver);
}
RendererOpenGL::~RendererOpenGL() = default;
@@ -374,7 +375,6 @@ void RendererOpenGL::SwapBuffers() {
state.Apply();
PrepareRendertarget();
RenderScreenshot();
const auto& main_layout = render_window.GetFramebufferLayout();
@@ -396,26 +396,12 @@ void RendererOpenGL::SwapBuffers() {
}
}
m_current_frame++;
Core::System::GetInstance().perf_stats->EndSystemFrame();
render_window.PollEvents();
Core::System::GetInstance().frame_limiter.DoFrameLimiting(
Core::System::GetInstance().CoreTiming().GetGlobalTimeUs());
Core::System::GetInstance().perf_stats->BeginSystemFrame();
EndFrame();
prev_state.Apply();
RefreshRasterizerSetting();
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
Pica::g_debug_context->recorder->FrameFinished();
}
}
void RendererOpenGL::RenderScreenshot() {
if (VideoCore::g_renderer_screenshot_requested) {
if (renderer_settings.screenshot_requested.exchange(false)) {
// Draw this frame to the screenshot framebuffer
screenshot_framebuffer.Create();
GLuint old_read_fb = state.draw.read_framebuffer;
@@ -423,7 +409,7 @@ void RendererOpenGL::RenderScreenshot() {
state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
state.Apply();
Layout::FramebufferLayout layout{VideoCore::g_screenshot_framebuffer_layout};
const auto layout{renderer_settings.screenshot_framebuffer_layout};
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
@@ -435,7 +421,7 @@ void RendererOpenGL::RenderScreenshot() {
DrawScreens(layout, false);
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
VideoCore::g_screenshot_bits);
renderer_settings.screenshot_bits);
screenshot_framebuffer.Release();
state.draw.read_framebuffer = old_read_fb;
@@ -443,8 +429,7 @@ void RendererOpenGL::RenderScreenshot() {
state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);
VideoCore::g_screenshot_complete_callback();
VideoCore::g_renderer_screenshot_requested = false;
renderer_settings.screenshot_complete_callback();
}
}
@@ -1226,109 +1211,8 @@ void RendererOpenGL::CleanupVideoDumping() {
mailbox->free_cv.notify_one();
}
static const char* GetSource(GLenum source) {
#define RET(s) \
case GL_DEBUG_SOURCE_##s: \
return #s
switch (source) {
RET(API);
RET(WINDOW_SYSTEM);
RET(SHADER_COMPILER);
RET(THIRD_PARTY);
RET(APPLICATION);
RET(OTHER);
default:
UNREACHABLE();
}
#undef RET
return "";
void RendererOpenGL::Sync() {
rasterizer->SyncEntireState();
}
/// Maps a GL_DEBUG_TYPE_* enumerator to its name without the GL_DEBUG_TYPE_ prefix.
/// Any other value hits UNREACHABLE(); the trailing return then yields an empty string.
static const char* GetType(GLenum type) {
    switch (type) {
    case GL_DEBUG_TYPE_ERROR:
        return "ERROR";
    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
        return "DEPRECATED_BEHAVIOR";
    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
        return "UNDEFINED_BEHAVIOR";
    case GL_DEBUG_TYPE_PORTABILITY:
        return "PORTABILITY";
    case GL_DEBUG_TYPE_PERFORMANCE:
        return "PERFORMANCE";
    case GL_DEBUG_TYPE_OTHER:
        return "OTHER";
    case GL_DEBUG_TYPE_MARKER:
        return "MARKER";
    default:
        UNREACHABLE();
    }

    return "";
}
/// Debug-output callback passed to glDebugMessageCallback: forwards each driver message to the
/// Render_OpenGL log class.
/// @param source     GL_DEBUG_SOURCE_* value, printed via GetSource().
/// @param type       GL_DEBUG_TYPE_* value, printed via GetType().
/// @param id         Message identifier, printed as-is.
/// @param severity   GL_DEBUG_SEVERITY_* value, mapped to a log level below.
/// @param length     Unused.
/// @param message    Message text supplied by the driver.
/// @param user_param Unused.
static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity,
                                  GLsizei length, const GLchar* message, const void* user_param) {
    // Start from a defined value: previously an unrecognised severity left `level`
    // uninitialized, and reading it in the log call below was undefined behaviour.
    Log::Level level = Log::Level::Debug;
    switch (severity) {
    case GL_DEBUG_SEVERITY_HIGH:
        level = Log::Level::Critical;
        break;
    case GL_DEBUG_SEVERITY_MEDIUM:
        level = Log::Level::Warning;
        break;
    case GL_DEBUG_SEVERITY_NOTIFICATION:
    case GL_DEBUG_SEVERITY_LOW:
    default:
        // Unknown severities are logged at the same level as low/notification messages.
        level = Log::Level::Debug;
        break;
    }
    LOG_GENERIC(Log::Class::Render_OpenGL, level, "{} {} {}: {}", GetSource(source), GetType(type),
                id, message);
}
/// Initialize the renderer
/// Loads the GL entry points (non-Android builds only), logs the driver strings and records them
/// as telemetry, rejects unsupported drivers, then creates the GL objects.
/// @return ErrorBelowGL43 if the loader fails or neither GL 4.3 nor GLES 3.1 is available,
///         ErrorGenericDrivers if the vendor string is "GDI Generic", Success otherwise.
VideoCore::ResultStatus RendererOpenGL::Init() {
#ifndef ANDROID
    if (!gladLoadGL()) {
        return VideoCore::ResultStatus::ErrorBelowGL43;
    }

    // Qualcomm has some spammy info messages that are marked as errors but not important
    // https://developer.qualcomm.com/comment/11845
    if (GLAD_GL_KHR_debug) {
        glEnable(GL_DEBUG_OUTPUT);
        glDebugMessageCallback(DebugHandler, nullptr);
    }
#endif

    // glGetString can return a null pointer when it fails, and building a std::string_view
    // from a null pointer is undefined behaviour, so map that case to an empty view.
    const auto query_string = [](GLenum name) -> std::string_view {
        const auto* const text = reinterpret_cast<char const*>(glGetString(name));
        return text != nullptr ? std::string_view{text} : std::string_view{};
    };

    const std::string_view gl_version = query_string(GL_VERSION);
    const std::string_view gpu_vendor = query_string(GL_VENDOR);
    const std::string_view gpu_model = query_string(GL_RENDERER);

    LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version);
    LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
    LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);

    auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
    constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
    telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
    telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
    telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));

    if (gpu_vendor == "GDI Generic") {
        return VideoCore::ResultStatus::ErrorGenericDrivers;
    }

    if (!(GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1)) {
        return VideoCore::ResultStatus::ErrorBelowGL43;
    }

    InitOpenGLObjects();
    RefreshRasterizerSetting();

    return VideoCore::ResultStatus::Success;
}
/// Shutdown the renderer
/// The body is empty: no work is performed by this function.
void RendererOpenGL::ShutDown() {}
} // namespace OpenGL

View File

@@ -8,6 +8,8 @@
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/frame_dumper_opengl.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -15,6 +17,10 @@ namespace Layout {
struct FramebufferLayout;
}
namespace Core {
class System;
}
namespace Frontend {
struct Frame {
@@ -48,35 +54,21 @@ struct ScreenInfo {
TextureInfo texture;
};
struct PresentationTexture {
u32 width = 0;
u32 height = 0;
OGLTexture texture;
};
class RendererOpenGL : public RendererBase {
class RendererOpenGL : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Frontend::EmuWindow& window, Frontend::EmuWindow* secondary_window);
explicit RendererOpenGL(Core::System& system, Frontend::EmuWindow& window,
Frontend::EmuWindow* secondary_window);
~RendererOpenGL() override;
/// Initialize the renderer
VideoCore::ResultStatus Init() override;
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() const override {
return rasterizer.get();
}
/// Shutdown the renderer
void ShutDown() override;
/// Finalizes rendering the guest frame
void SwapBuffers() override;
/// Draws the latest frame from texture mailbox to the currently bound draw framebuffer in this
/// context
void TryPresent(int timeout_ms, bool is_secondary) override;
/// Prepares for video dumping (e.g. create necessary buffers, etc)
void PrepareVideoDumping() override;
/// Cleans up after video dumping is ended
void CleanupVideoDumping() override;
void Sync() override;
private:
void InitOpenGLObjects();
@@ -111,7 +103,10 @@ private:
// Fills active OpenGL texture with the given RGB color.
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, const TextureInfo& texture);
private:
Driver driver;
OpenGLState state;
std::unique_ptr<RasterizerOpenGL> rasterizer;
// OpenGL object IDs
OGLVertexArray vertex_array;

View File

@@ -22,12 +22,12 @@
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_software/rasterizer.h"
#include "video_core/renderer_software/sw_framebuffer.h"
#include "video_core/renderer_software/sw_lighting.h"
#include "video_core/renderer_software/sw_proctex.h"
#include "video_core/renderer_software/sw_texturing.h"
#include "video_core/shader/shader.h"
#include "video_core/swrasterizer/framebuffer.h"
#include "video_core/swrasterizer/lighting.h"
#include "video_core/swrasterizer/proctex.h"
#include "video_core/swrasterizer/rasterizer.h"
#include "video_core/swrasterizer/texturing.h"
#include "video_core/texture/texture_decode.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"

View File

@@ -0,0 +1,19 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_software/renderer_software.h"
namespace VideoCore {
/// Constructs the software renderer: forwards system and window to RendererBase, passing nullptr
/// as the third base argument (RendererOpenGL passes its secondary window there), and creates the
/// RasterizerSoftware instance owned by this renderer.
RendererSoftware::RendererSoftware(Core::System& system, Frontend::EmuWindow& window)
: VideoCore::RendererBase{system, window, nullptr},
rasterizer{std::make_unique<RasterizerSoftware>()} {}
/// Defaulted destructor; the owned rasterizer is released by its unique_ptr.
RendererSoftware::~RendererSoftware() = default;
/// Only invokes EndFrame(); this function does no other work.
void RendererSoftware::SwapBuffers() {
EndFrame();
}
} // namespace VideoCore

View File

@@ -0,0 +1,33 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/renderer_base.h"
#include "video_core/renderer_software/sw_rasterizer.h"
namespace Core {
class System;
}
namespace VideoCore {
/// Renderer implementation that owns a RasterizerSoftware and derives from RendererBase.
class RendererSoftware : public VideoCore::RendererBase {
public:
/// @param system Passed through to RendererBase.
/// @param window Passed through to RendererBase.
explicit RendererSoftware(Core::System& system, Frontend::EmuWindow& window);
~RendererSoftware() override;
/// Returns a non-owning pointer to the rasterizer held by this renderer.
[[nodiscard]] VideoCore::RasterizerInterface* Rasterizer() const override {
return rasterizer.get();
}
void SwapBuffers() override;
/// Empty override: both arguments are ignored.
void TryPresent(int timeout_ms, bool is_secondary) override {}
/// Empty override.
void Sync() override {}
private:
/// Rasterizer instance exposed through Rasterizer().
std::unique_ptr<RasterizerSoftware> rasterizer;
};
} // namespace VideoCore

View File

@@ -12,9 +12,9 @@
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/renderer_software/rasterizer.h"
#include "video_core/renderer_software/sw_clipper.h"
#include "video_core/shader/shader.h"
#include "video_core/swrasterizer/clipper.h"
#include "video_core/swrasterizer/rasterizer.h"
using Pica::Rasterizer::Vertex;

View File

@@ -12,7 +12,7 @@
#include "core/memory.h"
#include "video_core/pica_state.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/swrasterizer/framebuffer.h"
#include "video_core/renderer_software/sw_framebuffer.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"

View File

@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/swrasterizer/lighting.h"
#include "video_core/renderer_software/sw_lighting.h"
namespace Pica {

View File

@@ -5,7 +5,7 @@
#include <array>
#include <cmath>
#include "common/math_util.h"
#include "video_core/swrasterizer/proctex.h"
#include "video_core/renderer_software/sw_proctex.h"
namespace Pica::Rasterizer {

View File

@@ -0,0 +1,16 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_software/sw_clipper.h"
#include "video_core/renderer_software/sw_rasterizer.h"
namespace VideoCore {
/// Forwards the three vertices, unchanged and in order, to Pica::Clipper::ProcessTriangle.
void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
Pica::Clipper::ProcessTriangle(v0, v1, v2);
}
} // namespace VideoCore

View File

@@ -13,7 +13,7 @@ struct OutputVertex;
namespace VideoCore {
class SWRasterizer : public RasterizerInterface {
class RasterizerSoftware : public RasterizerInterface {
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override {}

View File

@@ -7,7 +7,7 @@
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/regs_texturing.h"
#include "video_core/swrasterizer/texturing.h"
#include "video_core/renderer_software/sw_texturing.h"
namespace Pica::Rasterizer {

View File

@@ -86,7 +86,7 @@ void UnitState::LoadInput(const ShaderRegs& config, const AttributeBuffer& input
}
}
static void CopyRegistersToOutput(const Common::Vec4<float24>* regs, u32 mask,
static void CopyRegistersToOutput(std::span<Common::Vec4<float24>, 16> regs, u32 mask,
AttributeBuffer& buffer) {
int output_i = 0;
for (int reg : Common::BitSet<u32>(mask)) {
@@ -108,7 +108,7 @@ GSEmitter::~GSEmitter() {
delete handlers;
}
void GSEmitter::Emit(Common::Vec4<float24> (&output_regs)[16]) {
void GSEmitter::Emit(std::span<Common::Vec4<float24>, 16> output_regs) {
ASSERT(vertex_id < 3);
// TODO: This should be merged with UnitState::WriteOutput somehow
CopyRegistersToOutput(output_regs, output_mask, buffer[vertex_id]);

View File

@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <functional>
#include <span>
#include <type_traits>
#include <boost/serialization/access.hpp>
#include <boost/serialization/array.hpp>
@@ -113,7 +114,7 @@ struct GSEmitter {
GSEmitter();
~GSEmitter();
void Emit(Common::Vec4<float24> (&output_regs)[16]);
void Emit(std::span<Common::Vec4<float24>, 16> output_regs);
private:
friend class boost::serialization::access;
@@ -140,9 +141,9 @@ struct UnitState {
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
alignas(16) Common::Vec4<float24> input[16];
alignas(16) Common::Vec4<float24> temporary[16];
alignas(16) Common::Vec4<float24> output[16];
alignas(16) std::array<Common::Vec4<float24>, 16> input;
alignas(16) std::array<Common::Vec4<float24>, 16> temporary;
alignas(16) std::array<Common::Vec4<float24>, 16> output;
private:
friend class boost::serialization::access;

View File

@@ -7,7 +7,6 @@
#include <cmath>
#include <numeric>
#include <boost/container/static_vector.hpp>
#include <boost/range/algorithm/fill.hpp>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
#include "common/common_types.h"
@@ -688,7 +687,7 @@ DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
DebugData<true> debug_data;
// Setup input register table
boost::fill(state.registers.input, Common::Vec4<float24>::AssignToAll(float24::Zero()));
state.registers.input.fill(Common::Vec4<float24>::AssignToAll(float24::Zero()));
state.LoadInput(config, input);
RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
return debug_data;

View File

@@ -0,0 +1,78 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Pica::Shader {
/// Populates bools, i and f from the PICA shader state.
/// @param regs  Provides the integer uniforms (regs.int_uniforms).
/// @param setup Provides the boolean and float uniforms (setup.uniforms.b / setup.uniforms.f).
void PicaUniformsData::SetFromRegs(const Pica::ShaderRegs& regs,
                                   const Pica::Shader::ShaderSetup& setup) {
    // Booleans are stored as 1 / 0 inside their own BoolAligned slot.
    auto bool_out = std::begin(bools);
    for (const bool flag : setup.uniforms.b) {
        *bool_out = BoolAligned{flag ? 1 : 0};
        ++bool_out;
    }

    // Each integer uniform register is unpacked component by component.
    auto int_out = std::begin(i);
    for (const auto& packed : regs.int_uniforms) {
        *int_out = Common::Vec4u{packed.x.Value(), packed.y.Value(), packed.z.Value(),
                                 packed.w.Value()};
        ++int_out;
    }

    // Float uniforms are converted to 32-bit floats component by component.
    auto float_out = std::begin(f);
    for (const auto& vec : setup.uniforms.f) {
        *float_out = Common::Vec4f{vec.x.ToFloat32(), vec.y.ToFloat32(), vec.z.ToFloat32(),
                                   vec.w.ToFloat32()};
        ++float_out;
    }
}
// Format template for the shader-side declaration of the common uniform block.
// It is passed to fmt::format by BuildShaderUniformDefinitions, so literal braces are written
// doubled ({{ and }}) and the single {} placeholder, placed directly before `std140`, receives
// the caller's extra layout parameters.
// The member list follows the same order as the C++ UniformData and LightSrc structs declared
// in shader_uniforms.h.
constexpr std::string_view UniformBlockDefFormat = R"(
#define NUM_TEV_STAGES 6
#define NUM_LIGHTS 8
#define NUM_LIGHTING_SAMPLERS 24
struct LightSrc {{
vec3 specular_0;
vec3 specular_1;
vec3 diffuse;
vec3 ambient;
vec3 position;
vec3 spot_direction;
float dist_atten_bias;
float dist_atten_scale;
}};
layout ({}std140) uniform shader_data {{
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
bool enable_clip1;
ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
vec3 fog_color;
vec2 proctex_noise_f;
vec2 proctex_noise_a;
vec2 proctex_noise_p;
vec3 lighting_global_ambient;
LightSrc light_src[NUM_LIGHTS];
vec4 const_color[NUM_TEV_STAGES];
vec4 tev_combiner_buffer_color;
vec3 tex_lod_bias;
vec4 clip_coef;
}};
)";
/// Returns UniformBlockDefFormat with extra_layout_parameters substituted into its single
/// placeholder. The text is inserted verbatim immediately before `std140`, so a non-empty
/// argument has to carry its own trailing separator.
std::string BuildShaderUniformDefinitions(const std::string& extra_layout_parameters) {
return fmt::format(UniformBlockDefFormat, extra_layout_parameters);
}
} // namespace Pica::Shader

View File

@@ -0,0 +1,101 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/regs_lighting.h"
namespace Pica {
struct ShaderRegs;
}
namespace Pica::Shader {
struct ShaderSetup;
enum class UniformBindings : u32 { Common, VS, GS };
/// Per-light data; the element type of UniformData::light_src.
/// Every Vec3f member is explicitly aligned to 16 bytes; the two trailing floats follow
/// spot_direction without an explicit alignment.
struct LightSrc {
alignas(16) Common::Vec3f specular_0;
alignas(16) Common::Vec3f specular_1;
alignas(16) Common::Vec3f diffuse;
alignas(16) Common::Vec3f ambient;
alignas(16) Common::Vec3f position;
alignas(16) Common::Vec3f spot_direction; // negated
float dist_atten_bias;
float dist_atten_scale;
};
/**
* Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
* NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
* the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
* Not following that rule will cause problems on some AMD drivers.
*/
struct UniformData {
int framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1;
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
float proctex_bias;
int shadow_texture_bias;
// NOTE(review): the shader side declares this member as `bool` inside a std140 block, while a
// C++ bool is normally a single byte; alignas(4) only fixes where it starts. Presumably the
// bytes after it must stay zero for the shader to read the intended value - confirm.
alignas(4) bool enable_clip1;
alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4];
alignas(16) Common::Vec3f fog_color;
alignas(8) Common::Vec2f proctex_noise_f;
alignas(8) Common::Vec2f proctex_noise_a;
alignas(8) Common::Vec2f proctex_noise_p;
alignas(16) Common::Vec3f lighting_global_ambient;
LightSrc light_src[8];
alignas(16) Common::Vec4f const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) Common::Vec4f tev_combiner_buffer_color;
alignas(16) Common::Vec3f tex_lod_bias;
// Last member is a vec4, as required by the NOTE above.
alignas(16) Common::Vec4f clip_coef;
};
// Compile-time guards: adding, removing or reordering members above changes the size and must be
// mirrored in the shader-side declaration of this block.
static_assert(sizeof(UniformData) == 0x500,
"The size of the UniformData does not match the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/**
* Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
* NOTE: the same rule from UniformData also applies here.
*/
struct PicaUniformsData {
/// Fills bools, i and f from the given shader registers and shader setup.
void SetFromRegs(const ShaderRegs& regs, const ShaderSetup& setup);
/// One boolean uniform stored as an int that is aligned to 16 bytes.
struct BoolAligned {
alignas(16) int b;
};
// 16 boolean uniforms, 4 integer vectors and 96 float vectors.
std::array<BoolAligned, 16> bools;
alignas(16) std::array<Common::Vec4u, 4> i;
alignas(16) std::array<Common::Vec4f, 96> f;
};
/// Wrapper whose only member is a PicaUniformsData.
struct VSUniformData {
PicaUniformsData uniforms;
};
// Compile-time guards on the size of the wrapper above.
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData does not match the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
/// Builds the shader-side source text declaring LightSrc and the shader_data uniform block.
/// @param extra_layout_parameters Text placed inside the block's layout(...) qualifier directly
///        before `std140`; defaults to an empty string.
std::string BuildShaderUniformDefinitions(const std::string& extra_layout_parameters = "");
} // namespace Pica::Shader

View File

@@ -1,16 +0,0 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/swrasterizer/clipper.h"
#include "video_core/swrasterizer/swrasterizer.h"
namespace VideoCore {
/// Forwards the three vertices, unchanged and in order, to Pica::Clipper::ProcessTriangle.
void SWRasterizer::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
Pica::Clipper::ProcessTriangle(v0, v1, v2);
}
} // namespace VideoCore

View File

@@ -1,5 +1,4 @@
#include <memory>
#include <boost/range/algorithm/fill.hpp>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/bit_field.h"
@@ -23,7 +22,7 @@ void VertexLoader::Setup(const PipelineRegs& regs) {
const auto& attribute_config = regs.vertex_attributes;
num_total_attributes = attribute_config.GetNumTotalAttributes();
boost::fill(vertex_attribute_sources, 0xdeadbeef);
vertex_attribute_sources.fill(0xdeadbeef);
for (int i = 0; i < 16; i++) {
vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);

View File

@@ -6,11 +6,13 @@
#include "common/archives.h"
#include "common/logging/log.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_software/renderer_software.h"
#include "video_core/video_core.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -18,9 +20,8 @@
namespace VideoCore {
std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
std::unique_ptr<RendererBase> g_renderer{}; ///< Renderer plugin
std::atomic<bool> g_hw_renderer_enabled;
std::atomic<bool> g_shader_jit_enabled;
std::atomic<bool> g_hw_shader_enabled;
std::atomic<bool> g_separable_shader_enabled;
@@ -30,65 +31,49 @@ std::atomic<bool> g_renderer_bg_color_update_requested;
std::atomic<bool> g_renderer_sampler_update_requested;
std::atomic<bool> g_renderer_shader_update_requested;
std::atomic<bool> g_texture_filter_update_requested;
// Screenshot
std::atomic<bool> g_renderer_screenshot_requested;
void* g_screenshot_bits;
std::function<void()> g_screenshot_complete_callback;
Layout::FramebufferLayout g_screenshot_framebuffer_layout;
Memory::MemorySystem* g_memory;
/// Initialize the video core
ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window,
Memory::MemorySystem& memory) {
g_memory = &memory;
void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window,
Core::System& system) {
g_memory = &system.Memory();
Pica::Init();
const Settings::GraphicsAPI graphics_api = Settings::values.graphics_api.GetValue();
OpenGL::GLES = Settings::values.use_gles.GetValue();
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window, secondary_window);
ResultStatus result = g_renderer->Init();
if (result != ResultStatus::Success) {
LOG_ERROR(Render, "initialization failed !");
} else {
LOG_DEBUG(Render, "initialized OK");
switch (graphics_api) {
case Settings::GraphicsAPI::Software:
g_renderer = std::make_unique<VideoCore::RendererSoftware>(system, emu_window);
break;
case Settings::GraphicsAPI::OpenGL:
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window);
break;
default:
LOG_CRITICAL(Render, "Unknown graphics API {}, using OpenGL", graphics_api);
g_renderer = std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, secondary_window);
}
return result;
}
/// Shutdown the video core
void Shutdown() {
Pica::Shutdown();
g_renderer->ShutDown();
g_renderer.reset();
LOG_DEBUG(Render, "shutdown OK");
}
/// Registers a screenshot request in the module-level screenshot state.
/// @param data     Stored in g_screenshot_bits.
/// @param callback Moved into g_screenshot_complete_callback.
/// @param layout   Copied into g_screenshot_framebuffer_layout.
/// If a request is already flagged, the call only logs an error and changes nothing.
void RequestScreenshot(void* data, std::function<void()> callback,
                       const Layout::FramebufferLayout& layout) {
    const bool already_pending = g_renderer_screenshot_requested;
    if (!already_pending) {
        g_screenshot_bits = data;
        g_screenshot_complete_callback = std::move(callback);
        g_screenshot_framebuffer_layout = layout;
        // The request flag is raised last, after the three values above are stored.
        g_renderer_screenshot_requested = true;
    } else {
        LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
    }
}
/// Returns the resolution scale factor.
///
/// Both visible variants yield 1 for software rendering. Otherwise they return the
/// resolution_factor setting when it is non-zero, and fall back to the scaling ratio of the
/// render window's framebuffer layout when it is zero.
// NOTE(review): two variants of this function are interleaved here (one gated on
// g_hw_renderer_enabled, one on the graphics_api setting) and the function's closing brace is
// missing. It looks like both sides of a diff merged together - confirm against the real file.
u16 GetResolutionScaleFactor() {
if (g_hw_renderer_enabled) {
// A resolution_factor of 0 selects the window layout's own scaling ratio.
return Settings::values.resolution_factor.GetValue()
? Settings::values.resolution_factor.GetValue()
: g_renderer->GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
} else {
const auto graphics_api = Settings::values.graphics_api.GetValue();
if (graphics_api == Settings::GraphicsAPI::Software) {
// The software renderer always renders at native resolution
return 1;
}
// A resolution_factor of 0 selects the window layout's own scaling ratio.
return Settings::values.resolution_factor.GetValue()
? Settings::values.resolution_factor.GetValue()
: g_renderer->GetRenderWindow().GetFramebufferLayout().GetScalingRatio();
}
template <class Archive>

View File

@@ -14,7 +14,9 @@ namespace Frontend {
class EmuWindow;
}
class RendererBase;
// Forward declaration of Core::System. Only pointers and references to it can be used until
// the full definition is included.
namespace Core {
class System;
} // namespace Core
namespace Memory {
class MemorySystem;
@@ -25,11 +27,12 @@ class MemorySystem;
namespace VideoCore {
// Forward declaration; g_renderer below only needs the name.
class RendererBase;
// Owning pointer to the active renderer. Init assigns it and Shutdown resets it.
extern std::unique_ptr<RendererBase> g_renderer; ///< Renderer plugin
// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from
// qt ui)
// Read by GetResolutionScaleFactor to decide whether a scale factor other than 1 applies.
extern std::atomic<bool> g_hw_renderer_enabled;
// NOTE(review): the consumers of the three flags below are not visible in this chunk, so their
// exact meaning is inferred from the names only - confirm before documenting further.
extern std::atomic<bool> g_shader_jit_enabled;
extern std::atomic<bool> g_hw_shader_enabled;
extern std::atomic<bool> g_separable_shader_enabled;
@@ -39,31 +42,16 @@ extern std::atomic<bool> g_renderer_bg_color_update_requested;
// NOTE(review): presumably "please refresh" flags polled by the renderer; the code that sets and
// clears them is not visible in this chunk - confirm.
extern std::atomic<bool> g_renderer_sampler_update_requested;
extern std::atomic<bool> g_renderer_shader_update_requested;
extern std::atomic<bool> g_texture_filter_update_requested;
// Screenshot
// RequestScreenshot sets this to true after it has stored the three values below. While it is
// set, further RequestScreenshot calls are rejected with an error log.
extern std::atomic<bool> g_renderer_screenshot_requested;
// Caller-supplied `data` pointer passed to RequestScreenshot.
extern void* g_screenshot_bits;
// Callback passed to RequestScreenshot (moved into this global).
extern std::function<void()> g_screenshot_complete_callback;
// Copy of the layout passed to RequestScreenshot.
extern Layout::FramebufferLayout g_screenshot_framebuffer_layout;
// Non-owning pointer to the memory system; assigned by Init.
extern Memory::MemorySystem* g_memory;
/// Status codes produced by renderer initialization and returned from Init.
enum class ResultStatus {
    /// Initialization completed; Init logs "initialized OK" for this value.
    Success = 0,
    /// NOTE(review): presumably a generic/unsupported graphics driver was detected - confirm.
    ErrorGenericDrivers = 1,
    /// NOTE(review): presumably the available OpenGL version is below 4.3 - confirm.
    ErrorBelowGL43 = 2,
};
/// Initialize the video core
// NOTE(review): two declarations of Init follow, differing in return type and in the third
// parameter. They look like the old and new sides of a diff - confirm which one is current.
// @param emu_window       Window handed to the renderer that Init constructs.
// @param secondary_window Passed through to the OpenGL renderer (presumably optional, since it
//                         is a pointer - confirm).
// @param memory / system  Source of the memory system stored in g_memory.
ResultStatus Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window,
Memory::MemorySystem& memory);
void Init(Frontend::EmuWindow& emu_window, Frontend::EmuWindow* secondary_window,
Core::System& system);
/// Shutdown the video core
void Shutdown();
/// Request a screenshot of the next frame
// @param data     Destination pointer, stored in g_screenshot_bits.
// @param callback Stored in g_screenshot_complete_callback.
// @param layout   Framebuffer layout, copied into g_screenshot_framebuffer_layout.
// If a request is already pending, the call logs an error and is ignored.
void RequestScreenshot(void* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
/// Returns the resolution scale factor (1 when rendering in software).
u16 GetResolutionScaleFactor();
template <class Archive>