mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-10-30 15:39:02 -05:00 
			
		
		
		
	Merge branch 'master' into tlds
This commit is contained in:
		| @@ -14,6 +14,7 @@ add_library(video_core STATIC | ||||
|     engines/maxwell_dma.cpp | ||||
|     engines/maxwell_dma.h | ||||
|     engines/shader_bytecode.h | ||||
|     engines/shader_header.h | ||||
|     gpu.cpp | ||||
|     gpu.h | ||||
|     macro_interpreter.cpp | ||||
|   | ||||
| @@ -26,7 +26,7 @@ public: | ||||
|     void WriteReg(u32 method, u32 value); | ||||
|  | ||||
|     struct Regs { | ||||
|         static constexpr size_t NUM_REGS = 0x258; | ||||
|         static constexpr std::size_t NUM_REGS = 0x258; | ||||
|  | ||||
|         struct Surface { | ||||
|             RenderTargetFormat format; | ||||
|   | ||||
| @@ -248,8 +248,8 @@ void Maxwell3D::DrawArrays() { | ||||
|  | ||||
| void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { | ||||
|     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. | ||||
|     auto& shader = state.shader_stages[static_cast<size_t>(stage)]; | ||||
|     auto& bind_data = regs.cb_bind[static_cast<size_t>(stage)]; | ||||
|     auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||||
|     auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)]; | ||||
|  | ||||
|     auto& buffer = shader.const_buffers[bind_data.index]; | ||||
|  | ||||
| @@ -316,14 +316,14 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | ||||
| std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { | ||||
|     std::vector<Texture::FullTextureInfo> textures; | ||||
|  | ||||
|     auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; | ||||
|     auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||||
|     auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; | ||||
|     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||||
|  | ||||
|     GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size; | ||||
|  | ||||
|     // Offset into the texture constbuffer where the texture info begins. | ||||
|     static constexpr size_t TextureInfoOffset = 0x20; | ||||
|     static constexpr std::size_t TextureInfoOffset = 0x20; | ||||
|  | ||||
|     for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; | ||||
|          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { | ||||
| @@ -360,8 +360,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt | ||||
|     return textures; | ||||
| } | ||||
|  | ||||
| Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const { | ||||
|     auto& shader = state.shader_stages[static_cast<size_t>(stage)]; | ||||
| Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, | ||||
|                                                     std::size_t offset) const { | ||||
|     auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||||
|     auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; | ||||
|     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); | ||||
|  | ||||
|   | ||||
| @@ -34,17 +34,17 @@ public: | ||||
|     /// Register structure of the Maxwell3D engine. | ||||
|     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. | ||||
|     struct Regs { | ||||
|         static constexpr size_t NUM_REGS = 0xE00; | ||||
|         static constexpr std::size_t NUM_REGS = 0xE00; | ||||
|  | ||||
|         static constexpr size_t NumRenderTargets = 8; | ||||
|         static constexpr size_t NumViewports = 16; | ||||
|         static constexpr size_t NumCBData = 16; | ||||
|         static constexpr size_t NumVertexArrays = 32; | ||||
|         static constexpr size_t NumVertexAttributes = 32; | ||||
|         static constexpr size_t MaxShaderProgram = 6; | ||||
|         static constexpr size_t MaxShaderStage = 5; | ||||
|         static constexpr std::size_t NumRenderTargets = 8; | ||||
|         static constexpr std::size_t NumViewports = 16; | ||||
|         static constexpr std::size_t NumCBData = 16; | ||||
|         static constexpr std::size_t NumVertexArrays = 32; | ||||
|         static constexpr std::size_t NumVertexAttributes = 32; | ||||
|         static constexpr std::size_t MaxShaderProgram = 6; | ||||
|         static constexpr std::size_t MaxShaderStage = 5; | ||||
|         // Maximum number of const buffers per shader stage. | ||||
|         static constexpr size_t MaxConstBuffers = 18; | ||||
|         static constexpr std::size_t MaxConstBuffers = 18; | ||||
|  | ||||
|         enum class QueryMode : u32 { | ||||
|             Write = 0, | ||||
| @@ -443,9 +443,9 @@ public: | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         bool IsShaderConfigEnabled(size_t index) const { | ||||
|         bool IsShaderConfigEnabled(std::size_t index) const { | ||||
|             // The VertexB is always enabled. | ||||
|             if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) { | ||||
|             if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { | ||||
|                 return true; | ||||
|             } | ||||
|             return shader_config[index].enable != 0; | ||||
| @@ -571,7 +571,7 @@ public: | ||||
|                         BitField<25, 3, u32> map_7; | ||||
|                     }; | ||||
|  | ||||
|                     u32 GetMap(size_t index) const { | ||||
|                     u32 GetMap(std::size_t index) const { | ||||
|                         const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3, | ||||
|                                                                      map_4, map_5, map_6, map_7}; | ||||
|                         ASSERT(index < maps.size()); | ||||
| @@ -925,7 +925,7 @@ public: | ||||
|     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; | ||||
|  | ||||
|     /// Returns the texture information for a specific texture in a specific shader stage. | ||||
|     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const; | ||||
|     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | ||||
|  | ||||
| private: | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|   | ||||
| @@ -50,7 +50,7 @@ void MaxwellDMA::HandleCopy() { | ||||
|     ASSERT(regs.dst_params.pos_y == 0); | ||||
|  | ||||
|     if (regs.exec.is_dst_linear == regs.exec.is_src_linear) { | ||||
|         size_t copy_size = regs.x_count; | ||||
|         std::size_t copy_size = regs.x_count; | ||||
|  | ||||
|         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D | ||||
|         // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count). | ||||
|   | ||||
| @@ -23,7 +23,7 @@ public: | ||||
|     void WriteReg(u32 method, u32 value); | ||||
|  | ||||
|     struct Regs { | ||||
|         static constexpr size_t NUM_REGS = 0x1D6; | ||||
|         static constexpr std::size_t NUM_REGS = 0x1D6; | ||||
|  | ||||
|         struct Parameters { | ||||
|             union { | ||||
|   | ||||
| @@ -20,10 +20,10 @@ namespace Tegra::Shader { | ||||
|  | ||||
| struct Register { | ||||
|     /// Number of registers | ||||
|     static constexpr size_t NumRegisters = 256; | ||||
|     static constexpr std::size_t NumRegisters = 256; | ||||
|  | ||||
|     /// Register 255 is special cased to always be 0 | ||||
|     static constexpr size_t ZeroIndex = 255; | ||||
|     static constexpr std::size_t ZeroIndex = 255; | ||||
|  | ||||
|     enum class Size : u64 { | ||||
|         Byte = 0, | ||||
| @@ -240,6 +240,41 @@ enum class FlowCondition : u64 { | ||||
|     Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? | ||||
| }; | ||||
|  | ||||
| /// Condition codes decoded through the 5-bit `cc` field of the CSETP encoding | ||||
| /// (Instruction::csetp). Value 15 has no enumerator. | ||||
| /// NOTE(review): FCSM_TR (28) has the same numeric value as FlowCondition::Fcsm_Tr (0x1C), so | ||||
| /// the two enums appear to overlap -- confirm whether they describe the same hardware codes. | ||||
| enum class ControlCode : u64 { | ||||
|     F = 0, | ||||
|     LT = 1, | ||||
|     EQ = 2, | ||||
|     LE = 3, | ||||
|     GT = 4, | ||||
|     NE = 5, | ||||
|     GE = 6, | ||||
|     Num = 7, | ||||
|     Nan = 8, | ||||
|     LTU = 9, | ||||
|     EQU = 10, | ||||
|     LEU = 11, | ||||
|     GTU = 12, | ||||
|     NEU = 13, | ||||
|     GEU = 14, | ||||
|     // 15 is intentionally left without a name. | ||||
|     OFF = 16, | ||||
|     LO = 17, | ||||
|     SFF = 18, | ||||
|     LS = 19, | ||||
|     HI = 20, | ||||
|     SFT = 21, | ||||
|     HS = 22, | ||||
|     OFT = 23, | ||||
|     CSM_TA = 24, | ||||
|     CSM_TR = 25, | ||||
|     CSM_MX = 26, | ||||
|     FCSM_TA = 27, | ||||
|     FCSM_TR = 28, | ||||
|     FCSM_MX = 29, | ||||
|     RLE = 30, | ||||
|     RGT = 31, | ||||
| }; | ||||
|  | ||||
| enum class PredicateResultMode : u64 { | ||||
|     None = 0x0, | ||||
|     NotZero = 0x3, | ||||
| @@ -271,6 +306,15 @@ enum class TextureProcessMode : u64 { | ||||
|     LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL | ||||
| }; | ||||
|  | ||||
| /// Optional modifiers of texture instructions. The texture encodings in Instruction (e.g. tex, | ||||
| /// txq, tmml, tld4, tld4s, texs) each provide UsesMiscMode(mode), which reports whether the | ||||
| /// given modifier is set for that encoding and returns false for modifiers it does not decode. | ||||
| /// No explicit values are assigned, so the enumerators are numbered 0..5 in declaration order. | ||||
| enum class TextureMiscMode : u64 { | ||||
|     DC, | ||||
|     AOFFI, // Uses Offset | ||||
|     NDV, | ||||
|     NODEP, | ||||
|     MZ, | ||||
|     PTP, | ||||
| }; | ||||
|  | ||||
| enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 }; | ||||
| enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 }; | ||||
|  | ||||
| @@ -545,6 +589,15 @@ union Instruction { | ||||
|         BitField<45, 2, PredOperation> op; | ||||
|     } pset; | ||||
|  | ||||
|     // Bit layout used for the CSETP instruction (Id::CSETP). | ||||
|     // NOTE(review): `op` is declared 4 bits wide here, while the `pset` layout declares its | ||||
|     // PredOperation field as 2 bits at the same offset (45) -- confirm the intended width. | ||||
|     union { | ||||
|         BitField<0, 3, u64> pred0; | ||||
|         BitField<3, 3, u64> pred3; | ||||
|         BitField<8, 5, ControlCode> cc; // flag in cc | ||||
|         BitField<39, 3, u64> pred39; | ||||
|         BitField<42, 1, u64> neg_pred39; | ||||
|         BitField<45, 4, PredOperation> op; // op with pred39 | ||||
|     } csetp; | ||||
|  | ||||
|     union { | ||||
|         BitField<39, 3, u64> pred39; | ||||
|         BitField<42, 1, u64> neg_pred; | ||||
| @@ -590,42 +643,127 @@ union Instruction { | ||||
|         BitField<28, 1, u64> array; | ||||
|         BitField<29, 2, TextureType> texture_type; | ||||
|         BitField<31, 4, u64> component_mask; | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|         BitField<50, 1, u64> dc_flag; | ||||
|         BitField<54, 1, u64> aoffi_flag; | ||||
|         BitField<55, 3, TextureProcessMode> process_mode; | ||||
|  | ||||
|         bool IsComponentEnabled(size_t component) const { | ||||
|         bool IsComponentEnabled(std::size_t component) const { | ||||
|             return ((1ull << component) & component_mask) != 0; | ||||
|         } | ||||
|  | ||||
|         /// Returns the value of the `process_mode` field as a TextureProcessMode. | ||||
|         TextureProcessMode GetTextureProcessMode() const { | ||||
|             return process_mode; | ||||
|         } | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// Only DC, NODEP and AOFFI are backed by a flag bit here; any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::DC) { | ||||
|                 return dc_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::AOFFI) { | ||||
|                 return aoffi_flag != 0; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|     } tex; | ||||
|  | ||||
|     // Bit layout used for the `txq` encoding. | ||||
|     union { | ||||
|         BitField<22, 6, TextureQueryType> query_type; | ||||
|         BitField<31, 4, u64> component_mask; | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// NODEP is the only modifier decoded here; any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             return mode == TextureMiscMode::NODEP && nodep_flag != 0; | ||||
|         } | ||||
|     } txq; | ||||
|  | ||||
|     union { | ||||
|         BitField<28, 1, u64> array; | ||||
|         BitField<29, 2, TextureType> texture_type; | ||||
|         BitField<31, 4, u64> component_mask; | ||||
|         BitField<35, 1, u64> ndv_flag; | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|  | ||||
|         bool IsComponentEnabled(size_t component) const { | ||||
|         bool IsComponentEnabled(std::size_t component) const { | ||||
|             return ((1ull << component) & component_mask) != 0; | ||||
|         } | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// Only NDV and NODEP are backed by a flag bit here; any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::NDV) { | ||||
|                 return ndv_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|     } tmml; | ||||
|  | ||||
|     // Bit layout used for the `tld4` encoding. | ||||
|     union { | ||||
|         BitField<28, 1, u64> array; | ||||
|         BitField<29, 2, TextureType> texture_type; | ||||
|         BitField<35, 1, u64> ndv_flag; | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|         BitField<50, 1, u64> dc_flag; | ||||
|         BitField<54, 2, u64> info; | ||||
|         BitField<56, 2, u64> component; | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// NDV, NODEP and DC each have their own flag bit; AOFFI and PTP are the values 1 and 2 | ||||
|         /// of the 2-bit `info` field. Any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::NDV) { | ||||
|                 return ndv_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::DC) { | ||||
|                 return dc_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::AOFFI) { | ||||
|                 return info == 1; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::PTP) { | ||||
|                 return info == 2; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|     } tld4; | ||||
|  | ||||
|     // Bit layout used for the `tld4s` encoding. | ||||
|     union { | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|         BitField<50, 1, u64> dc_flag; | ||||
|         BitField<51, 1, u64> aoffi_flag; | ||||
|         BitField<52, 2, u64> component; | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// Only DC, NODEP and AOFFI are backed by a flag bit here; any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::DC) { | ||||
|                 return dc_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::AOFFI) { | ||||
|                 return aoffi_flag != 0; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|     } tld4s; | ||||
|  | ||||
|     union { | ||||
|         BitField<0, 8, Register> gpr0; | ||||
|         BitField<28, 8, Register> gpr28; | ||||
|         BitField<49, 1, u64> nodep; | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|         BitField<50, 3, u64> component_mask_selector; | ||||
|         BitField<53, 4, u64> texture_info; | ||||
|  | ||||
| @@ -645,6 +783,37 @@ union Instruction { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         /// Maps the 4-bit `texture_info` selector to a TextureProcessMode: | ||||
|         /// 3, 5, 13 -> LL; 0, 2, 6, 8, 9, 11 -> LZ; every other value -> None. | ||||
|         TextureProcessMode GetTextureProcessMode() const { | ||||
|             switch (texture_info) { | ||||
|             case 3: | ||||
|             case 5: | ||||
|             case 13: | ||||
|                 return TextureProcessMode::LL; | ||||
|             case 0: | ||||
|             case 2: | ||||
|             case 6: | ||||
|             case 8: | ||||
|             case 9: | ||||
|             case 11: | ||||
|                 return TextureProcessMode::LZ; | ||||
|             default: | ||||
|                 return TextureProcessMode::None; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// DC is derived from `texture_info` (values 4, 5, 6 and 9); NODEP has its own flag bit. | ||||
|         /// Any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::DC) { | ||||
|                 const bool in_dc_range = texture_info >= 4 && texture_info <= 6; | ||||
|                 return in_dc_range || texture_info == 9; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|  | ||||
|         bool IsArrayTexture() const { | ||||
|             // TEXS only supports Texture2D arrays. | ||||
|             return texture_info >= 7 && texture_info <= 9; | ||||
| @@ -654,7 +823,7 @@ union Instruction { | ||||
|             return gpr28.Value() != Register::ZeroIndex; | ||||
|         } | ||||
|  | ||||
|         bool IsComponentEnabled(size_t component) const { | ||||
|         bool IsComponentEnabled(std::size_t component) const { | ||||
|             static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{ | ||||
|                 {}, | ||||
|                 {0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc}, | ||||
| @@ -662,7 +831,7 @@ union Instruction { | ||||
|                 {0x7, 0xb, 0xd, 0xe, 0xf}, | ||||
|             }}; | ||||
|  | ||||
|             size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; | ||||
|             std::size_t index{gpr0.Value() != Register::ZeroIndex ? 1U : 0U}; | ||||
|             index |= gpr28.Value() != Register::ZeroIndex ? 2 : 0; | ||||
|  | ||||
|             u32 mask = mask_lut[index][component_mask_selector]; | ||||
| @@ -673,6 +842,7 @@ union Instruction { | ||||
|     } texs; | ||||
|  | ||||
|     union { | ||||
|         BitField<49, 1, u64> nodep_flag; | ||||
|         BitField<53, 4, u64> texture_info; | ||||
|  | ||||
|         TextureType GetTextureType() const { | ||||
| @@ -693,6 +863,26 @@ union Instruction { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         /// Maps the 4-bit `texture_info` selector to a TextureProcessMode: | ||||
|         /// 1, 5 and 12 select LL; every other value selects LZ. | ||||
|         TextureProcessMode GetTextureProcessMode() const { | ||||
|             switch (texture_info) { | ||||
|             case 1: | ||||
|             case 5: | ||||
|             case 12: | ||||
|                 return TextureProcessMode::LL; | ||||
|             default: | ||||
|                 return TextureProcessMode::LZ; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         /// Reports whether the modifier `mode` is set in this encoding. | ||||
|         /// AOFFI and MZ are derived from `texture_info` (AOFFI: 4 or 12, MZ: 5); NODEP has its | ||||
|         /// own flag bit. Any other mode yields false. | ||||
|         bool UsesMiscMode(TextureMiscMode mode) const { | ||||
|             if (mode == TextureMiscMode::AOFFI) { | ||||
|                 return texture_info == 12 || texture_info == 4; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::MZ) { | ||||
|                 return texture_info == 5; | ||||
|             } | ||||
|             if (mode == TextureMiscMode::NODEP) { | ||||
|                 return nodep_flag != 0; | ||||
|             } | ||||
|             return false; | ||||
|         } | ||||
|  | ||||
|         bool IsArrayTexture() const { | ||||
|             // TEXS only supports Texture2D arrays. | ||||
|             return texture_info == 8; | ||||
| @@ -735,6 +925,7 @@ union Instruction { | ||||
|         BitField<36, 5, u64> index; | ||||
|     } cbuf36; | ||||
|  | ||||
|     BitField<47, 1, u64> generates_cc; | ||||
|     BitField<61, 1, u64> is_b_imm; | ||||
|     BitField<60, 1, u64> is_b_gpr; | ||||
|     BitField<59, 1, u64> is_c_gpr; | ||||
| @@ -859,6 +1050,7 @@ public: | ||||
|         ISET_IMM, | ||||
|         PSETP, | ||||
|         PSET, | ||||
|         CSETP, | ||||
|         XMAD_IMM, | ||||
|         XMAD_CR, | ||||
|         XMAD_RC, | ||||
| @@ -947,7 +1139,7 @@ public: | ||||
| private: | ||||
|     struct Detail { | ||||
|     private: | ||||
|         static constexpr size_t opcode_bitsize = 16; | ||||
|         static constexpr std::size_t opcode_bitsize = 16; | ||||
|  | ||||
|         /** | ||||
|          * Generates the mask and the expected value after masking from a given bitstring. | ||||
| @@ -956,8 +1148,8 @@ private: | ||||
|          */ | ||||
|         static auto GetMaskAndExpect(const char* const bitstring) { | ||||
|             u16 mask = 0, expect = 0; | ||||
|             for (size_t i = 0; i < opcode_bitsize; i++) { | ||||
|                 const size_t bit_position = opcode_bitsize - i - 1; | ||||
|             for (std::size_t i = 0; i < opcode_bitsize; i++) { | ||||
|                 const std::size_t bit_position = opcode_bitsize - i - 1; | ||||
|                 switch (bitstring[i]) { | ||||
|                 case '0': | ||||
|                     mask |= 1 << bit_position; | ||||
| @@ -1095,6 +1287,7 @@ private: | ||||
|             INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), | ||||
|             INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"), | ||||
|             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), | ||||
|             INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), | ||||
|             INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), | ||||
|             INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), | ||||
|             INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), | ||||
|   | ||||
							
								
								
									
										103
									
								
								src/video_core/engines/shader_header.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								src/video_core/engines/shader_header.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| // Copyright 2018 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
|  | ||||
| namespace Tegra::Shader { | ||||
|  | ||||
| /// Values held by the 4-bit `output_topology` field of Header::common3. | ||||
| /// Only the three encodings listed here are named. | ||||
| enum class OutputTopology : u32 { | ||||
|     PointList = 1, | ||||
|     LineStrip = 6, | ||||
|     TriangleStrip = 7, | ||||
| }; | ||||
|  | ||||
| // Shader program header: five 32-bit common words followed by a 60-byte section whose layout | ||||
| // depends on the shader kind (`vtg` or `ps`). The total size is 0x50 bytes. | ||||
| // Documentation in: | ||||
| // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture | ||||
| struct Header { | ||||
|     // Common word 0. | ||||
|     union { | ||||
|         BitField<0, 5, u32> sph_type; | ||||
|         BitField<5, 5, u32> version; | ||||
|         BitField<10, 4, u32> shader_type; | ||||
|         BitField<14, 1, u32> mrt_enable; | ||||
|         BitField<15, 1, u32> kills_pixels; | ||||
|         BitField<16, 1, u32> does_global_store; | ||||
|         BitField<17, 4, u32> sass_version; | ||||
|         BitField<21, 5, u32> reserved; | ||||
|         BitField<26, 1, u32> does_load_or_store; | ||||
|         BitField<27, 1, u32> does_fp64; | ||||
|         BitField<28, 4, u32> stream_out_mask; | ||||
|     } common0; | ||||
|  | ||||
|     // Common word 1. | ||||
|     union { | ||||
|         BitField<0, 24, u32> shader_local_memory_low_size; | ||||
|         BitField<24, 8, u32> per_patch_attribute_count; | ||||
|     } common1; | ||||
|  | ||||
|     // Common word 2. | ||||
|     union { | ||||
|         BitField<0, 24, u32> shader_local_memory_high_size; | ||||
|         BitField<24, 8, u32> threads_per_input_primitive; | ||||
|     } common2; | ||||
|  | ||||
|     // Common word 3. | ||||
|     union { | ||||
|         BitField<0, 24, u32> shader_local_memory_crs_size; | ||||
|         BitField<24, 4, OutputTopology> output_topology; | ||||
|         BitField<28, 4, u32> reserved; | ||||
|     } common3; | ||||
|  | ||||
|     // Common word 4. The four fields tile the word without overlap: 12 + 8 + 4 + 8 bits. | ||||
|     // (store_req_end previously aliased store_req_start at bits 12-19.) | ||||
|     union { | ||||
|         BitField<0, 12, u32> max_output_vertices; | ||||
|         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||||
|         BitField<20, 4, u32> reserved; | ||||
|         BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||||
|     } common4; | ||||
|  | ||||
|     // Type-specific section; both alternatives occupy 60 bytes. | ||||
|     union { | ||||
|         // Layout with input and output attribute maps; not decoded yet, only padded. | ||||
|         struct { | ||||
|             INSERT_PADDING_BYTES(3);  // ImapSystemValuesA | ||||
|             INSERT_PADDING_BYTES(1);  // ImapSystemValuesB | ||||
|             INSERT_PADDING_BYTES(16); // ImapGenericVector[32] | ||||
|             INSERT_PADDING_BYTES(2);  // ImapColor | ||||
|             INSERT_PADDING_BYTES(2);  // ImapSystemValuesC | ||||
|             INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10] | ||||
|             INSERT_PADDING_BYTES(1);  // ImapReserved | ||||
|             INSERT_PADDING_BYTES(3);  // OmapSystemValuesA | ||||
|             INSERT_PADDING_BYTES(1);  // OmapSystemValuesB | ||||
|             INSERT_PADDING_BYTES(16); // OmapGenericVector[32] | ||||
|             INSERT_PADDING_BYTES(2);  // OmapColor | ||||
|             INSERT_PADDING_BYTES(2);  // OmapSystemValuesC | ||||
|             INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10] | ||||
|             INSERT_PADDING_BYTES(1);  // OmapReserved | ||||
|         } vtg; | ||||
|  | ||||
|         // Layout with a padded input map followed by the decoded output map. | ||||
|         struct { | ||||
|             INSERT_PADDING_BYTES(3);  // ImapSystemValuesA | ||||
|             INSERT_PADDING_BYTES(1);  // ImapSystemValuesB | ||||
|             INSERT_PADDING_BYTES(32); // ImapGenericVector[32] | ||||
|             INSERT_PADDING_BYTES(2);  // ImapColor | ||||
|             INSERT_PADDING_BYTES(2);  // ImapSystemValuesC | ||||
|             INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10] | ||||
|             INSERT_PADDING_BYTES(2);  // ImapReserved | ||||
|             struct { | ||||
|                 // One enable bit per (render target, component) pair, 4 bits per target. | ||||
|                 u32 target; | ||||
|                 union { | ||||
|                     BitField<0, 1, u32> sample_mask; | ||||
|                     BitField<1, 1, u32> depth; | ||||
|                     BitField<2, 30, u32> reserved; | ||||
|                 }; | ||||
|             } omap; | ||||
|  | ||||
|             /// Returns true when bit `render_target * 4 + component` of `omap.target` is set. | ||||
|             /// Indices that fall outside the 32-bit mask report false instead of shifting out | ||||
|             /// of range. | ||||
|             bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||||
|                 const u32 bit = render_target * 4 + component; | ||||
|                 if (bit >= 32) { | ||||
|                     return false; | ||||
|                 } | ||||
|                 // Shift the mask down rather than shifting a signed 1 up, so bit 31 is safe. | ||||
|                 return ((omap.target >> bit) & 1u) != 0; | ||||
|             } | ||||
|         } ps; | ||||
|     }; | ||||
| }; | ||||
|  | ||||
| static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); | ||||
|  | ||||
| } // namespace Tegra::Shader | ||||
| @@ -42,6 +42,7 @@ enum class RenderTargetFormat : u32 { | ||||
|     R32_UINT = 0xE4, | ||||
|     R32_FLOAT = 0xE5, | ||||
|     B5G6R5_UNORM = 0xE8, | ||||
|     BGR5A1_UNORM = 0xE9, | ||||
|     RG8_UNORM = 0xEA, | ||||
|     RG8_SNORM = 0xEB, | ||||
|     R16_UNORM = 0xEE, | ||||
|   | ||||
| @@ -152,7 +152,7 @@ private: | ||||
|     boost::optional<u32> | ||||
|         delayed_pc; ///< Program counter to execute at after the delay slot is executed. | ||||
|  | ||||
|     static constexpr size_t NumMacroRegisters = 8; | ||||
|     static constexpr std::size_t NumMacroRegisters = 8; | ||||
|  | ||||
|     /// General purpose macro registers. | ||||
|     std::array<u32, NumMacroRegisters> registers = {}; | ||||
|   | ||||
| @@ -12,10 +12,10 @@ | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| OGLBufferCache::OGLBufferCache(size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} | ||||
| OGLBufferCache::OGLBufferCache(std::size_t size) : stream_buffer(GL_ARRAY_BUFFER, size) {} | ||||
|  | ||||
| GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment, | ||||
|                                       bool cache) { | ||||
| GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, | ||||
|                                       std::size_t alignment, bool cache) { | ||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||
|     const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; | ||||
|  | ||||
| @@ -53,7 +53,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, siz | ||||
|     return uploaded_offset; | ||||
| } | ||||
|  | ||||
| GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) { | ||||
| GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | ||||
|                                           std::size_t alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|     std::memcpy(buffer_ptr, raw_pointer, size); | ||||
|     GLintptr uploaded_offset = buffer_offset; | ||||
| @@ -63,7 +64,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, | ||||
|     return uploaded_offset; | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::Map(size_t max_size) { | ||||
| void OGLBufferCache::Map(std::size_t max_size) { | ||||
|     bool invalidate; | ||||
|     std::tie(buffer_ptr, buffer_offset_base, invalidate) = | ||||
|         stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | ||||
| @@ -81,10 +82,10 @@ GLuint OGLBufferCache::GetHandle() const { | ||||
|     return stream_buffer.GetHandle(); | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::AlignBuffer(size_t alignment) { | ||||
| void OGLBufferCache::AlignBuffer(std::size_t alignment) { | ||||
|     // Align the offset, not the mapped pointer | ||||
|     GLintptr offset_aligned = | ||||
|         static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); | ||||
|         static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | ||||
|     buffer_ptr += offset_aligned - buffer_offset; | ||||
|     buffer_offset = offset_aligned; | ||||
| } | ||||
|   | ||||
| @@ -19,32 +19,32 @@ struct CachedBufferEntry final { | ||||
|         return addr; | ||||
|     } | ||||
|  | ||||
|     size_t GetSizeInBytes() const { | ||||
|     std::size_t GetSizeInBytes() const { | ||||
|         return size; | ||||
|     } | ||||
|  | ||||
|     VAddr addr; | ||||
|     size_t size; | ||||
|     std::size_t size; | ||||
|     GLintptr offset; | ||||
|     size_t alignment; | ||||
|     std::size_t alignment; | ||||
| }; | ||||
|  | ||||
| class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||||
| public: | ||||
|     explicit OGLBufferCache(size_t size); | ||||
|     explicit OGLBufferCache(std::size_t size); | ||||
|  | ||||
|     GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4, | ||||
|     GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||||
|                           bool cache = true); | ||||
|  | ||||
|     GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4); | ||||
|     GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||||
|  | ||||
|     void Map(size_t max_size); | ||||
|     void Map(std::size_t max_size); | ||||
|     void Unmap(); | ||||
|  | ||||
|     GLuint GetHandle() const; | ||||
|  | ||||
| protected: | ||||
|     void AlignBuffer(size_t alignment); | ||||
|     void AlignBuffer(std::size_t alignment); | ||||
|  | ||||
| private: | ||||
|     OGLStreamBuffer stream_buffer; | ||||
|   | ||||
| @@ -46,7 +46,7 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) | ||||
|     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { | ||||
|     // Create sampler objects | ||||
|     for (size_t i = 0; i < texture_samplers.size(); ++i) { | ||||
|     for (std::size_t i = 0; i < texture_samplers.size(); ++i) { | ||||
|         texture_samplers[i].Create(); | ||||
|         state.texture_units[i].sampler = texture_samplers[i].sampler.handle; | ||||
|     } | ||||
| @@ -181,7 +181,7 @@ void RasterizerOpenGL::SetupShaders() { | ||||
|     u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||||
|     u32 current_texture_bindpoint = 0; | ||||
|  | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         const auto& shader_config = gpu.regs.shader_config[index]; | ||||
|         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | ||||
|  | ||||
| @@ -190,12 +190,12 @@ void RasterizerOpenGL::SetupShaders() { | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 | ||||
|         const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 | ||||
|  | ||||
|         GLShader::MaxwellUniformData ubo{}; | ||||
|         ubo.SetFromRegs(gpu.state.shader_stages[stage]); | ||||
|         const GLintptr offset = buffer_cache.UploadHostMemory( | ||||
|             &ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment)); | ||||
|             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); | ||||
|  | ||||
|         // Bind the buffer | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo)); | ||||
| @@ -238,10 +238,10 @@ void RasterizerOpenGL::SetupShaders() { | ||||
|     shader_program_manager->UseTrivialGeometryShader(); | ||||
| } | ||||
|  | ||||
| size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
| std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
|     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | ||||
|  | ||||
|     size_t size = 0; | ||||
|     std::size_t size = 0; | ||||
|     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         if (!regs.vertex_array[index].IsEnabled()) | ||||
|             continue; | ||||
| @@ -299,7 +299,7 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||||
|  | ||||
| void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, | ||||
|                                              bool preserve_contents, | ||||
|                                              boost::optional<size_t> single_color_target) { | ||||
|                                              boost::optional<std::size_t> single_color_target) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Framebuffer); | ||||
|     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | ||||
|  | ||||
| @@ -330,7 +330,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep | ||||
|         } else { | ||||
|             // Multiple color attachments are enabled | ||||
|             std::array<GLenum, Maxwell::NumRenderTargets> buffers; | ||||
|             for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||||
|             for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||||
|                 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); | ||||
|                 buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); | ||||
|                 glFramebufferTexture2D( | ||||
| @@ -342,7 +342,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_dep | ||||
|         } | ||||
|     } else { | ||||
|         // No color attachments are enabled - zero out all of them | ||||
|         for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||||
|         for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||||
|             glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, | ||||
|                                    GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D, | ||||
|                                    0, 0); | ||||
| @@ -462,15 +462,15 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     state.draw.vertex_buffer = buffer_cache.GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     size_t buffer_size = CalculateVertexArraysSize(); | ||||
|     std::size_t buffer_size = CalculateVertexArraysSize(); | ||||
|  | ||||
|     if (is_indexed) { | ||||
|         buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; | ||||
|         buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; | ||||
|     } | ||||
|  | ||||
|     // Uniform space for the 5 shader stages | ||||
|     buffer_size = | ||||
|         Common::AlignUp<size_t>(buffer_size, 4) + | ||||
|         Common::AlignUp<std::size_t>(buffer_size, 4) + | ||||
|         (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; | ||||
|  | ||||
|     // Add space for at least 18 constant buffers | ||||
| @@ -644,7 +644,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& gpu = Core::System::GetInstance().GPU(); | ||||
|     const auto& maxwell3d = gpu.Maxwell3D(); | ||||
|     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; | ||||
|     const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; | ||||
|     const auto& entries = shader->GetShaderEntries().const_buffer_entries; | ||||
|  | ||||
|     constexpr u64 max_binds = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers; | ||||
| @@ -667,7 +667,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         size_t size = 0; | ||||
|         std::size_t size = 0; | ||||
|  | ||||
|         if (used_buffer.IsIndirect()) { | ||||
|             // Buffer is accessed indirectly, so upload the entire thing | ||||
| @@ -689,7 +689,7 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | ||||
|         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | ||||
|  | ||||
|         GLintptr const_buffer_offset = buffer_cache.UploadMemory( | ||||
|             buffer.address, size, static_cast<size_t>(uniform_buffer_alignment)); | ||||
|             buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); | ||||
|  | ||||
|         // Now configure the bindpoint of the buffer inside the shader | ||||
|         glUniformBlockBinding(shader->GetProgramHandle(), | ||||
|   | ||||
| @@ -73,7 +73,7 @@ public: | ||||
|     }; | ||||
|  | ||||
|     /// Maximum supported size that a constbuffer can have in bytes. | ||||
|     static constexpr size_t MaxConstbufferSize = 0x10000; | ||||
|     static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||||
|     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||||
|                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||||
|  | ||||
| @@ -106,7 +106,7 @@ private: | ||||
|      */ | ||||
|     void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true, | ||||
|                                bool preserve_contents = true, | ||||
|                                boost::optional<size_t> single_color_target = {}); | ||||
|                                boost::optional<std::size_t> single_color_target = {}); | ||||
|  | ||||
|     /* | ||||
|      * Configures the current constbuffers to use for the draw command. | ||||
| @@ -180,12 +180,12 @@ private: | ||||
|  | ||||
|     std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; | ||||
|  | ||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     OGLBufferCache buffer_cache; | ||||
|     OGLFramebuffer framebuffer; | ||||
|     GLint uniform_buffer_alignment; | ||||
|  | ||||
|     size_t CalculateVertexArraysSize() const; | ||||
|     std::size_t CalculateVertexArraysSize() const; | ||||
|  | ||||
|     void SetupVertexArrays(); | ||||
|  | ||||
|   | ||||
| @@ -75,7 +75,7 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | ||||
|     return params; | ||||
| } | ||||
|  | ||||
| /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) { | ||||
| /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { | ||||
|     const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; | ||||
|     SurfaceParams params{}; | ||||
|     params.addr = TryGetCpuAddr(config.Address()); | ||||
| @@ -167,6 +167,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form | ||||
|     {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                                // RG8S | ||||
|     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // RG32UI | ||||
|     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},              // R32UI | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||||
|  | ||||
|     // Depth formats | ||||
|     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | ||||
| @@ -203,7 +204,7 @@ static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) { | ||||
| } | ||||
|  | ||||
| static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | ||||
|     ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); | ||||
|     ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||||
|     auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; | ||||
|     ASSERT(component_type == format.component_type); | ||||
|  | ||||
| @@ -213,6 +214,7 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | ||||
| static bool IsPixelFormatASTC(PixelFormat format) { | ||||
|     switch (format) { | ||||
|     case PixelFormat::ASTC_2D_4X4: | ||||
|     case PixelFormat::ASTC_2D_8X8: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
| @@ -223,6 +225,8 @@ static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { | ||||
|     switch (format) { | ||||
|     case PixelFormat::ASTC_2D_4X4: | ||||
|         return {4, 4}; | ||||
|     case PixelFormat::ASTC_2D_8X8: | ||||
|         return {8, 8}; | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); | ||||
|         UNREACHABLE(); | ||||
| @@ -256,7 +260,7 @@ static bool IsFormatBCn(PixelFormat format) { | ||||
| } | ||||
|  | ||||
| template <bool morton_to_gl, PixelFormat format> | ||||
| void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size, | ||||
| void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size, | ||||
|                 VAddr addr) { | ||||
|     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | ||||
|     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||
| @@ -267,7 +271,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t | ||||
|         const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | ||||
|         const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( | ||||
|             addr, tile_size, bytes_per_pixel, stride, height, block_height); | ||||
|         const size_t size_to_copy{std::min(gl_buffer_size, data.size())}; | ||||
|         const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; | ||||
|         memcpy(gl_buffer, data.data(), size_to_copy); | ||||
|     } else { | ||||
|         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | ||||
| @@ -278,7 +282,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t | ||||
|     } | ||||
| } | ||||
|  | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), | ||||
|                             SurfaceParams::MaxPixelFormat> | ||||
|     morton_to_gl_fns = { | ||||
|         // clang-format off | ||||
| @@ -327,6 +331,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | ||||
|         MortonCopy<true, PixelFormat::RG8S>, | ||||
|         MortonCopy<true, PixelFormat::RG32UI>, | ||||
|         MortonCopy<true, PixelFormat::R32UI>, | ||||
|         MortonCopy<true, PixelFormat::ASTC_2D_8X8>, | ||||
|         MortonCopy<true, PixelFormat::Z32F>, | ||||
|         MortonCopy<true, PixelFormat::Z16>, | ||||
|         MortonCopy<true, PixelFormat::Z24S8>, | ||||
| @@ -335,7 +340,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | ||||
|         // clang-format on | ||||
| }; | ||||
|  | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), | ||||
|                             SurfaceParams::MaxPixelFormat> | ||||
|     gl_to_morton_fns = { | ||||
|         // clang-format off | ||||
| @@ -386,6 +391,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr), | ||||
|         MortonCopy<false, PixelFormat::RG8S>, | ||||
|         MortonCopy<false, PixelFormat::RG32UI>, | ||||
|         MortonCopy<false, PixelFormat::R32UI>, | ||||
|         nullptr, | ||||
|         MortonCopy<false, PixelFormat::Z32F>, | ||||
|         MortonCopy<false, PixelFormat::Z16>, | ||||
|         MortonCopy<false, PixelFormat::Z24S8>, | ||||
| @@ -513,9 +519,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | ||||
|     S8Z24 input_pixel{}; | ||||
|     Z24S8 output_pixel{}; | ||||
|     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; | ||||
|     for (size_t y = 0; y < height; ++y) { | ||||
|         for (size_t x = 0; x < width; ++x) { | ||||
|             const size_t offset{bpp * (y * width + x)}; | ||||
|     for (std::size_t y = 0; y < height; ++y) { | ||||
|         for (std::size_t x = 0; x < width; ++x) { | ||||
|             const std::size_t offset{bpp * (y * width + x)}; | ||||
|             std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); | ||||
|             output_pixel.s8.Assign(input_pixel.s8); | ||||
|             output_pixel.z24.Assign(input_pixel.z24); | ||||
| @@ -526,9 +532,9 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { | ||||
|  | ||||
| static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { | ||||
|     constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; | ||||
|     for (size_t y = 0; y < height; ++y) { | ||||
|         for (size_t x = 0; x < width; ++x) { | ||||
|             const size_t offset{bpp * (y * width + x)}; | ||||
|     for (std::size_t y = 0; y < height; ++y) { | ||||
|         for (std::size_t x = 0; x < width; ++x) { | ||||
|             const std::size_t offset{bpp * (y * width + x)}; | ||||
|             const u8 temp{data[offset]}; | ||||
|             data[offset] = data[offset + 1]; | ||||
|             data[offset + 1] = temp; | ||||
| @@ -544,7 +550,8 @@ static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { | ||||
| static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||||
|                                                u32 width, u32 height) { | ||||
|     switch (pixel_format) { | ||||
|     case PixelFormat::ASTC_2D_4X4: { | ||||
|     case PixelFormat::ASTC_2D_4X4: | ||||
|     case PixelFormat::ASTC_2D_8X8: { | ||||
|         // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||||
|         u32 block_width{}; | ||||
|         u32 block_height{}; | ||||
| @@ -591,13 +598,13 @@ void CachedSurface::LoadGLBuffer() { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); | ||||
|         morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( | ||||
|         gl_buffer.resize(static_cast<std::size_t>(params.depth) * copy_size); | ||||
|         morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( | ||||
|             params.width, params.block_height, params.height, gl_buffer.data(), copy_size, | ||||
|             params.addr); | ||||
|     } else { | ||||
|         const u8* const texture_src_data_end{texture_src_data + | ||||
|                                              (static_cast<size_t>(params.depth) * copy_size)}; | ||||
|                                              (static_cast<std::size_t>(params.depth) * copy_size)}; | ||||
|         gl_buffer.assign(texture_src_data, texture_src_data_end); | ||||
|     } | ||||
|  | ||||
| @@ -616,7 +623,7 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle | ||||
|  | ||||
|     MICROPROFILE_SCOPE(OpenGL_TextureUL); | ||||
|  | ||||
|     ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height * | ||||
|     ASSERT(gl_buffer.size() == static_cast<std::size_t>(params.width) * params.height * | ||||
|                                    GetGLBytesPerPixel(params.pixel_format) * params.depth); | ||||
|  | ||||
|     const auto& rect{params.GetRect()}; | ||||
| @@ -624,8 +631,9 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle | ||||
|     // Load data from memory to the surface | ||||
|     const GLint x0 = static_cast<GLint>(rect.left); | ||||
|     const GLint y0 = static_cast<GLint>(rect.bottom); | ||||
|     const size_t buffer_offset = | ||||
|         static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * | ||||
|     const std::size_t buffer_offset = | ||||
|         static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.width + | ||||
|                                  static_cast<std::size_t>(x0)) * | ||||
|         GetGLBytesPerPixel(params.pixel_format); | ||||
|  | ||||
|     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); | ||||
| @@ -727,7 +735,7 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { | ||||
|     return GetSurface(depth_params, preserve_contents); | ||||
| } | ||||
|  | ||||
| Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) { | ||||
| Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||||
|     const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs}; | ||||
|  | ||||
|     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||||
| @@ -825,7 +833,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | ||||
|         auto source_format = GetFormatTuple(params.pixel_format, params.component_type); | ||||
|         auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); | ||||
|  | ||||
|         size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); | ||||
|         std::size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); | ||||
|  | ||||
|         glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle); | ||||
|         glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); | ||||
| @@ -849,7 +857,7 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | ||||
|                 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " | ||||
|                                   "reinterpretation but the texture is tiled."); | ||||
|             } | ||||
|             size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | ||||
|             std::size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | ||||
|             std::vector<u8> data(remaining_size); | ||||
|             Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); | ||||
|             glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, | ||||
|   | ||||
| @@ -70,19 +70,20 @@ struct SurfaceParams { | ||||
|         RG8S = 42, | ||||
|         RG32UI = 43, | ||||
|         R32UI = 44, | ||||
|         ASTC_2D_8X8 = 45, | ||||
|  | ||||
|         MaxColorFormat, | ||||
|  | ||||
|         // Depth formats | ||||
|         Z32F = 45, | ||||
|         Z16 = 46, | ||||
|         Z32F = 46, | ||||
|         Z16 = 47, | ||||
|  | ||||
|         MaxDepthFormat, | ||||
|  | ||||
|         // DepthStencil formats | ||||
|         Z24S8 = 47, | ||||
|         S8Z24 = 48, | ||||
|         Z32FS8 = 49, | ||||
|         Z24S8 = 48, | ||||
|         S8Z24 = 49, | ||||
|         Z32FS8 = 50, | ||||
|  | ||||
|         MaxDepthStencilFormat, | ||||
|  | ||||
| @@ -90,7 +91,7 @@ struct SurfaceParams { | ||||
|         Invalid = 255, | ||||
|     }; | ||||
|  | ||||
|     static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max); | ||||
|     static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max); | ||||
|  | ||||
|     enum class ComponentType { | ||||
|         Invalid = 0, | ||||
| @@ -192,6 +193,7 @@ struct SurfaceParams { | ||||
|             1, // RG8S | ||||
|             1, // RG32UI | ||||
|             1, // R32UI | ||||
|             4, // ASTC_2D_8X8 | ||||
|             1, // Z32F | ||||
|             1, // Z16 | ||||
|             1, // Z24S8 | ||||
| @@ -199,8 +201,8 @@ struct SurfaceParams { | ||||
|             1, // Z32FS8 | ||||
|         }}; | ||||
|  | ||||
|         ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); | ||||
|         return compression_factor_table[static_cast<size_t>(format)]; | ||||
|         ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); | ||||
|         return compression_factor_table[static_cast<std::size_t>(format)]; | ||||
|     } | ||||
|  | ||||
|     static constexpr u32 GetFormatBpp(PixelFormat format) { | ||||
| @@ -253,6 +255,7 @@ struct SurfaceParams { | ||||
|             16,  // RG8S | ||||
|             64,  // RG32UI | ||||
|             32,  // R32UI | ||||
|             16,  // ASTC_2D_8X8 | ||||
|             32,  // Z32F | ||||
|             16,  // Z16 | ||||
|             32,  // Z24S8 | ||||
| @@ -260,8 +263,8 @@ struct SurfaceParams { | ||||
|             64,  // Z32FS8 | ||||
|         }}; | ||||
|  | ||||
|         ASSERT(static_cast<size_t>(format) < bpp_table.size()); | ||||
|         return bpp_table[static_cast<size_t>(format)]; | ||||
|         ASSERT(static_cast<std::size_t>(format) < bpp_table.size()); | ||||
|         return bpp_table[static_cast<std::size_t>(format)]; | ||||
|     } | ||||
|  | ||||
|     u32 GetFormatBpp() const { | ||||
| @@ -316,6 +319,8 @@ struct SurfaceParams { | ||||
|             return PixelFormat::R11FG11FB10F; | ||||
|         case Tegra::RenderTargetFormat::B5G6R5_UNORM: | ||||
|             return PixelFormat::B5G6R5U; | ||||
|         case Tegra::RenderTargetFormat::BGR5A1_UNORM: | ||||
|             return PixelFormat::A1B5G5R5U; | ||||
|         case Tegra::RenderTargetFormat::RGBA32_UINT: | ||||
|             return PixelFormat::RGBA32UI; | ||||
|         case Tegra::RenderTargetFormat::R8_UNORM: | ||||
| @@ -522,6 +527,8 @@ struct SurfaceParams { | ||||
|             return PixelFormat::BC6H_SF16; | ||||
|         case Tegra::Texture::TextureFormat::ASTC_2D_4X4: | ||||
|             return PixelFormat::ASTC_2D_4X4; | ||||
|         case Tegra::Texture::TextureFormat::ASTC_2D_8X8: | ||||
|             return PixelFormat::ASTC_2D_8X8; | ||||
|         case Tegra::Texture::TextureFormat::R16_G16: | ||||
|             switch (component_type) { | ||||
|             case Tegra::Texture::ComponentType::FLOAT: | ||||
| @@ -576,6 +583,7 @@ struct SurfaceParams { | ||||
|         case Tegra::RenderTargetFormat::RG16_UNORM: | ||||
|         case Tegra::RenderTargetFormat::R16_UNORM: | ||||
|         case Tegra::RenderTargetFormat::B5G6R5_UNORM: | ||||
|         case Tegra::RenderTargetFormat::BGR5A1_UNORM: | ||||
|         case Tegra::RenderTargetFormat::RG8_UNORM: | ||||
|         case Tegra::RenderTargetFormat::RGBA16_UNORM: | ||||
|             return ComponentType::UNorm; | ||||
| @@ -636,16 +644,18 @@ struct SurfaceParams { | ||||
|     } | ||||
|  | ||||
|     static SurfaceType GetFormatType(PixelFormat pixel_format) { | ||||
|         if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) { | ||||
|         if (static_cast<std::size_t>(pixel_format) < | ||||
|             static_cast<std::size_t>(PixelFormat::MaxColorFormat)) { | ||||
|             return SurfaceType::ColorTexture; | ||||
|         } | ||||
|  | ||||
|         if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxDepthFormat)) { | ||||
|         if (static_cast<std::size_t>(pixel_format) < | ||||
|             static_cast<std::size_t>(PixelFormat::MaxDepthFormat)) { | ||||
|             return SurfaceType::Depth; | ||||
|         } | ||||
|  | ||||
|         if (static_cast<size_t>(pixel_format) < | ||||
|             static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { | ||||
|         if (static_cast<std::size_t>(pixel_format) < | ||||
|             static_cast<std::size_t>(PixelFormat::MaxDepthStencilFormat)) { | ||||
|             return SurfaceType::DepthStencil; | ||||
|         } | ||||
|  | ||||
| @@ -659,7 +669,7 @@ struct SurfaceParams { | ||||
|     MathUtil::Rectangle<u32> GetRect() const; | ||||
|  | ||||
|     /// Returns the size of this surface in bytes, adjusted for compression | ||||
|     size_t SizeInBytes() const { | ||||
|     std::size_t SizeInBytes() const { | ||||
|         const u32 compression_factor{GetCompressionFactor(pixel_format)}; | ||||
|         ASSERT(width % compression_factor == 0); | ||||
|         ASSERT(height % compression_factor == 0); | ||||
| @@ -671,7 +681,7 @@ struct SurfaceParams { | ||||
|     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | ||||
|  | ||||
|     /// Creates SurfaceParams from a framebuffer configuration | ||||
|     static SurfaceParams CreateForFramebuffer(size_t index); | ||||
|     static SurfaceParams CreateForFramebuffer(std::size_t index); | ||||
|  | ||||
|     /// Creates SurfaceParams for a depth buffer configuration | ||||
|     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | ||||
| @@ -694,7 +704,7 @@ struct SurfaceParams { | ||||
|     u32 height; | ||||
|     u32 depth; | ||||
|     u32 unaligned_height; | ||||
|     size_t size_in_bytes; | ||||
|     std::size_t size_in_bytes; | ||||
|     SurfaceTarget target; | ||||
| }; | ||||
|  | ||||
| @@ -711,7 +721,7 @@ struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { | ||||
| namespace std { | ||||
| template <> | ||||
| struct hash<SurfaceReserveKey> { | ||||
|     size_t operator()(const SurfaceReserveKey& k) const { | ||||
|     std::size_t operator()(const SurfaceReserveKey& k) const { | ||||
|         return k.Hash(); | ||||
|     } | ||||
| }; | ||||
| @@ -727,7 +737,7 @@ public: | ||||
|         return params.addr; | ||||
|     } | ||||
|  | ||||
|     size_t GetSizeInBytes() const { | ||||
|     std::size_t GetSizeInBytes() const { | ||||
|         return params.size_in_bytes; | ||||
|     } | ||||
|  | ||||
| @@ -775,7 +785,7 @@ public: | ||||
|     Surface GetDepthBufferSurface(bool preserve_contents); | ||||
|  | ||||
|     /// Get the color surface based on the framebuffer configuration and the specified render target | ||||
|     Surface GetColorBufferSurface(size_t index, bool preserve_contents); | ||||
|     Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); | ||||
|  | ||||
|     /// Flushes the surface to Switch memory | ||||
|     void FlushSurface(const Surface& surface); | ||||
|   | ||||
| @@ -14,7 +14,7 @@ namespace OpenGL { | ||||
| /// Gets the address for the specified shader stage program | ||||
| static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | ||||
|     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||||
|     const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | ||||
|     const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; | ||||
|     return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + | ||||
|                                                shader_config.offset); | ||||
| } | ||||
| @@ -28,7 +28,7 @@ static GLShader::ProgramCode GetShaderCode(VAddr addr) { | ||||
|  | ||||
| /// Helper function to set shader uniform block bindings for a single shader stage | ||||
| static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||||
|                                          Maxwell::ShaderStage binding, size_t expected_size) { | ||||
|                                          Maxwell::ShaderStage binding, std::size_t expected_size) { | ||||
|     const GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||||
|     if (ub_index == GL_INVALID_INDEX) { | ||||
|         return; | ||||
| @@ -36,7 +36,7 @@ static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||||
|  | ||||
|     GLint ub_size = 0; | ||||
|     glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||||
|     ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, | ||||
|     ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, | ||||
|                "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||||
|     glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||||
| } | ||||
|   | ||||
| @@ -28,7 +28,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     /// Gets the size of the shader in guest memory, required for cache management | ||||
|     size_t GetSizeInBytes() const { | ||||
|     std::size_t GetSizeInBytes() const { | ||||
|         return GLShader::MAX_PROGRAM_CODE_LENGTH * sizeof(u64); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -12,6 +12,7 @@ | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/engines/shader_header.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||
|  | ||||
| @@ -26,7 +27,7 @@ using Tegra::Shader::Sampler; | ||||
| using Tegra::Shader::SubOp; | ||||
|  | ||||
| constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | ||||
| constexpr u32 PROGRAM_HEADER_SIZE = 0x50; | ||||
| constexpr u32 PROGRAM_HEADER_SIZE = sizeof(Tegra::Shader::Header); | ||||
|  | ||||
| class DecompileFail : public std::runtime_error { | ||||
| public: | ||||
| @@ -189,7 +190,7 @@ public: | ||||
|  | ||||
| private: | ||||
|     void AppendIndentation() { | ||||
|         shader_source.append(static_cast<size_t>(scope) * 4, ' '); | ||||
|         shader_source.append(static_cast<std::size_t>(scope) * 4, ' '); | ||||
|     } | ||||
|  | ||||
|     std::string shader_source; | ||||
| @@ -208,7 +209,7 @@ public: | ||||
|         UnsignedInteger, | ||||
|     }; | ||||
|  | ||||
|     GLSLRegister(size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | ||||
|     GLSLRegister(std::size_t index, const std::string& suffix) : index{index}, suffix{suffix} {} | ||||
|  | ||||
|     /// Gets the GLSL type string for a register | ||||
|     static std::string GetTypeString() { | ||||
| @@ -226,15 +227,23 @@ public: | ||||
|     } | ||||
|  | ||||
|     /// Returns the index of the register | ||||
|     size_t GetIndex() const { | ||||
|     std::size_t GetIndex() const { | ||||
|         return index; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     const size_t index; | ||||
|     const std::size_t index; | ||||
|     const std::string& suffix; | ||||
| }; | ||||
|  | ||||
| enum class InternalFlag : u64 { | ||||
|     ZeroFlag = 0, | ||||
|     CarryFlag = 1, | ||||
|     OverflowFlag = 2, | ||||
|     NaNFlag = 3, | ||||
|     Amount | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Used to manage shader registers that are emulated with GLSL. This class keeps track of the state | ||||
|  * of all registers (e.g. whether they are currently being used as Floats or Integers), and | ||||
| @@ -328,13 +337,19 @@ public: | ||||
|     void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, | ||||
|                               const std::string& value, u64 dest_num_components, | ||||
|                               u64 value_num_components, bool is_saturated = false, | ||||
|                               u64 dest_elem = 0, Register::Size size = Register::Size::Word) { | ||||
|                               u64 dest_elem = 0, Register::Size size = Register::Size::Word, | ||||
|                               bool sets_cc = false) { | ||||
|         ASSERT_MSG(!is_saturated, "Unimplemented"); | ||||
|  | ||||
|         const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; | ||||
|  | ||||
|         SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', | ||||
|                     dest_num_components, value_num_components, dest_elem); | ||||
|  | ||||
|         if (sets_cc) { | ||||
|             const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )"; | ||||
|             SetInternalFlag(InternalFlag::ZeroFlag, zero_condition); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
| @@ -351,6 +366,26 @@ public: | ||||
|         shader.AddLine(dest + " = " + src + ';'); | ||||
|     } | ||||
|  | ||||
|     std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { | ||||
|         switch (cc) { | ||||
|         case Tegra::Shader::ControlCode::NEU: | ||||
|             return "!(" + GetInternalFlag(InternalFlag::ZeroFlag) + ')'; | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented Control Code {}", static_cast<u32>(cc)); | ||||
|             UNREACHABLE(); | ||||
|             return "false"; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     std::string GetInternalFlag(const InternalFlag ii) const { | ||||
|         const u32 code = static_cast<u32>(ii); | ||||
|         return "internalFlag_" + std::to_string(code) + suffix; | ||||
|     } | ||||
|  | ||||
|     void SetInternalFlag(const InternalFlag ii, const std::string& value) const { | ||||
|         shader.AddLine(GetInternalFlag(ii) + " = " + value + ';'); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Writes code that does a output attribute assignment to register operation. Output attributes | ||||
|      * are stored as floats, so this may require conversion. | ||||
| @@ -414,6 +449,12 @@ public: | ||||
|         } | ||||
|         declarations.AddNewLine(); | ||||
|  | ||||
|         for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) { | ||||
|             const InternalFlag code = static_cast<InternalFlag>(ii); | ||||
|             declarations.AddLine("bool " + GetInternalFlag(code) + " = false;"); | ||||
|         } | ||||
|         declarations.AddNewLine(); | ||||
|  | ||||
|         for (const auto element : declr_input_attribute) { | ||||
|             // TODO(bunnei): Use proper number of elements for these | ||||
|             u32 idx = | ||||
| @@ -468,7 +509,7 @@ public: | ||||
|     /// necessary. | ||||
|     std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, | ||||
|                               bool is_array) { | ||||
|         const size_t offset = static_cast<size_t>(sampler.index.Value()); | ||||
|         const std::size_t offset = static_cast<std::size_t>(sampler.index.Value()); | ||||
|  | ||||
|         // If this sampler has already been used, return the existing mapping. | ||||
|         const auto itr = | ||||
| @@ -481,7 +522,7 @@ public: | ||||
|         } | ||||
|  | ||||
|         // Otherwise create a new mapping for this sampler | ||||
|         const size_t next_index = used_samplers.size(); | ||||
|         const std::size_t next_index = used_samplers.size(); | ||||
|         const SamplerEntry entry{stage, offset, next_index, type, is_array}; | ||||
|         used_samplers.emplace_back(entry); | ||||
|         return entry.GetName(); | ||||
| @@ -531,7 +572,7 @@ private: | ||||
|     void BuildRegisterList() { | ||||
|         regs.reserve(Register::NumRegisters); | ||||
|  | ||||
|         for (size_t index = 0; index < Register::NumRegisters; ++index) { | ||||
|         for (std::size_t index = 0; index < Register::NumRegisters; ++index) { | ||||
|             regs.emplace_back(index, suffix); | ||||
|         } | ||||
|     } | ||||
| @@ -674,7 +715,7 @@ public: | ||||
|                   u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix) | ||||
|         : subroutines(subroutines), program_code(program_code), main_offset(main_offset), | ||||
|           stage(stage), suffix(suffix) { | ||||
|  | ||||
|         std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||||
|         Generate(suffix); | ||||
|     } | ||||
|  | ||||
| @@ -688,23 +729,6 @@ public: | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     // Shader program header for a Fragment Shader. | ||||
|     struct FragmentHeader { | ||||
|         INSERT_PADDING_WORDS(5); | ||||
|         INSERT_PADDING_WORDS(13); | ||||
|         u32 enabled_color_outputs; | ||||
|         union { | ||||
|             BitField<0, 1, u32> writes_samplemask; | ||||
|             BitField<1, 1, u32> writes_depth; | ||||
|         }; | ||||
|  | ||||
|         bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | ||||
|             const u32 bit = render_target * 4 + component; | ||||
|             return enabled_color_outputs & (1 << bit); | ||||
|         } | ||||
|     }; | ||||
|     static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); | ||||
|  | ||||
|     /// Gets the Subroutine object corresponding to the specified address. | ||||
|     const Subroutine& GetSubroutine(u32 begin, u32 end) const { | ||||
|         const auto iter = subroutines.find(Subroutine{begin, end, suffix}); | ||||
| @@ -862,7 +886,7 @@ private: | ||||
|      */ | ||||
|     bool IsSchedInstruction(u32 offset) const { | ||||
|         // sched instructions appear once every 4 instructions. | ||||
|         static constexpr size_t SchedPeriod = 4; | ||||
|         static constexpr std::size_t SchedPeriod = 4; | ||||
|         u32 absolute_offset = offset - main_offset; | ||||
|  | ||||
|         return (absolute_offset % SchedPeriod) == 0; | ||||
| @@ -930,7 +954,7 @@ private: | ||||
|         std::string result; | ||||
|         result += '('; | ||||
|  | ||||
|         for (size_t i = 0; i < shift_amounts.size(); ++i) { | ||||
|         for (std::size_t i = 0; i < shift_amounts.size(); ++i) { | ||||
|             if (i) | ||||
|                 result += '|'; | ||||
|             result += "(((" + imm_lut + " >> (((" + op_c + " >> " + shift_amounts[i] + | ||||
| @@ -954,9 +978,7 @@ private: | ||||
|         // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||||
|         // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||||
|  | ||||
|         ASSERT_MSG(instr.texs.nodep == 0, "TEXS nodep not implemented"); | ||||
|  | ||||
|         size_t written_components = 0; | ||||
|         std::size_t written_components = 0; | ||||
|         for (u32 component = 0; component < 4; ++component) { | ||||
|             if (!instr.texs.IsComponentEnabled(component)) { | ||||
|                 continue; | ||||
| @@ -1010,10 +1032,8 @@ private: | ||||
|     /// Writes the output values from a fragment shader to the corresponding GLSL output variables. | ||||
|     void EmitFragmentOutputsWrite() { | ||||
|         ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); | ||||
|         FragmentHeader header; | ||||
|         std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); | ||||
|  | ||||
|         ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); | ||||
|         ASSERT_MSG(header.ps.omap.sample_mask == 0, "Samplemask write is unimplemented"); | ||||
|  | ||||
|         // Write the color outputs using the data in the shader registers, disabled | ||||
|         // rendertargets/components are skipped in the register assignment. | ||||
| @@ -1022,7 +1042,7 @@ private: | ||||
|              ++render_target) { | ||||
|             // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | ||||
|             for (u32 component = 0; component < 4; ++component) { | ||||
|                 if (header.IsColorComponentOutputEnabled(render_target, component)) { | ||||
|                 if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||||
|                     shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component, | ||||
|                                                regs.GetRegisterAsFloat(current_reg))); | ||||
|                     ++current_reg; | ||||
| @@ -1030,7 +1050,7 @@ private: | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (header.writes_depth) { | ||||
|         if (header.ps.omap.depth) { | ||||
|             // The depth output is always 2 registers after the last color output, and current_reg | ||||
|             // already contains one past the last color register. | ||||
|  | ||||
| @@ -1510,8 +1530,6 @@ private: | ||||
|             case OpCode::Id::LEA_IMM: | ||||
|             case OpCode::Id::LEA_RZ: | ||||
|             case OpCode::Id::LEA_HI: { | ||||
|                 std::string op_a; | ||||
|                 std::string op_b; | ||||
|                 std::string op_c; | ||||
|  | ||||
|                 switch (opcode->GetId()) { | ||||
| @@ -1642,7 +1660,8 @@ private: | ||||
|                 } | ||||
|  | ||||
|                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, | ||||
|                                           1, instr.alu.saturate_d, 0, instr.conversion.dest_size); | ||||
|                                           1, instr.alu.saturate_d, 0, instr.conversion.dest_size, | ||||
|                                           instr.generates_cc.Value() != 0); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::I2F_R: | ||||
| @@ -1781,8 +1800,8 @@ private: | ||||
|                 Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||||
|                                                   Tegra::Shader::IpaSampleMode::Default}; | ||||
|  | ||||
|                 u32 next_element = instr.attribute.fmt20.element; | ||||
|                 u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); | ||||
|                 u64 next_element = instr.attribute.fmt20.element; | ||||
|                 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||||
|  | ||||
|                 const auto LoadNextElement = [&](u32 reg_offset) { | ||||
|                     regs.SetRegisterToInputAttibute(instr.gpr0.Value() + reg_offset, next_element, | ||||
| @@ -1846,8 +1865,8 @@ private: | ||||
|                 ASSERT_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) == 0, | ||||
|                            "Unaligned attribute loads are not supported"); | ||||
|  | ||||
|                 u32 next_element = instr.attribute.fmt20.element; | ||||
|                 u32 next_index = static_cast<u32>(instr.attribute.fmt20.index.Value()); | ||||
|                 u64 next_element = instr.attribute.fmt20.element; | ||||
|                 u64 next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||||
|  | ||||
|                 const auto StoreNextElement = [&](u32 reg_offset) { | ||||
|                     regs.SetOutputAttributeToRegister(static_cast<Attribute::Index>(next_index), | ||||
| @@ -1873,6 +1892,13 @@ private: | ||||
|                 Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||||
|                 std::string coord; | ||||
|  | ||||
|                 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||||
|                            "AOFFI is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||||
|                            "DC is not implemented"); | ||||
|  | ||||
|                 switch (texture_type) { | ||||
|                 case Tegra::Shader::TextureType::Texture1D: { | ||||
|                     const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||||
| @@ -1937,8 +1963,8 @@ private: | ||||
|                     UNREACHABLE(); | ||||
|                 } | ||||
|                 } | ||||
|                 size_t dest_elem{}; | ||||
|                 for (size_t elem = 0; elem < 4; ++elem) { | ||||
|                 std::size_t dest_elem{}; | ||||
|                 for (std::size_t elem = 0; elem < 4; ++elem) { | ||||
|                     if (!instr.tex.IsComponentEnabled(elem)) { | ||||
|                         // Skip disabled components | ||||
|                         continue; | ||||
| @@ -1955,6 +1981,11 @@ private: | ||||
|                 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()}; | ||||
|                 bool is_array{instr.texs.IsArrayTexture()}; | ||||
|  | ||||
|                 ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||||
|                            "DC is not implemented"); | ||||
|  | ||||
|                 switch (texture_type) { | ||||
|                 case Tegra::Shader::TextureType::Texture2D: { | ||||
|                     if (is_array) { | ||||
| @@ -1990,6 +2021,13 @@ private: | ||||
|                 std::string coord; | ||||
|                 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||||
|                 const bool is_array{instr.tlds.IsArrayTexture()}; | ||||
|   | ||||
|                 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||||
|                            "AOFFI is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ), | ||||
|                            "MZ is not implemented"); | ||||
|  | ||||
|                 switch (texture_type) { | ||||
|                 case Tegra::Shader::TextureType::Texture1D: { | ||||
| @@ -2024,6 +2062,17 @@ private: | ||||
|                 ASSERT(instr.tld4.array == 0); | ||||
|                 std::string coord; | ||||
|  | ||||
|                 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||||
|                            "AOFFI is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||||
|                            "DC is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||||
|                            "NDV is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP), | ||||
|                            "PTP is not implemented"); | ||||
|  | ||||
|                 switch (instr.tld4.texture_type) { | ||||
|                 case Tegra::Shader::TextureType::Texture2D: { | ||||
|                     const std::string x = regs.GetRegisterAsFloat(instr.gpr8); | ||||
| @@ -2047,8 +2096,8 @@ private: | ||||
|                 const std::string texture = "textureGather(" + sampler + ", coords, " + | ||||
|                                             std::to_string(instr.tld4.component) + ')'; | ||||
|  | ||||
|                 size_t dest_elem{}; | ||||
|                 for (size_t elem = 0; elem < 4; ++elem) { | ||||
|                 std::size_t dest_elem{}; | ||||
|                 for (std::size_t elem = 0; elem < 4; ++elem) { | ||||
|                     if (!instr.tex.IsComponentEnabled(elem)) { | ||||
|                         // Skip disabled components | ||||
|                         continue; | ||||
| @@ -2061,6 +2110,13 @@ private: | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::TLD4S: { | ||||
|                 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI), | ||||
|                            "AOFFI is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC), | ||||
|                            "DC is not implemented"); | ||||
|  | ||||
|                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||||
|                 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); | ||||
|                 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||||
| @@ -2073,6 +2129,9 @@ private: | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::TXQ: { | ||||
|                 ASSERT_MSG(!instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|  | ||||
|                 // TODO: the new commits on the texture refactor, change the way samplers work. | ||||
|                 // Sadly, not all texture instructions specify the type of texture their sampler | ||||
|                 // uses. This must be fixed at a later instance. | ||||
| @@ -2093,6 +2152,11 @@ private: | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::TMML: { | ||||
|                 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP), | ||||
|                            "NODEP is not implemented"); | ||||
|                 ASSERT_MSG(!instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||||
|                            "NDV is not implemented"); | ||||
|  | ||||
|                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); | ||||
|                 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); | ||||
|                 const bool is_array = instr.tmml.array != 0; | ||||
| @@ -2259,31 +2323,55 @@ private: | ||||
|             break; | ||||
|         } | ||||
|         case OpCode::Type::PredicateSetPredicate: { | ||||
|             const std::string op_a = | ||||
|                 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||||
|             const std::string op_b = | ||||
|                 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||||
|             switch (opcode->GetId()) { | ||||
|             case OpCode::Id::PSETP: { | ||||
|                 const std::string op_a = | ||||
|                     GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||||
|                 const std::string op_b = | ||||
|                     GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||||
|  | ||||
|             // We can't use the constant predicate as destination. | ||||
|             ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||||
|                 // We can't use the constant predicate as destination. | ||||
|                 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||||
|  | ||||
|             const std::string second_pred = | ||||
|                 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||||
|                 const std::string second_pred = | ||||
|                     GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||||
|  | ||||
|             const std::string combiner = GetPredicateCombiner(instr.psetp.op); | ||||
|                 const std::string combiner = GetPredicateCombiner(instr.psetp.op); | ||||
|  | ||||
|             const std::string predicate = | ||||
|                 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | ||||
|                 const std::string predicate = | ||||
|                     '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; | ||||
|  | ||||
|             // Set the primary predicate to the result of Predicate OP SecondPredicate | ||||
|             SetPredicate(instr.psetp.pred3, | ||||
|                          '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|                 // Set the primary predicate to the result of Predicate OP SecondPredicate | ||||
|                 SetPredicate(instr.psetp.pred3, | ||||
|                              '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|  | ||||
|             if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||||
|                 // if enabled | ||||
|                 SetPredicate(instr.psetp.pred0, | ||||
|                              "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|                 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                     // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||||
|                     // if enabled | ||||
|                     SetPredicate(instr.psetp.pred0, | ||||
|                                  "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::CSETP: { | ||||
|                 const std::string pred = | ||||
|                     GetPredicateCondition(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||||
|                 const std::string combiner = GetPredicateCombiner(instr.csetp.op); | ||||
|                 const std::string controlCode = regs.GetControlCode(instr.csetp.cc); | ||||
|                 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                     SetPredicate(instr.csetp.pred3, | ||||
|                                  '(' + controlCode + ") " + combiner + " (" + pred + ')'); | ||||
|                 } | ||||
|                 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                     SetPredicate(instr.csetp.pred0, | ||||
|                                  "!(" + controlCode + ") " + combiner + " (" + pred + ')'); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled predicate instruction: {}", opcode->GetName()); | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|             } | ||||
|             break; | ||||
|         } | ||||
| @@ -2673,6 +2761,7 @@ private: | ||||
| private: | ||||
|     const std::set<Subroutine>& subroutines; | ||||
|     const ProgramCode& program_code; | ||||
|     Tegra::Shader::Header header; | ||||
|     const u32 main_offset; | ||||
|     Maxwell3D::Regs::ShaderStage stage; | ||||
|     const std::string& suffix; | ||||
|   | ||||
| @@ -13,7 +13,7 @@ | ||||
|  | ||||
| namespace OpenGL::GLShader { | ||||
|  | ||||
| constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | ||||
| constexpr std::size_t MAX_PROGRAM_CODE_LENGTH{0x1000}; | ||||
| using ProgramCode = std::vector<u64>; | ||||
|  | ||||
| class ConstBufferEntry { | ||||
| @@ -51,7 +51,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     std::string GetName() const { | ||||
|         return BufferBaseNames[static_cast<size_t>(stage)] + std::to_string(index); | ||||
|         return BufferBaseNames[static_cast<std::size_t>(stage)] + std::to_string(index); | ||||
|     } | ||||
|  | ||||
|     u32 GetHash() const { | ||||
| @@ -74,15 +74,15 @@ class SamplerEntry { | ||||
|     using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
|  | ||||
| public: | ||||
|     SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, | ||||
|     SamplerEntry(Maxwell::ShaderStage stage, std::size_t offset, std::size_t index, | ||||
|                  Tegra::Shader::TextureType type, bool is_array) | ||||
|         : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} | ||||
|  | ||||
|     size_t GetOffset() const { | ||||
|     std::size_t GetOffset() const { | ||||
|         return offset; | ||||
|     } | ||||
|  | ||||
|     size_t GetIndex() const { | ||||
|     std::size_t GetIndex() const { | ||||
|         return sampler_index; | ||||
|     } | ||||
|  | ||||
| @@ -91,7 +91,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     std::string GetName() const { | ||||
|         return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + | ||||
|         return std::string(TextureSamplerNames[static_cast<std::size_t>(stage)]) + '_' + | ||||
|                std::to_string(sampler_index); | ||||
|     } | ||||
|  | ||||
| @@ -133,7 +133,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     static std::string GetArrayName(Maxwell::ShaderStage stage) { | ||||
|         return TextureSamplerNames[static_cast<size_t>(stage)]; | ||||
|         return TextureSamplerNames[static_cast<std::size_t>(stage)]; | ||||
|     } | ||||
|  | ||||
| private: | ||||
| @@ -143,9 +143,9 @@ private: | ||||
|  | ||||
|     /// Offset in TSC memory from which to read the sampler object, as specified by the sampling | ||||
|     /// instruction. | ||||
|     size_t offset; | ||||
|     std::size_t offset; | ||||
|     Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used. | ||||
|     size_t sampler_index;            ///< Value used to index into the generated GLSL sampler array. | ||||
|     std::size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array. | ||||
|     Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) | ||||
|     bool is_array; ///< Whether the texture is being sampled as an array texture or not. | ||||
| }; | ||||
|   | ||||
| @@ -12,7 +12,7 @@ | ||||
| namespace OpenGL::GLShader { | ||||
|  | ||||
| /// Number of OpenGL texture samplers that can be used in the fragment shader | ||||
| static constexpr size_t NumTextureSamplers = 32; | ||||
| static constexpr std::size_t NumTextureSamplers = 32; | ||||
|  | ||||
| using Tegra::Engines::Maxwell3D; | ||||
|  | ||||
|   | ||||
| @@ -272,7 +272,7 @@ void OpenGLState::Apply() const { | ||||
|     } | ||||
|  | ||||
|     // Clip distance | ||||
|     for (size_t i = 0; i < clip_distance.size(); ++i) { | ||||
|     for (std::size_t i = 0; i < clip_distance.size(); ++i) { | ||||
|         if (clip_distance[i] != cur_state.clip_distance[i]) { | ||||
|             if (clip_distance[i]) { | ||||
|                 glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i)); | ||||
|   | ||||
| @@ -61,7 +61,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a | ||||
|     mapped_size = size; | ||||
|  | ||||
|     if (alignment > 0) { | ||||
|         buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); | ||||
|         buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | ||||
|     } | ||||
|  | ||||
|     bool invalidate = false; | ||||
|   | ||||
| @@ -46,6 +46,48 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_ | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <std::size_t N, std::size_t M> | ||||
| struct alignas(64) SwizzleTable { | ||||
|     constexpr SwizzleTable() { | ||||
|         for (u32 y = 0; y < N; ++y) { | ||||
|             for (u32 x = 0; x < M; ++x) { | ||||
|                 const u32 x2 = x * 16; | ||||
|                 values[y][x] = static_cast<u16>(((x2 % 64) / 32) * 256 + ((y % 8) / 2) * 64 + | ||||
|                                                 ((x2 % 32) / 16) * 32 + (y % 2) * 16); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     const std::array<u16, M>& operator[](std::size_t index) const { | ||||
|         return values[index]; | ||||
|     } | ||||
|     std::array<std::array<u16, M>, N> values{}; | ||||
| }; | ||||
|  | ||||
| constexpr auto swizzle_table = SwizzleTable<8, 4>(); | ||||
|  | ||||
| void FastSwizzleData(u32 width, u32 height, u32 bytes_per_pixel, u8* swizzled_data, | ||||
|                      u8* unswizzled_data, bool unswizzle, u32 block_height) { | ||||
|     std::array<u8*, 2> data_ptrs; | ||||
|     const std::size_t stride{width * bytes_per_pixel}; | ||||
|     const std::size_t image_width_in_gobs{(stride + 63) / 64}; | ||||
|     const std::size_t copy_size{16}; | ||||
|     for (std::size_t y = 0; y < height; ++y) { | ||||
|         const std::size_t initial_gob = | ||||
|             (y / (8 * block_height)) * 512 * block_height * image_width_in_gobs + | ||||
|             (y % (8 * block_height) / 8) * 512; | ||||
|         const std::size_t pixel_base{y * width * bytes_per_pixel}; | ||||
|         const auto& table = swizzle_table[y % 8]; | ||||
|         for (std::size_t xb = 0; xb < stride; xb += copy_size) { | ||||
|             const std::size_t gob_address{initial_gob + (xb / 64) * 512 * block_height}; | ||||
|             const std::size_t swizzle_offset{gob_address + table[(xb / 16) % 4]}; | ||||
|             const std::size_t pixel_index{xb + pixel_base}; | ||||
|             data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||||
|             data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||||
|             std::memcpy(data_ptrs[0], data_ptrs[1], copy_size); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| u32 BytesPerPixel(TextureFormat format) { | ||||
|     switch (format) { | ||||
|     case TextureFormat::DXT1: | ||||
| @@ -63,6 +105,7 @@ u32 BytesPerPixel(TextureFormat format) { | ||||
|     case TextureFormat::R32_G32_B32: | ||||
|         return 12; | ||||
|     case TextureFormat::ASTC_2D_4X4: | ||||
|     case TextureFormat::ASTC_2D_8X8: | ||||
|     case TextureFormat::A8R8G8B8: | ||||
|     case TextureFormat::A2B10G10R10: | ||||
|     case TextureFormat::BF10GF11RF11: | ||||
| @@ -91,8 +134,13 @@ u32 BytesPerPixel(TextureFormat format) { | ||||
| std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, | ||||
|                                  u32 height, u32 block_height) { | ||||
|     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); | ||||
|     CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | ||||
|                      Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||||
|     if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { | ||||
|         FastSwizzleData(width / tile_size, height / tile_size, bytes_per_pixel, | ||||
|                         Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||||
|     } else { | ||||
|         CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | ||||
|                          Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||||
|     } | ||||
|     return unswizzled_data; | ||||
| } | ||||
|  | ||||
| @@ -111,6 +159,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | ||||
|     case TextureFormat::BC6H_UF16: | ||||
|     case TextureFormat::BC6H_SF16: | ||||
|     case TextureFormat::ASTC_2D_4X4: | ||||
|     case TextureFormat::ASTC_2D_8X8: | ||||
|     case TextureFormat::A8R8G8B8: | ||||
|     case TextureFormat::A2B10G10R10: | ||||
|     case TextureFormat::A1B5G5R5: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 raven02
					raven02