Merge pull request #1519 from ReinUsesLisp/vsetp
gl_shader_decompiler: Implement VSETP
This commit is contained in:
		| @@ -214,7 +214,7 @@ enum class IMinMaxExchange : u64 { | ||||
|     XHi = 3, | ||||
| }; | ||||
|  | ||||
| enum class VmadType : u64 { | ||||
| enum class VideoType : u64 { | ||||
|     Size16_Low = 0, | ||||
|     Size16_High = 1, | ||||
|     Size32 = 2, | ||||
| @@ -782,6 +782,14 @@ union Instruction { | ||||
|         BitField<45, 2, PredOperation> op; | ||||
|     } psetp; | ||||
|  | ||||
|     union { | ||||
|         BitField<43, 4, PredCondition> cond; | ||||
|         BitField<45, 2, PredOperation> op; | ||||
|         BitField<3, 3, u64> pred3; | ||||
|         BitField<0, 3, u64> pred0; | ||||
|         BitField<39, 3, u64> pred39; | ||||
|     } vsetp; | ||||
|  | ||||
|     union { | ||||
|         BitField<12, 3, u64> pred12; | ||||
|         BitField<15, 1, u64> neg_pred12; | ||||
| @@ -1154,15 +1162,17 @@ union Instruction { | ||||
|     union { | ||||
|         BitField<48, 1, u64> signed_a; | ||||
|         BitField<38, 1, u64> is_byte_chunk_a; | ||||
|         BitField<36, 2, VmadType> type_a; | ||||
|         BitField<36, 2, VideoType> type_a; | ||||
|         BitField<36, 2, u64> byte_height_a; | ||||
|  | ||||
|         BitField<49, 1, u64> signed_b; | ||||
|         BitField<50, 1, u64> use_register_b; | ||||
|         BitField<30, 1, u64> is_byte_chunk_b; | ||||
|         BitField<28, 2, VmadType> type_b; | ||||
|         BitField<28, 2, VideoType> type_b; | ||||
|         BitField<28, 2, u64> byte_height_b; | ||||
|     } video; | ||||
|  | ||||
|     union { | ||||
|         BitField<51, 2, VmadShr> shr; | ||||
|         BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) | ||||
|         BitField<47, 1, u64> cc; | ||||
| @@ -1238,6 +1248,7 @@ public: | ||||
|         OUT_R, // Emit vertex/primitive | ||||
|         ISBERD, | ||||
|         VMAD, | ||||
|         VSETP, | ||||
|         FFMA_IMM, // Fused Multiply and Add | ||||
|         FFMA_CR, | ||||
|         FFMA_RC, | ||||
| @@ -1495,6 +1506,7 @@ private: | ||||
|             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | ||||
|             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | ||||
|             INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), | ||||
|             INST("0101000011110---", Id::VSETP, Type::Trivial, "VSETP"), | ||||
|             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | ||||
|             INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | ||||
|             INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | ||||
|   | ||||
| @@ -1312,6 +1312,63 @@ private: | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Unpacks a video instruction operand (e.g. VMAD). | ||||
|     std::string GetVideoOperand(const std::string& op, bool is_chunk, bool is_signed, | ||||
|                                 Tegra::Shader::VideoType type, u64 byte_height) { | ||||
|         const std::string value = [&]() { | ||||
|             if (!is_chunk) { | ||||
|                 const auto offset = static_cast<u32>(byte_height * 8); | ||||
|                 return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; | ||||
|             } | ||||
|             const std::string zero = "0"; | ||||
|  | ||||
|             switch (type) { | ||||
|             case Tegra::Shader::VideoType::Size16_Low: | ||||
|                 return '(' + op + " & 0xffff)"; | ||||
|             case Tegra::Shader::VideoType::Size16_High: | ||||
|                 return '(' + op + " >> 16)"; | ||||
|             case Tegra::Shader::VideoType::Size32: | ||||
|                 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||||
|                 // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||||
|                 // explanation is found: assert. | ||||
|                 UNIMPLEMENTED(); | ||||
|                 return zero; | ||||
|             case Tegra::Shader::VideoType::Invalid: | ||||
|                 UNREACHABLE_MSG("Invalid instruction encoding"); | ||||
|                 return zero; | ||||
|             default: | ||||
|                 UNREACHABLE(); | ||||
|                 return zero; | ||||
|             } | ||||
|         }(); | ||||
|  | ||||
|         if (is_signed) { | ||||
|             return "int(" + value + ')'; | ||||
|         } | ||||
|         return value; | ||||
|     }; | ||||
|  | ||||
|     /// Gets the A operand for a video instruction. | ||||
|     std::string GetVideoOperandA(Instruction instr) { | ||||
|         return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||||
|                                instr.video.is_byte_chunk_a != 0, instr.video.signed_a, | ||||
|                                instr.video.type_a, instr.video.byte_height_a); | ||||
|     } | ||||
|  | ||||
|     /// Gets the B operand for a video instruction. | ||||
|     std::string GetVideoOperandB(Instruction instr) { | ||||
|         if (instr.video.use_register_b) { | ||||
|             return GetVideoOperand(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||||
|                                    instr.video.is_byte_chunk_b != 0, instr.video.signed_b, | ||||
|                                    instr.video.type_b, instr.video.byte_height_b); | ||||
|         } else { | ||||
|             return '(' + | ||||
|                    std::to_string(instr.video.signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||||
|                                                        : instr.alu.GetImm20_16()) + | ||||
|                    ')'; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Compiles a single instruction from Tegra to GLSL. | ||||
|      * @param offset the offset of the Tegra shader instruction. | ||||
| @@ -3321,87 +3378,51 @@ private: | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::VMAD: { | ||||
|                 const bool signed_a = instr.vmad.signed_a == 1; | ||||
|                 const bool signed_b = instr.vmad.signed_b == 1; | ||||
|                 const bool result_signed = signed_a || signed_b; | ||||
|                 boost::optional<std::string> forced_result; | ||||
|  | ||||
|                 auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed, | ||||
|                                   Tegra::Shader::VmadType type, u64 byte_height) { | ||||
|                     const std::string value = [&]() { | ||||
|                         if (!is_chunk) { | ||||
|                             const auto shift = static_cast<u32>(byte_height * 8); | ||||
|                             return "((" + op + " >> " + std::to_string(shift) + ") & 0xff)"; | ||||
|                         } | ||||
|                         const std::string zero = "0"; | ||||
|  | ||||
|                         switch (type) { | ||||
|                         case Tegra::Shader::VmadType::Size16_Low: | ||||
|                             return '(' + op + " & 0xffff)"; | ||||
|                         case Tegra::Shader::VmadType::Size16_High: | ||||
|                             return '(' + op + " >> 16)"; | ||||
|                         case Tegra::Shader::VmadType::Size32: | ||||
|                             // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when | ||||
|                             // this type is used (1 * 1 + 0 == 0x5b800000). Until a better | ||||
|                             // explanation is found: assert. | ||||
|                             UNREACHABLE_MSG("Unimplemented"); | ||||
|                             return zero; | ||||
|                         case Tegra::Shader::VmadType::Invalid: | ||||
|                             // Note(Rodrigo): This flag is invalid according to nvdisasm. From my | ||||
|                             // testing (even though it's invalid) this makes the whole instruction | ||||
|                             // assign zero to target register. | ||||
|                             forced_result = boost::make_optional(zero); | ||||
|                             return zero; | ||||
|                         default: | ||||
|                             UNREACHABLE(); | ||||
|                             return zero; | ||||
|                         } | ||||
|                     }(); | ||||
|  | ||||
|                     if (is_signed) { | ||||
|                         return "int(" + value + ')'; | ||||
|                     } | ||||
|                     return value; | ||||
|                 }; | ||||
|  | ||||
|                 const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false), | ||||
|                                                 instr.vmad.is_byte_chunk_a != 0, signed_a, | ||||
|                                                 instr.vmad.type_a, instr.vmad.byte_height_a); | ||||
|  | ||||
|                 std::string op_b; | ||||
|                 if (instr.vmad.use_register_b) { | ||||
|                     op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false), | ||||
|                                   instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b, | ||||
|                                   instr.vmad.byte_height_b); | ||||
|                 } else { | ||||
|                     op_b = '(' + | ||||
|                            std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) | ||||
|                                                    : instr.alu.GetImm20_16()) + | ||||
|                            ')'; | ||||
|                 } | ||||
|  | ||||
|                 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||||
|                 const std::string op_a = GetVideoOperandA(instr); | ||||
|                 const std::string op_b = GetVideoOperandB(instr); | ||||
|                 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); | ||||
|  | ||||
|                 std::string result; | ||||
|                 if (forced_result) { | ||||
|                     result = *forced_result; | ||||
|                 } else { | ||||
|                     result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||||
|                 std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; | ||||
|  | ||||
|                     switch (instr.vmad.shr) { | ||||
|                     case Tegra::Shader::VmadShr::Shr7: | ||||
|                         result = '(' + result + " >> 7)"; | ||||
|                         break; | ||||
|                     case Tegra::Shader::VmadShr::Shr15: | ||||
|                         result = '(' + result + " >> 15)"; | ||||
|                         break; | ||||
|                     } | ||||
|                 switch (instr.vmad.shr) { | ||||
|                 case Tegra::Shader::VmadShr::Shr7: | ||||
|                     result = '(' + result + " >> 7)"; | ||||
|                     break; | ||||
|                 case Tegra::Shader::VmadShr::Shr15: | ||||
|                     result = '(' + result + " >> 15)"; | ||||
|                     break; | ||||
|                 } | ||||
|  | ||||
|                 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, | ||||
|                                           instr.vmad.saturate == 1, 0, Register::Size::Word, | ||||
|                                           instr.vmad.cc); | ||||
|                 break; | ||||
|             } | ||||
|             case OpCode::Id::VSETP: { | ||||
|                 const std::string op_a = GetVideoOperandA(instr); | ||||
|                 const std::string op_b = GetVideoOperandB(instr); | ||||
|  | ||||
|                 // We can't use the constant predicate as destination. | ||||
|                 ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||||
|  | ||||
|                 const std::string second_pred = GetPredicateCondition(instr.vsetp.pred39, false); | ||||
|  | ||||
|                 const std::string combiner = GetPredicateCombiner(instr.vsetp.op); | ||||
|  | ||||
|                 const std::string predicate = GetPredicateComparison(instr.vsetp.cond, op_a, op_b); | ||||
|                 // Set the primary predicate to the result of Predicate OP SecondPredicate | ||||
|                 SetPredicate(instr.vsetp.pred3, | ||||
|                              '(' + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|  | ||||
|                 if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||||
|                     // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||||
|                     // if enabled | ||||
|                     SetPredicate(instr.vsetp.pred0, | ||||
|                                  "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             default: { | ||||
|                 LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); | ||||
|                 UNREACHABLE(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei