Correct Half Float operations on const buffers and implement saturation.
This commit is contained in:
		 Fernando Sahmkow
					Fernando Sahmkow
				
			
				
					committed by
					
						 FernandoS27
						FernandoS27
					
				
			
			
				
	
			
			
			 FernandoS27
						FernandoS27
					
				
			
						parent
						
							650d9b1044
						
					
				
				
					commit
					623b2e4b8f
				
			| @@ -9,6 +9,7 @@ | |||||||
|  |  | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
|  |  | ||||||
|  | using Tegra::Shader::HalfType; | ||||||
| using Tegra::Shader::Instruction; | using Tegra::Shader::Instruction; | ||||||
| using Tegra::Shader::OpCode; | using Tegra::Shader::OpCode; | ||||||
|  |  | ||||||
| @@ -22,7 +23,6 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||||||
|             LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); |             LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); |  | ||||||
|  |  | ||||||
|     const bool negate_a = |     const bool negate_a = | ||||||
|         opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; |         opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||||||
| @@ -32,35 +32,37 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | |||||||
|     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); |     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | ||||||
|     op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); |     op_a = GetOperandAbsNegHalf(op_a, instr.alu_half.abs_a, negate_a); | ||||||
|  |  | ||||||
|     Node op_b = [&]() { |     auto [type_b, op_b] = [&]() -> std::tuple<HalfType, Node> { | ||||||
|         switch (opcode->get().GetId()) { |         switch (opcode->get().GetId()) { | ||||||
|         case OpCode::Id::HADD2_C: |         case OpCode::Id::HADD2_C: | ||||||
|         case OpCode::Id::HMUL2_C: |         case OpCode::Id::HMUL2_C: | ||||||
|             return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |             return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||||||
|         case OpCode::Id::HADD2_R: |         case OpCode::Id::HADD2_R: | ||||||
|         case OpCode::Id::HMUL2_R: |         case OpCode::Id::HMUL2_R: | ||||||
|             return GetRegister(instr.gpr20); |             return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; | ||||||
|         default: |         default: | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|             return Immediate(0); |             return {HalfType::F32, Immediate(0)}; | ||||||
|         } |         } | ||||||
|     }(); |     }(); | ||||||
|     op_b = UnpackHalfFloat(op_b, instr.alu_half.type_b); |     op_b = UnpackHalfFloat(op_b, type_b); | ||||||
|     op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); |     // redeclaration to avoid a bug in clang with reusing local bindings in lambdas | ||||||
|  |     Node op_b_alt = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | ||||||
|  |  | ||||||
|     Node value = [&]() { |     Node value = [&]() { | ||||||
|         switch (opcode->get().GetId()) { |         switch (opcode->get().GetId()) { | ||||||
|         case OpCode::Id::HADD2_C: |         case OpCode::Id::HADD2_C: | ||||||
|         case OpCode::Id::HADD2_R: |         case OpCode::Id::HADD2_R: | ||||||
|             return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); |             return Operation(OperationCode::HAdd, PRECISE, op_a, op_b_alt); | ||||||
|         case OpCode::Id::HMUL2_C: |         case OpCode::Id::HMUL2_C: | ||||||
|         case OpCode::Id::HMUL2_R: |         case OpCode::Id::HMUL2_R: | ||||||
|             return Operation(OperationCode::HMul, PRECISE, op_a, op_b); |             return Operation(OperationCode::HMul, PRECISE, op_a, op_b_alt); | ||||||
|         default: |         default: | ||||||
|             UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); |             UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||||||
|             return Immediate(0); |             return Immediate(0); | ||||||
|         } |         } | ||||||
|     }(); |     }(); | ||||||
|  |     value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||||||
|     value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); |     value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||||||
|  |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |     SetRegister(bb, instr.gpr0, value); | ||||||
|   | |||||||
| @@ -34,15 +34,14 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||||||
|         case OpCode::Id::HFMA2_CR: |         case OpCode::Id::HFMA2_CR: | ||||||
|             neg_b = instr.hfma2.negate_b; |             neg_b = instr.hfma2.negate_b; | ||||||
|             neg_c = instr.hfma2.negate_c; |             neg_c = instr.hfma2.negate_c; | ||||||
|             return {instr.hfma2.saturate, instr.hfma2.type_b, |             return {instr.hfma2.saturate, HalfType::F32, | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), |                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||||||
|                     instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; |                     instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||||||
|         case OpCode::Id::HFMA2_RC: |         case OpCode::Id::HFMA2_RC: | ||||||
|             neg_b = instr.hfma2.negate_b; |             neg_b = instr.hfma2.negate_b; | ||||||
|             neg_c = instr.hfma2.negate_c; |             neg_c = instr.hfma2.negate_c; | ||||||
|             return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), |             return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||||||
|                     instr.hfma2.type_b, |                     HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||||||
|                     GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; |  | ||||||
|         case OpCode::Id::HFMA2_RR: |         case OpCode::Id::HFMA2_RR: | ||||||
|             neg_b = instr.hfma2.rr.negate_b; |             neg_b = instr.hfma2.rr.negate_b; | ||||||
|             neg_c = instr.hfma2.rr.negate_c; |             neg_c = instr.hfma2.rr.negate_c; | ||||||
| @@ -56,13 +55,13 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | |||||||
|             return {false, identity, Immediate(0), identity, Immediate(0)}; |             return {false, identity, Immediate(0), identity, Immediate(0)}; | ||||||
|         } |         } | ||||||
|     }(); |     }(); | ||||||
|     UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented"); |  | ||||||
|  |  | ||||||
|     const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); |     const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | ||||||
|     op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); |     op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | ||||||
|     op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); |     op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||||||
|  |  | ||||||
|     Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); |     Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | ||||||
|  |     value = GetSaturatedHalfFloat(value, saturate); | ||||||
|     value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); |     value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||||||
|  |  | ||||||
|     SetRegister(bb, instr.gpr0, value); |     SetRegister(bb, instr.gpr0, value); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user