1
0
mirror of https://git.suyu.dev/suyu/suyu synced 2025-01-17 05:10:13 -06:00

shader: Fix floating point comparison for FP16

This commit is contained in:
FernandoS27 2021-03-21 04:33:19 +01:00 committed by ameerj
parent e10d9c1b8e
commit 27fb97377e
5 changed files with 54 additions and 30 deletions

View File

@ -72,7 +72,7 @@ bool IsCompareOpOrdered(FPCompareOp op) {
} }
} }
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2,
FPCompareOp compare_op, IR::FpControl control) { FPCompareOp compare_op, IR::FpControl control) {
const bool ordered{IsCompareOpOrdered(compare_op)}; const bool ordered{IsCompareOpOrdered(compare_op)};
switch (compare_op) { switch (compare_op) {

View File

@ -18,7 +18,7 @@ namespace Shader::Maxwell {
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
const IR::F32& operand_2, FPCompareOp compare_op, const IR::F16F32F64& operand_2, FPCompareOp compare_op,
IR::FpControl control = {}); IR::FpControl control = {});
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View File

@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
// TODO: Implement FP16 FloatingPointCompare
//if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() != lhs_b.Type()) {
if (lhs_a.Type() == IR::Type::F16) { if (lhs_a.Type() == IR::Type::F16) {
lhs_a = v.ir.FPConvert(32, lhs_a); lhs_a = v.ir.FPConvert(32, lhs_a);
rhs_a = v.ir.FPConvert(32, rhs_a); rhs_a = v.ir.FPConvert(32, rhs_a);
@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
lhs_b = v.ir.FPConvert(32, lhs_b); lhs_b = v.ir.FPConvert(32, lhs_b);
rhs_b = v.ir.FPConvert(32, rhs_b); rhs_b = v.ir.FPConvert(32, rhs_b);
} }
//} }
lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
@ -108,8 +108,8 @@ void TranslatorVisitor::HSET2_imm(u64 insn) {
const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
hset2.compare_op, Swizzle::H1_H0); Swizzle::H1_H0);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View File

@ -24,8 +24,8 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
// TODO: Implement FP16 FloatingPointCompare
// if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() != lhs_b.Type()) {
if (lhs_a.Type() == IR::Type::F16) { if (lhs_a.Type() == IR::Type::F16) {
lhs_a = v.ir.FPConvert(32, lhs_a); lhs_a = v.ir.FPConvert(32, lhs_a);
rhs_a = v.ir.FPConvert(32, rhs_a); rhs_a = v.ir.FPConvert(32, rhs_a);
@ -34,7 +34,7 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
lhs_b = v.ir.FPConvert(32, lhs_b); lhs_b = v.ir.FPConvert(32, lhs_b);
rhs_b = v.ir.FPConvert(32, rhs_b); rhs_b = v.ir.FPConvert(32, rhs_b);
} }
//} }
lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);

View File

@ -50,6 +50,30 @@ IR::Opcode Replace(IR::Opcode op) {
return IR::Opcode::CompositeInsertF32x3; return IR::Opcode::CompositeInsertF32x3;
case IR::Opcode::CompositeInsertF16x4: case IR::Opcode::CompositeInsertF16x4:
return IR::Opcode::CompositeInsertF32x4; return IR::Opcode::CompositeInsertF32x4;
case IR::Opcode::FPOrdEqual16:
return IR::Opcode::FPOrdEqual32;
case IR::Opcode::FPUnordEqual16:
return IR::Opcode::FPUnordEqual32;
case IR::Opcode::FPOrdNotEqual16:
return IR::Opcode::FPOrdNotEqual32;
case IR::Opcode::FPUnordNotEqual16:
return IR::Opcode::FPUnordNotEqual32;
case IR::Opcode::FPOrdLessThan16:
return IR::Opcode::FPOrdLessThan32;
case IR::Opcode::FPUnordLessThan16:
return IR::Opcode::FPUnordLessThan32;
case IR::Opcode::FPOrdGreaterThan16:
return IR::Opcode::FPOrdGreaterThan32;
case IR::Opcode::FPUnordGreaterThan16:
return IR::Opcode::FPUnordGreaterThan32;
case IR::Opcode::FPOrdLessThanEqual16:
return IR::Opcode::FPOrdLessThanEqual32;
case IR::Opcode::FPUnordLessThanEqual16:
return IR::Opcode::FPUnordLessThanEqual32;
case IR::Opcode::FPOrdGreaterThanEqual16:
return IR::Opcode::FPOrdGreaterThanEqual32;
case IR::Opcode::FPUnordGreaterThanEqual16:
return IR::Opcode::FPUnordGreaterThanEqual32;
case IR::Opcode::ConvertS16F16: case IR::Opcode::ConvertS16F16:
return IR::Opcode::ConvertS16F32; return IR::Opcode::ConvertS16F32;
case IR::Opcode::ConvertS32F16: case IR::Opcode::ConvertS32F16: