1
0
mirror of https://git.suyu.dev/suyu/suyu synced 2025-01-17 13:20:15 -06:00

shader: Implement LOP and LOP3

This commit is contained in:
ameerj 2021-03-03 00:41:05 -05:00
parent 382cba94ed
commit 980cafdc27
8 changed files with 227 additions and 31 deletions

View File

@ -86,6 +86,8 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
frontend/maxwell/translate/impl/load_store_attribute.cpp
frontend/maxwell/translate/impl/load_store_memory.cpp
frontend/maxwell/translate/impl/logic_operation.cpp
frontend/maxwell/translate/impl/logic_operation_three_input.cpp
frontend/maxwell/translate/impl/move_predicate_to_register.cpp
frontend/maxwell/translate/impl/move_register.cpp
frontend/maxwell/translate/impl/move_special_register.cpp

View File

@ -178,8 +178,8 @@ INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
INST(LOP3_cbuf, "LOP3 (cbuf)", "0011 11-- ---- ----")
INST(LOP3_imm, "LOP3 (imm)", "0000 001- ---- ----")
INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")

View File

@ -5,9 +5,8 @@
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
namespace Shader::Maxwell {
[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
const IR::U32& operand_2, CompareOp compare_op,
bool is_signed) {
IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
CompareOp compare_op, bool is_signed) {
switch (compare_op) {
case CompareOp::False:
return ir.Imm1(false);
@ -30,8 +29,8 @@ namespace Shader::Maxwell {
}
}
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
const IR::U1& predicate_2, BooleanOp bop) {
IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
BooleanOp bop) {
switch (bop) {
case BooleanOp::AND:
return ir.LogicalAnd(predicate_1, predicate_2);
@ -43,4 +42,20 @@ namespace Shader::Maxwell {
throw NotImplementedException("Invalid bop {}", bop);
}
}
/// Derives a predicate value from a 32-bit operation result.
///
/// @param ir     Emitter used to build the comparison or constant.
/// @param result Value the predicate is computed from.
/// @param op     Selects the rule: constant false, constant true, result == 0, or result != 0.
/// @return The predicate as an IR::U1.
/// @throws NotImplementedException if op is not one of the four known modes.
IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
    // The two constant modes never inspect the result.
    if (op == PredicateOp::False || op == PredicateOp::True) {
        return ir.Imm1(op == PredicateOp::True);
    }
    if (op == PredicateOp::Zero) {
        return ir.IEqual(result, ir.Imm32(0));
    }
    if (op == PredicateOp::NonZero) {
        return ir.INotEqual(result, ir.Imm32(0));
    }
    throw NotImplementedException("Invalid Predicate operation {}", op);
}
} // namespace Shader::Maxwell

View File

@ -13,4 +13,6 @@ namespace Shader::Maxwell {
// Combines predicate_1 and predicate_2 using the boolean operation selected by bop and
// returns the combined predicate.
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
const IR::U1& predicate_2, BooleanOp bop);
// Computes a predicate from a 32-bit result according to op (constant false/true, or a
// comparison of result against zero).
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
} // namespace Shader::Maxwell

View File

@ -28,6 +28,13 @@ enum class BooleanOp : u64 {
XOR,
};
// Rule used by PredicateOperation to turn a 32-bit result into a predicate.
// Values are spelled out because the enum is decoded directly from instruction bit fields.
enum class PredicateOp : u64 {
    False = 0,   // Predicate is always false
    True = 1,    // Predicate is always true
    Zero = 2,    // Predicate is (result == 0)
    NonZero = 3, // Predicate is (result != 0)
};
class TranslatorVisitor {
public:
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}

View File

@ -0,0 +1,77 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Two-input bitwise operation selector, decoded from a 2-bit instruction field.
enum class LogicalOp : u64 {
    AND = 0,    // operand_1 & operand_2
    OR = 1,     // operand_1 | operand_2
    XOR = 2,    // operand_1 ^ operand_2
    PASS_B = 3, // operand_2 is forwarded unchanged
};
/// Emits the bitwise operation selected by op on the two operands.
///
/// @param ir        Emitter that receives the generated instruction.
/// @param operand_1 First input (unused for PASS_B).
/// @param operand_2 Second input.
/// @param op        Operation to apply.
/// @return The combined value; for PASS_B this is operand_2 itself and nothing is emitted.
/// @throws NotImplementedException if op is not a known LogicalOp.
[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
                                       const IR::U32& operand_2, LogicalOp op) {
    if (op == LogicalOp::AND) {
        return ir.BitwiseAnd(operand_1, operand_2);
    }
    if (op == LogicalOp::OR) {
        return ir.BitwiseOr(operand_1, operand_2);
    }
    if (op == LogicalOp::XOR) {
        return ir.BitwiseXor(operand_1, operand_2);
    }
    if (op == LogicalOp::PASS_B) {
        return operand_2;
    }
    throw NotImplementedException("Invalid Logical operation {}", op);
}
/// Shared translation for the LOP variants.
///
/// Decodes the instruction fields, optionally bit-inverts either operand, applies the selected
/// logical operation, writes the result to the destination register and stores the derived
/// predicate.
///
/// @param v    Visitor providing register access and the IR emitter.
/// @param insn Raw 64-bit instruction encoding.
/// @param op_b Second operand, already fetched by the caller.
/// @throws NotImplementedException when the X bit (bit 43) is set.
void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<39, 1, u64> neg_a;
        BitField<40, 1, u64> neg_b;
        BitField<41, 2, LogicalOp> bit_op;
        BitField<43, 1, u64> x;
        BitField<44, 2, PredicateOp> pred_op;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    if (fields.x != 0) {
        throw NotImplementedException("LOP X");
    }

    // Emits a bitwise NOT only when the corresponding flag is set.
    const auto invert_if = [&v](const IR::U32& value, bool invert) -> IR::U32 {
        if (invert) {
            return v.ir.BitwiseNot(value);
        }
        return value;
    };

    // Operand A is read (and inverted) before operand B is inverted.
    const IR::U32 lhs{invert_if(v.X(fields.src_reg), fields.neg_a != 0)};
    const IR::U32 rhs{invert_if(op_b, fields.neg_b != 0)};

    const IR::U32 combined{LogicalOperation(v.ir, lhs, rhs, fields.bit_op)};
    const IR::U1 predicate{PredicateOperation(v.ir, combined, fields.pred_op)};

    v.X(fields.dest_reg, combined);
    v.ir.SetPred(fields.pred, predicate);
}
} // Anonymous namespace
// LOP whose second operand is obtained through GetReg20.
void TranslatorVisitor::LOP_reg(u64 insn) {
    const IR::U32 second_operand{GetReg20(insn)};
    LOP(*this, insn, second_operand);
}
// LOP whose second operand is obtained through GetCbuf.
void TranslatorVisitor::LOP_cbuf(u64 insn) {
    const IR::U32 second_operand{GetCbuf(insn)};
    LOP(*this, insn, second_operand);
}
// LOP whose second operand is obtained through GetImm20.
void TranslatorVisitor::LOP_imm(u64 insn) {
    const IR::U32 second_operand{GetImm20(insn)};
    LOP(*this, insn, second_operand);
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,117 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
//
// The result is the OR of up to eight minterms. Bit i of ttbl enables minterm i, where
// bit 2 of i selects a (set) or ~a (clear), bit 1 selects b or ~b, and bit 0 selects c or ~c.
// Only the low eight bits of ttbl are examined.
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
                 u64 ttbl) {
    IR::U32 accumulated{ir.Imm32(0)};
    // The complements are always emitted up front, whether or not a minterm ends up using them.
    const IR::U32 inv_a{ir.BitwiseNot(a)};
    const IR::U32 inv_b{ir.BitwiseNot(b)};
    const IR::U32 inv_c{ir.BitwiseNot(c)};

    for (u64 minterm = 0; minterm < 8; ++minterm) {
        const bool enabled{(ttbl & (u64{1} << minterm)) != 0};
        if (!enabled) {
            continue;
        }
        const IR::U32& term_a = (minterm & 4) != 0 ? a : inv_a;
        const IR::U32& term_b = (minterm & 2) != 0 ? b : inv_b;
        const IR::U32& term_c = (minterm & 1) != 0 ? c : inv_c;

        // accumulated |= term_a & term_b & term_c;
        const auto pair{ir.BitwiseAnd(term_a, term_b)};
        const auto triple{ir.BitwiseAnd(pair, term_c)};
        accumulated = ir.BitwiseOr(accumulated, triple);
    }
    return accumulated;
}
/// Shared translation for the LOP3 variants.
///
/// Reads operand A from the source register (bits 8-15), evaluates the truth table over
/// (A, op_b, op_c) and writes the outcome to the destination register (bits 0-7).
///
/// @param v    Visitor providing register access and the IR emitter.
/// @param insn Raw 64-bit instruction encoding.
/// @param op_b Second operand, already fetched by the caller.
/// @param op_c Third operand, already fetched by the caller.
/// @param lut  Truth table passed on to ApplyLUT.
/// @return The value that was written to the destination register.
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
    } const regs{insn};

    const IR::U32 value{ApplyLUT(v.ir, v.X(regs.src_reg), op_b, op_c, lut)};
    v.X(regs.dest_reg, value);
    return value;
}
// Extracts the 8-bit truth table stored in bits 48-55 of the instruction.
u64 GetLut48(u64 insn) {
    constexpr u64 lut_shift{48};
    constexpr u64 lut_mask{0xFF};
    return (insn >> lut_shift) & lut_mask;
}
} // Anonymous namespace
// LOP3 with operands B and C obtained through GetReg20 and GetReg39.
// This form carries its truth table in bits 28-35 and also writes a predicate derived from
// the result. Throws NotImplementedException when the X bit (bit 38) is set.
void TranslatorVisitor::LOP3_reg(u64 insn) {
    union {
        u64 raw;
        BitField<28, 8, u64> lut;
        BitField<36, 2, PredicateOp> pred_op;
        BitField<38, 1, u64> x;
        BitField<48, 3, IR::Pred> pred;
    } const fields{insn};

    if (fields.x != 0) {
        throw NotImplementedException("LOP3 X");
    }

    // Operand fetches stay inside the call so their relative evaluation order is unchanged.
    const IR::U32 value{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), fields.lut)};
    const IR::U1 predicate{PredicateOperation(ir, value, fields.pred_op)};
    ir.SetPred(fields.pred, predicate);
}
// LOP3 with operand B obtained through GetCbuf and operand C through GetReg39.
// The truth table comes from bits 48-55; no predicate is written by this form.
void TranslatorVisitor::LOP3_cbuf(u64 insn) {
    const u64 truth_table{GetLut48(insn)};
    LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), truth_table);
}
// LOP3 with operand B obtained through GetImm20 and operand C through GetReg39.
// The truth table comes from bits 48-55; no predicate is written by this form.
void TranslatorVisitor::LOP3_imm(u64 insn) {
    const u64 truth_table{GetLut48(insn)};
    LOP3(*this, insn, GetImm20(insn), GetReg39(insn), truth_table);
}
} // namespace Shader::Maxwell

View File

@ -493,30 +493,6 @@ void TranslatorVisitor::LONGJMP(u64) {
ThrowNotImplemented(Opcode::LONGJMP);
}
// Placeholder handlers: each ignores the instruction encoding and forwards its own opcode to
// ThrowNotImplemented.
void TranslatorVisitor::LOP_reg(u64) {
ThrowNotImplemented(Opcode::LOP_reg);
}
void TranslatorVisitor::LOP_cbuf(u64) {
ThrowNotImplemented(Opcode::LOP_cbuf);
}
void TranslatorVisitor::LOP_imm(u64) {
ThrowNotImplemented(Opcode::LOP_imm);
}
void TranslatorVisitor::LOP3_reg(u64) {
ThrowNotImplemented(Opcode::LOP3_reg);
}
void TranslatorVisitor::LOP3_cbuf(u64) {
ThrowNotImplemented(Opcode::LOP3_cbuf);
}
void TranslatorVisitor::LOP3_imm(u64) {
ThrowNotImplemented(Opcode::LOP3_imm);
}
// LOP32I has no translation yet and still reports itself as not implemented.
void TranslatorVisitor::LOP32I(u64) {
ThrowNotImplemented(Opcode::LOP32I);
}