From 2e0a9f66a0d41dd81bfaa7078aad9b53bedb547e Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Fri, 20 Apr 2018 09:02:28 -0500
Subject: [PATCH 1/5] ShaderGen: Ignore the 'sched' instruction when generating
 shaders.

The 'sched' instruction has a very convoluted encoding, but fortunately it seems to appear only at a fixed interval (once every 4 instructions), so it is skipped by position instead of being decoded.
---
 .../renderer_opengl/gl_shader_decompiler.cpp     | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index de137558d..c23f590cd 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -276,6 +276,18 @@ private:
         shader.AddLine(dest + " = " + src + ";");
     }
 
+    /*
+     * Returns whether the instruction at the specified offset is a 'sched' instruction.
+     * Sched instructions always appear before a sequence of 3 instructions.
+     */
+    bool IsSchedInstruction(u32 offset) const {
+        // Identified purely by position: every 4th slot, counting from main_offset.
+        static constexpr size_t SchedPeriod = 4;
+        u32 absolute_offset = offset - main_offset; // Despite the name, relative to main_offset.
+
+        return (absolute_offset % SchedPeriod) == 0;
+    }
+
     /**
      * Compiles a single instruction from Tegra to GLSL.
      * @param offset the offset of the Tegra shader instruction.
@@ -283,6 +295,10 @@ private:
      * + 1. If the current instruction always terminates the program, returns PROGRAM_END.
      */
     u32 CompileInstr(u32 offset) {
+        // Ignore sched instructions when generating code.
+        if (IsSchedInstruction(offset))
+            return offset + 1;
+
         const Instruction instr = {program_code[offset]};
 
         shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);

From d03fc774756306aa8fd89abd5522c928b46336c7 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Fri, 20 Apr 2018 09:04:54 -0500
Subject: [PATCH 2/5] ShaderGen: Register id 255 is special and is hardcoded to
 return 0 (SR_ZERO).

---
 src/video_core/engines/shader_bytecode.h                | 3 +++
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7cd125f05..b0da805db 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -13,6 +13,9 @@ namespace Tegra {
 namespace Shader {
 
 struct Register {
+    // Register 255 is special cased to always be 0
+    static constexpr size_t ZeroIndex = 255;
+
     constexpr Register() = default;
 
     constexpr Register(u64 value) : value(value) {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c23f590cd..6db0b7d39 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -220,6 +220,8 @@ private:
 
     /// Generates code representing a temporary (GPR) register.
     std::string GetRegister(const Register& reg, unsigned elem = 0) {
+        if (reg == Register::ZeroIndex)
+            return "0";
         if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg < 4) {
             // GPRs 0-3 are output color for the fragment shader
             return std::string{"color."} + "rgba"[(reg + elem) & 3];

From 0a5e01b710b66b9264ceb469903e8b8f16faf516 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Fri, 20 Apr 2018 09:09:50 -0500
Subject: [PATCH 3/5] ShaderGen: Implemented the fsetp instruction.

Predicate variables are now added to the generated shader code in the form of 'pX' where X is the predicate id.
These predicate variables are initialized to false on shader startup and are set via the fsetp instructions.

TODO:

* Only the LessThan and Equal comparison types are implemented.
* Only the single-predicate version is implemented; compound predicates trigger an assertion.
---
 src/video_core/engines/shader_bytecode.h      | 43 ++++++++++-
 .../renderer_opengl/gl_shader_decompiler.cpp  | 72 +++++++++++++++++++
 2 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index b0da805db..fb639a417 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -109,6 +109,8 @@ union OpCode {
 
         FSETP_R = 0x5BB,
         FSETP_C = 0x4BB,
+        FSETP_IMM = 0x36B,
+        FSETP_NEG_IMM = 0x37B,
         EXIT = 0xE30,
         KIL = 0xE33,
 
@@ -124,6 +126,7 @@ union OpCode {
         Ffma,
         Flow,
         Memory,
+        FloatPredicate,
         Unknown,
     };
 
@@ -164,6 +167,9 @@ union OpCode {
         case Id::FSETP_C:
         case Id::KIL:
             return op4;
+        case Id::FSETP_IMM:
+        case Id::FSETP_NEG_IMM:
+            return Id::FSETP_IMM;
         }
 
         switch (op5) {
@@ -241,8 +247,9 @@ union OpCode {
         info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
         info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
         info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
-        info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
-        info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
+        info_table[Id::FSETP_C] = {Type::FloatPredicate, "fsetp_c"};
+        info_table[Id::FSETP_R] = {Type::FloatPredicate, "fsetp_r"};
+        info_table[Id::FSETP_IMM] = {Type::FloatPredicate, "fsetp_imm"};
         info_table[Id::EXIT] = {Type::Trivial, "exit"};
         info_table[Id::IPA] = {Type::Trivial, "ipa"};
         info_table[Id::KIL] = {Type::Flow, "kil"};
@@ -286,7 +293,23 @@ namespace Shader {
 
 enum class Pred : u64 {
     UnusedIndex = 0x7,
-    NeverExecute = 0xf,
+    NeverExecute = 0xF,
+};
+
+enum class PredCondition : u64 { // Values decoded from Instruction::fsetp.cond.
+    LessThan = 1,
+    Equal = 2,
+    LessEqual = 3,
+    GreaterThan = 4,
+    NotEqual = 5,
+    GreaterEqual = 6,
+    // TODO(Subv): Other condition types
+};
+
+enum class PredOperation : u64 { // Values decoded from Instruction::fsetp.op.
+    And = 0,
+    Or = 1,
+    Xor = 2,
 };
 
 enum class SubOp : u64 {
@@ -346,6 +369,20 @@ union Instruction {
         BitField<49, 1, u64> negate_c;
     } ffma;
 
+    union {
+        BitField<0, 3, u64> pred0;           // Asserted unused by the decompiler
+        BitField<3, 3, u64> pred3;           // Destination predicate
+        BitField<7, 1, u64> abs_a;           // Wrap operand a in abs()
+        BitField<39, 3, u64> pred39;         // Asserted unused by the decompiler
+        BitField<42, 1, u64> neg_pred;
+        BitField<43, 1, u64> neg_a;          // Prefix operand a with '-'
+        BitField<44, 1, u64> abs_b;          // Wrap operand b in abs()
+        BitField<45, 2, PredOperation> op;
+        BitField<47, 1, u64> ftz;
+        BitField<48, 4, PredCondition> cond; // Comparison between operands a and b
+        BitField<56, 1, u64> neg_b;          // Negate operand b (immediate form only)
+    } fsetp;
+
     BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6db0b7d39..2e0203a68 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -278,6 +278,21 @@ private:
         shader.AddLine(dest + " = " + src + ";");
     }
 
+    /**
+     * Emits a GLSL assignment to the boolean variable 'pN' and records it for declaration.
+     * @param pred The id N of the predicate to write to; must not be Pred::UnusedIndex.
+     * @param value The GLSL expression to assign to the predicate.
+     */
+    void SetPredicate(u64 pred, const std::string& value) {
+        using Tegra::Shader::Pred;
+        // Index 7 (Pred::UnusedIndex) is the constant predicate and can't be assigned to.
+        ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
+
+        std::string variable = 'p' + std::to_string(pred);
+        shader.AddLine(variable + " = " + value + ';');
+        declr_predicates.insert(std::move(variable));
+    }
+
     /*
      * Returns whether the instruction at the specified offset is a 'sched' instruction.
      * Sched instructions always appear before a sequence of 3 instructions.
@@ -468,7 +483,57 @@ private:
             }
             break;
         }
+        case OpCode::Type::FloatPredicate: {
+            std::string op_a = instr.fsetp.neg_a ? "-" : "";
+            op_a += GetRegister(instr.gpr8);
 
+            if (instr.fsetp.abs_a) {
+                op_a = "abs(" + op_a + ')';
+            }
+
+            std::string op_b{};
+
+            if (instr.is_b_imm) {
+                if (instr.fsetp.neg_b) {
+                    // Only the immediate version of fsetp has a neg_b bit.
+                    op_b += '-';
+                }
+                op_b += '(' + GetImmediate19(instr) + ')';
+            } else {
+                if (instr.is_b_gpr) {
+                    op_b += GetRegister(instr.gpr20);
+                } else {
+                    op_b += GetUniform(instr.uniform);
+                }
+            }
+
+            if (instr.fsetp.abs_b) {
+                op_b = "abs(" + op_b + ')';
+            }
+
+            using Tegra::Shader::Pred;
+            ASSERT_MSG(instr.fsetp.pred0 == static_cast<u64>(Pred::UnusedIndex) &&
+                           instr.fsetp.pred39 == static_cast<u64>(Pred::UnusedIndex),
+                       "Compound predicates are not implemented");
+
+            // We can't use the constant predicate as destination.
+            ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+            using Tegra::Shader::PredCondition;
+            switch (instr.fsetp.cond) {
+            case PredCondition::LessThan:
+                SetPredicate(instr.fsetp.pred3, '(' + op_a + ") < (" + op_b + ')');
+                break;
+            case PredCondition::Equal:
+                SetPredicate(instr.fsetp.pred3, '(' + op_a + ") == (" + op_b + ')');
+                break;
+            default:
+                NGLOG_CRITICAL(HW_GPU, "Unhandled predicate condition: {} (a: {}, b: {})",
+                               static_cast<unsigned>(instr.fsetp.cond.Value()), op_a, op_b);
+                UNREACHABLE();
+            }
+            break;
+        }
         default: {
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::EXIT: {
@@ -623,6 +688,12 @@ private:
             declarations.AddNewLine();
             ++const_buffer_layout;
         }
+
+        declarations.AddNewLine();
+        for (const auto& pred : declr_predicates) {
+            declarations.AddLine("bool " + pred + " = false;");
+        }
+        declarations.AddNewLine();
     }
 
 private:
@@ -636,6 +707,7 @@ private:
 
     // Declarations
     std::set<std::string> declr_register;
+    std::set<std::string> declr_predicates;
     std::set<Attribute::Index> declr_input_attribute;
     std::set<Attribute::Index> declr_output_attribute;
     std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;

From c3a8ea76f180fbaf2d58d0454e7adc2bb1f30009 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Fri, 20 Apr 2018 09:16:55 -0500
Subject: [PATCH 4/5] ShaderGen: Implemented predicated instruction execution.

Each predicated instruction will be wrapped in an `if (predicate) { instruction_body; }` in the GLSL, where `predicate` is one of the boolean predicate variables previously set by fsetp, negated if the instruction requests it.
---
 src/video_core/engines/shader_bytecode.h      |  6 +++-
 .../renderer_opengl/gl_shader_decompiler.cpp  | 35 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index fb639a417..e6c2fd367 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -331,7 +331,11 @@ union Instruction {
     OpCode opcode;
     BitField<0, 8, Register> gpr0;
     BitField<8, 8, Register> gpr8;
-    BitField<16, 4, Pred> pred;
+    union {
+        BitField<16, 4, Pred> full_pred;
+        BitField<16, 3, u64> pred_index;
+    } pred;
+    BitField<19, 1, u64> negate_pred;
     BitField<20, 8, Register> gpr20;
     BitField<20, 7, SubOp> sub_op;
     BitField<28, 8, Register> gpr28;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2e0203a68..7aaee9464 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -293,6 +293,25 @@ private:
         declr_predicates.insert(std::move(variable));
     }
 
+    /**
+     * Returns the condition to use in the 'if' that guards a predicated instruction.
+     * @param instr Instruction to generate the condition for; pred_index must not be UnusedIndex.
+     * @returns 'pN' for predicate index N, or '!(pN)' when negate_pred is set.
+     */
+    std::string GetPredicateCondition(Instruction instr) const {
+        using Tegra::Shader::Pred;
+        ASSERT(instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex));
+
+        std::string variable =
+            'p' + std::to_string(static_cast<u64>(instr.pred.pred_index.Value()));
+
+        if (instr.negate_pred) {
+            return "!(" + variable + ')';
+        }
+
+        return variable;
+    }
+
     /*
      * Returns whether the instruction at the specified offset is a 'sched' instruction.
      * Sched instructions always appear before a sequence of 3 instructions.
@@ -320,6 +339,16 @@ private:
 
         shader.AddLine("// " + std::to_string(offset) + ": " + OpCode::GetInfo(instr.opcode).name);
 
+        using Tegra::Shader::Pred;
+        ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
+                   "NeverExecute predicate not implemented");
+
+        if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+            shader.AddLine("if (" + GetPredicateCondition(instr) + ')');
+            shader.AddLine('{');
+            ++shader.scope;
+        }
+
         switch (OpCode::GetInfo(instr.opcode).type) {
         case OpCode::Type::Arithmetic: {
             std::string dest = GetRegister(instr.gpr0);
@@ -559,6 +588,12 @@ private:
         }
         }
 
+        // Close the predicate condition scope.
+        if (instr.pred != Pred::UnusedIndex) {
+            --shader.scope;
+            shader.AddLine('}');
+        }
+
         return offset + 1;
     }
 

From 17a0ef1e1eb65ceb41232e694f779e1645e2b2d7 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Fri, 20 Apr 2018 09:17:39 -0500
Subject: [PATCH 5/5] ShaderGen: Implemented the KIL instruction, which is
 equivalent to 'discard'.

---
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7aaee9464..2395945c3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -566,10 +566,16 @@ private:
         default: {
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::EXIT: {
+                ASSERT_MSG(instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex),
+                           "Predicated exits not implemented");
                 shader.AddLine("return true;");
                 offset = PROGRAM_END - 1;
                 break;
             }
+            case OpCode::Id::KIL: {
+                shader.AddLine("discard;");
+                break;
+            }
             case OpCode::Id::IPA: {
                 const auto& attribute = instr.attribute.fmt28;
                 std::string dest = GetRegister(instr.gpr0);
@@ -589,7 +595,7 @@ private:
         }
 
         // Close the predicate condition scope.
-        if (instr.pred != Pred::UnusedIndex) {
+        if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
             --shader.scope;
             shader.AddLine('}');
         }