Merge pull request #933 from neobrain/shader_debugger
Shader debugger improvements
This commit is contained in:
		| @@ -6,9 +6,16 @@ | ||||
| #include <sstream> | ||||
|  | ||||
| #include <QBoxLayout> | ||||
| #include <QFileDialog> | ||||
| #include <QGroupBox> | ||||
| #include <QLabel> | ||||
| #include <QLineEdit> | ||||
| #include <QPushButton> | ||||
| #include <QSignalMapper> | ||||
| #include <QSpinBox> | ||||
| #include <QTreeView> | ||||
|  | ||||
| #include "video_core/shader/shader_interpreter.h" | ||||
| #include "video_core/shader/shader.h" | ||||
|  | ||||
| #include "graphics_vertex_shader.h" | ||||
|  | ||||
| @@ -17,7 +24,7 @@ using nihstro::Instruction; | ||||
| using nihstro::SourceRegister; | ||||
| using nihstro::SwizzlePattern; | ||||
|  | ||||
| GraphicsVertexShaderModel::GraphicsVertexShaderModel(QObject* parent): QAbstractItemModel(parent) { | ||||
| GraphicsVertexShaderModel::GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent): QAbstractItemModel(parent), par(parent) { | ||||
|  | ||||
| } | ||||
|  | ||||
| @@ -34,7 +41,7 @@ int GraphicsVertexShaderModel::columnCount(const QModelIndex& parent) const { | ||||
| } | ||||
|  | ||||
| int GraphicsVertexShaderModel::rowCount(const QModelIndex& parent) const { | ||||
|     return static_cast<int>(info.code.size()); | ||||
|     return static_cast<int>(par->info.code.size()); | ||||
| } | ||||
|  | ||||
| QVariant GraphicsVertexShaderModel::headerData(int section, Qt::Orientation orientation, int role) const { | ||||
| @@ -62,21 +69,21 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||
|     { | ||||
|         switch (index.column()) { | ||||
|         case 0: | ||||
|             if (info.HasLabel(index.row())) | ||||
|                 return QString::fromStdString(info.GetLabel(index.row())); | ||||
|             if (par->info.HasLabel(index.row())) | ||||
|                 return QString::fromStdString(par->info.GetLabel(index.row())); | ||||
|  | ||||
|             return QString("%1").arg(4*index.row(), 4, 16, QLatin1Char('0')); | ||||
|  | ||||
|         case 1: | ||||
|             return QString("%1").arg(info.code[index.row()].hex, 8, 16, QLatin1Char('0')); | ||||
|             return QString("%1").arg(par->info.code[index.row()].hex, 8, 16, QLatin1Char('0')); | ||||
|  | ||||
|         case 2: | ||||
|         { | ||||
|             std::stringstream output; | ||||
|             output.flags(std::ios::hex); | ||||
|  | ||||
|             Instruction instr = info.code[index.row()]; | ||||
|             const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern; | ||||
|             Instruction instr = par->info.code[index.row()]; | ||||
|             const SwizzlePattern& swizzle = par->info.swizzle_info[instr.common.operand_desc_id].pattern; | ||||
|  | ||||
|             // longest known instruction name: "setemit " | ||||
|             output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name; | ||||
| @@ -130,13 +137,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||
|  | ||||
|                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(0,1), instr.common.AddressRegisterName()); | ||||
|                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.x) << " "; | ||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(0,1)); | ||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(0,1)); | ||||
|  | ||||
|                     output << ", "; | ||||
|  | ||||
|                     print_input_indexed_compact(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false).substr(1,1), instr.common.AddressRegisterName()); | ||||
|                     output << " " << instr.common.compare_op.ToString(instr.common.compare_op.y) << " "; | ||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false).substr(1,1)); | ||||
|                     print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true).substr(1,1)); | ||||
|  | ||||
|                     break; | ||||
|                 } | ||||
| @@ -167,7 +174,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||
|                     // TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1 | ||||
|                     if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) { | ||||
|                         SourceRegister src2 = instr.common.GetSrc2(src_is_inverted); | ||||
|                         print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false)); | ||||
|                         print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(true)); | ||||
|                     } | ||||
|                     break; | ||||
|                 } | ||||
| @@ -240,6 +247,18 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||
|     case Qt::FontRole: | ||||
|         return QFont("monospace"); | ||||
|  | ||||
|     case Qt::BackgroundRole: | ||||
|         // Highlight instructions which have no debug data associated to them | ||||
|         for (const auto& record : par->debug_data.records) | ||||
|             if (index.row() == record.instruction_offset) | ||||
|                 return QVariant(); | ||||
|  | ||||
|         return QBrush(QColor(255, 255, 127)); | ||||
|  | ||||
|  | ||||
|     // TODO: Draw arrows for each "reachable" instruction to visualize control flow | ||||
|  | ||||
|  | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| @@ -247,53 +266,232 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con | ||||
|     return QVariant(); | ||||
| } | ||||
|  | ||||
| void GraphicsVertexShaderModel::OnUpdate() | ||||
| { | ||||
|     beginResetModel(); | ||||
| void GraphicsVertexShaderWidget::DumpShader() { | ||||
|     QString filename = QFileDialog::getSaveFileName(this, tr("Save Shader Dump"), "shader_dump.shbin", | ||||
|                                                     tr("Shader Binary (*.shbin)")); | ||||
|  | ||||
|     info.Clear(); | ||||
|     if (filename.isEmpty()) { | ||||
|         // If the user canceled the dialog, don't dump anything. | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     for (auto instr : Pica::g_state.vs.program_code) | ||||
|         info.code.push_back({instr}); | ||||
|     auto& setup  = Pica::g_state.vs; | ||||
|     auto& config = Pica::g_state.regs.vs; | ||||
|  | ||||
|     for (auto pattern : Pica::g_state.vs.swizzle_data) | ||||
|         info.swizzle_info.push_back({pattern}); | ||||
|  | ||||
|     info.labels.insert({ Pica::g_state.regs.vs.main_offset, "main" }); | ||||
|  | ||||
|     endResetModel(); | ||||
|     Pica::DebugUtils::DumpShader(filename.toStdString(), config, setup, Pica::g_state.regs.vs_output_attributes); | ||||
| } | ||||
|  | ||||
|  | ||||
| GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::DebugContext > debug_context, | ||||
|                                                        QWidget* parent) | ||||
|         : BreakPointObserverDock(debug_context, "Pica Vertex Shader", parent) { | ||||
|     setObjectName("PicaVertexShader"); | ||||
|  | ||||
|     auto binary_model = new GraphicsVertexShaderModel(this); | ||||
|     auto binary_list = new QTreeView; | ||||
|     binary_list->setModel(binary_model); | ||||
|     auto input_data_mapper = new QSignalMapper(this); | ||||
|  | ||||
|     // TODO: Support inputting data in hexadecimal raw format | ||||
|     for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||||
|         input_data[i] = new QLineEdit; | ||||
|         input_data[i]->setValidator(new QDoubleValidator(input_data[i])); | ||||
|     } | ||||
|  | ||||
|     breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); | ||||
|  | ||||
|     // TODO: Add some button for jumping to the shader entry point | ||||
|  | ||||
|     model = new GraphicsVertexShaderModel(this); | ||||
|     binary_list = new QTreeView; | ||||
|     binary_list->setModel(model); | ||||
|     binary_list->setRootIsDecorated(false); | ||||
|     binary_list->setAlternatingRowColors(true); | ||||
|  | ||||
|     connect(this, SIGNAL(Update()), binary_model, SLOT(OnUpdate())); | ||||
|     auto dump_shader = new QPushButton(QIcon::fromTheme("document-save"), tr("Dump")); | ||||
|  | ||||
|     instruction_description = new QLabel; | ||||
|  | ||||
|     cycle_index = new QSpinBox; | ||||
|  | ||||
|     connect(this, SIGNAL(SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags)), | ||||
|             binary_list->selectionModel(), SLOT(select(const QModelIndex&, QItemSelectionModel::SelectionFlags))); | ||||
|  | ||||
|     connect(dump_shader, SIGNAL(clicked()), this, SLOT(DumpShader())); | ||||
|  | ||||
|     connect(cycle_index, SIGNAL(valueChanged(int)), this, SLOT(OnCycleIndexChanged(int))); | ||||
|  | ||||
|     for (unsigned i = 0; i < ARRAY_SIZE(input_data); ++i) { | ||||
|         connect(input_data[i], SIGNAL(textEdited(const QString&)), input_data_mapper, SLOT(map())); | ||||
|         input_data_mapper->setMapping(input_data[i], i); | ||||
|     } | ||||
|     connect(input_data_mapper, SIGNAL(mapped(int)), this, SLOT(OnInputAttributeChanged(int))); | ||||
|  | ||||
|     auto main_widget = new QWidget; | ||||
|     auto main_layout = new QVBoxLayout; | ||||
|     { | ||||
|         auto input_data_group = new QGroupBox(tr("Input Data")); | ||||
|  | ||||
|         // For each vertex attribute, add a QHBoxLayout consisting of: | ||||
|         // - A QLabel denoting the source attribute index | ||||
|         // - Four QLineEdits for showing and manipulating attribute data | ||||
|         // - A QLabel denoting the shader input attribute index | ||||
|         auto sub_layout = new QVBoxLayout; | ||||
|         for (unsigned i = 0; i < 16; ++i) { | ||||
|             // Create an HBoxLayout to store the widgets used to specify a particular attribute | ||||
|             // and store it in a QWidget to allow for easy hiding and unhiding. | ||||
|             auto row_layout = new QHBoxLayout; | ||||
|             row_layout->addWidget(new QLabel(tr("Attribute %1").arg(i, 2))); | ||||
|             for (unsigned comp = 0; comp < 4; ++comp) | ||||
|                 row_layout->addWidget(input_data[4 * i + comp]); | ||||
|  | ||||
|             row_layout->addWidget(input_data_mapping[i] = new QLabel); | ||||
|  | ||||
|             input_data_container[i] = new QWidget; | ||||
|             input_data_container[i]->setLayout(row_layout); | ||||
|             input_data_container[i]->hide(); | ||||
|  | ||||
|             sub_layout->addWidget(input_data_container[i]); | ||||
|         } | ||||
|  | ||||
|         sub_layout->addWidget(breakpoint_warning); | ||||
|         breakpoint_warning->hide(); | ||||
|  | ||||
|         input_data_group->setLayout(sub_layout); | ||||
|         main_layout->addWidget(input_data_group); | ||||
|     } | ||||
|     { | ||||
|         auto sub_layout = new QHBoxLayout; | ||||
|         sub_layout->addWidget(binary_list); | ||||
|         main_layout->addLayout(sub_layout); | ||||
|     } | ||||
|     main_layout->addWidget(dump_shader); | ||||
|     { | ||||
|         auto sub_layout = new QHBoxLayout; | ||||
|         sub_layout->addWidget(new QLabel(tr("Cycle Index:"))); | ||||
|         sub_layout->addWidget(cycle_index); | ||||
|         main_layout->addLayout(sub_layout); | ||||
|     } | ||||
|     main_layout->addWidget(instruction_description); | ||||
|     main_layout->addStretch(); | ||||
|     main_widget->setLayout(main_layout); | ||||
|     setWidget(main_widget); | ||||
|  | ||||
|     widget()->setEnabled(false); | ||||
| } | ||||
|  | ||||
| void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { | ||||
|     emit Update(); | ||||
|     auto input = static_cast<Pica::Shader::InputVertex*>(data); | ||||
|     if (event == Pica::DebugContext::Event::VertexLoaded) { | ||||
|         Reload(true, data); | ||||
|     } else { | ||||
|         // No vertex data is retrievable => invalidate currently stored vertex data | ||||
|         Reload(true, nullptr); | ||||
|     } | ||||
|     widget()->setEnabled(true); | ||||
| } | ||||
|  | ||||
| // Rebuilds the disassembly shown by the model from the current Pica state, re-runs the | ||||
| // shader on the stored input vertex to collect debug records, and refreshes the child widgets. | ||||
| // If replace_vertex_data is true, the stored vertex is replaced by vertex_data, or the | ||||
| // input fields are marked as unknown when vertex_data is nullptr. | ||||
| void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_data) { | ||||
|     model->beginResetModel(); | ||||
|  | ||||
|     if (replace_vertex_data) { | ||||
|         if (vertex_data) { | ||||
|             memcpy(&input_vertex, vertex_data, sizeof(input_vertex)); | ||||
|             for (unsigned attr = 0; attr < 16; ++attr) { | ||||
|                 for (unsigned comp = 0; comp < 4; ++comp) { | ||||
|                     input_data[4 * attr + comp]->setText(QString("%1").arg(input_vertex.attr[attr][comp].ToFloat32())); | ||||
|                 } | ||||
|             } | ||||
|             breakpoint_warning->hide(); | ||||
|         } else { | ||||
|             // No vertex data available: show placeholders and the explanatory warning label | ||||
|             for (unsigned attr = 0; attr < 16; ++attr) { | ||||
|                 for (unsigned comp = 0; comp < 4; ++comp) { | ||||
|                     input_data[4 * attr + comp]->setText(QString("???")); | ||||
|                 } | ||||
|             } | ||||
|             breakpoint_warning->show(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Reload shader code | ||||
|     info.Clear(); | ||||
|  | ||||
|     auto& shader_setup = Pica::g_state.vs; | ||||
|     auto& shader_config = Pica::g_state.regs.vs; | ||||
|     for (auto instr : shader_setup.program_code) | ||||
|         info.code.push_back({instr}); | ||||
|  | ||||
|     for (auto pattern : shader_setup.swizzle_data) | ||||
|         info.swizzle_info.push_back({pattern}); | ||||
|  | ||||
|     u32 entry_point = Pica::g_state.regs.vs.main_offset; | ||||
|     info.labels.insert({ entry_point, "main" }); | ||||
|  | ||||
|     // Generate debug information | ||||
|     debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, 1, shader_config, shader_setup); | ||||
|  | ||||
|     // Reload widget state | ||||
|  | ||||
|     // Only show input attributes which are used as input to the shader | ||||
|     for (unsigned int attr = 0; attr < 16; ++attr) { | ||||
|         input_data_container[attr]->setVisible(false); | ||||
|     } | ||||
|     for (unsigned int attr = 0; attr < Pica::g_state.regs.vertex_attributes.GetNumTotalAttributes(); ++attr) { | ||||
|         unsigned source_attr = shader_config.input_register_map.GetRegisterForAttribute(attr); | ||||
|         input_data_mapping[source_attr]->setText(QString("-> v%1").arg(attr)); | ||||
|         input_data_container[source_attr]->setVisible(true); | ||||
|     } | ||||
|  | ||||
|     // Initialize debug info text for current cycle count. | ||||
|     // An empty record list must not produce a maximum of (size_t)0 - 1, and the spin box | ||||
|     // must not emit valueChanged (and thereby index into the records) while its range is | ||||
|     // being adjusted, so signals are blocked for the duration of setMaximum(). | ||||
|     const int num_records = static_cast<int>(debug_data.records.size()); | ||||
|     const bool signals_were_blocked = cycle_index->blockSignals(true); | ||||
|     cycle_index->setMaximum(num_records > 0 ? num_records - 1 : 0); | ||||
|     cycle_index->blockSignals(signals_were_blocked); | ||||
|  | ||||
|     if (num_records > 0) { | ||||
|         OnCycleIndexChanged(cycle_index->value()); | ||||
|     } else { | ||||
|         // Nothing was executed, so there is no per-cycle information to display | ||||
|         instruction_description->clear(); | ||||
|     } | ||||
|  | ||||
|     model->endResetModel(); | ||||
| } | ||||
|  | ||||
| void GraphicsVertexShaderWidget::OnResumed() { | ||||
|     // Grey out the dock contents; OnBreakPointHit enables them again. | ||||
|     QWidget* const contents = widget(); | ||||
|     contents->setEnabled(false); | ||||
| } | ||||
|  | ||||
| // Slot invoked when the user edits one of the input attribute text boxes. | ||||
| // index is the flat text box index (4 * attribute + component), as registered with the signal mapper. | ||||
| void GraphicsVertexShaderWidget::OnInputAttributeChanged(int index) { | ||||
|     bool is_number = false; | ||||
|     const float value = input_data[index]->text().toFloat(&is_number); | ||||
|     if (!is_number) { | ||||
|         // Intermediate or placeholder text (e.g. "-" or "???"): keep the previous value | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Store the edited component so that the shader is actually re-run with the new input. | ||||
|     // NOTE(review): assumes the attribute components are Pica::float24 - confirm against shader.h | ||||
|     input_vertex.attr[index / 4][index % 4] = Pica::float24::FromFloat32(value); | ||||
|  | ||||
|     // Re-execute the shader with the updated value | ||||
|     Reload(); | ||||
| } | ||||
|  | ||||
| // Slot invoked when the cycle index spin box changes. | ||||
| // Shows the recorded state for debug record number index in the description label and | ||||
| // selects/scrolls to the corresponding instruction in the disassembly view. | ||||
| void GraphicsVertexShaderWidget::OnCycleIndexChanged(int index) { | ||||
|     // The spin box may report a value before any debug records exist (or after the record | ||||
|     // list shrank); never index past the end of the list. | ||||
|     if (index < 0 || static_cast<size_t>(index) >= debug_data.records.size()) { | ||||
|         instruction_description->clear(); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     QString text; | ||||
|  | ||||
|     // Each field of a record is only meaningful if its bit is set in record.mask | ||||
|     const auto& record = debug_data.records[index]; | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::SRC1) | ||||
|         text += tr("SRC1: %1, %2, %3, %4\n").arg(record.src1.x.ToFloat32()).arg(record.src1.y.ToFloat32()).arg(record.src1.z.ToFloat32()).arg(record.src1.w.ToFloat32()); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::SRC2) | ||||
|         text += tr("SRC2: %1, %2, %3, %4\n").arg(record.src2.x.ToFloat32()).arg(record.src2.y.ToFloat32()).arg(record.src2.z.ToFloat32()).arg(record.src2.w.ToFloat32()); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::SRC3) | ||||
|         text += tr("SRC3: %1, %2, %3, %4\n").arg(record.src3.x.ToFloat32()).arg(record.src3.y.ToFloat32()).arg(record.src3.z.ToFloat32()).arg(record.src3.w.ToFloat32()); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::DEST_IN) | ||||
|         text += tr("DEST_IN: %1, %2, %3, %4\n").arg(record.dest_in.x.ToFloat32()).arg(record.dest_in.y.ToFloat32()).arg(record.dest_in.z.ToFloat32()).arg(record.dest_in.w.ToFloat32()); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::DEST_OUT) | ||||
|         text += tr("DEST_OUT: %1, %2, %3, %4\n").arg(record.dest_out.x.ToFloat32()).arg(record.dest_out.y.ToFloat32()).arg(record.dest_out.z.ToFloat32()).arg(record.dest_out.w.ToFloat32()); | ||||
|  | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::ADDR_REG_OUT) | ||||
|         text += tr("Address Registers: %1, %2\n").arg(record.address_registers[0]).arg(record.address_registers[1]); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::CMP_RESULT) | ||||
|         text += tr("Compare Result: %1, %2\n").arg(record.conditional_code[0] ? "true" : "false").arg(record.conditional_code[1] ? "true" : "false"); | ||||
|  | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::COND_BOOL_IN) | ||||
|         text += tr("Static Condition: %1\n").arg(record.cond_bool ? "true" : "false"); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::COND_CMP_IN) | ||||
|         text += tr("Dynamic Conditions: %1, %2\n").arg(record.cond_cmp[0] ? "true" : "false").arg(record.cond_cmp[1] ? "true" : "false"); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::LOOP_INT_IN) | ||||
|         text += tr("Loop Parameters: %1 (repeats), %2 (initializer), %3 (increment), %4\n").arg(record.loop_int.x).arg(record.loop_int.y).arg(record.loop_int.z).arg(record.loop_int.w); | ||||
|  | ||||
|     // Offsets are stored in instruction words; multiply by 4 to display byte offsets | ||||
|     text += tr("Instruction offset: 0x%1").arg(4 * record.instruction_offset, 4, 16, QLatin1Char('0')); | ||||
|     if (record.mask & Pica::Shader::DebugDataRecord::NEXT_INSTR) { | ||||
|         text += tr(" -> 0x%2").arg(4 * record.next_instruction, 4, 16, QLatin1Char('0')); | ||||
|     } else { | ||||
|         text += tr(" (last instruction)"); | ||||
|     } | ||||
|  | ||||
|     instruction_description->setText(text); | ||||
|  | ||||
|     // Scroll to current instruction | ||||
|     const QModelIndex instr_index = model->index(record.instruction_offset, 0); | ||||
|     emit SelectCommand(instr_index, QItemSelectionModel::ClearAndSelect | QItemSelectionModel::Rows); | ||||
|     binary_list->scrollTo(instr_index, QAbstractItemView::EnsureVisible); | ||||
| } | ||||
|   | ||||
| @@ -10,11 +10,18 @@ | ||||
|  | ||||
| #include "nihstro/parser_shbin.h" | ||||
|  | ||||
| #include "video_core/shader/shader.h" | ||||
|  | ||||
| class QLabel; | ||||
| class QSpinBox; | ||||
|  | ||||
| class GraphicsVertexShaderWidget; | ||||
|  | ||||
| class GraphicsVertexShaderModel : public QAbstractItemModel { | ||||
|     Q_OBJECT | ||||
|  | ||||
| public: | ||||
|     GraphicsVertexShaderModel(QObject* parent); | ||||
|     GraphicsVertexShaderModel(GraphicsVertexShaderWidget* parent); | ||||
|  | ||||
|     QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override; | ||||
|     QModelIndex parent(const QModelIndex& child) const override; | ||||
| @@ -23,11 +30,10 @@ public: | ||||
|     QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override; | ||||
|     QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override; | ||||
|  | ||||
| public slots: | ||||
|     void OnUpdate(); | ||||
|  | ||||
| private: | ||||
|     nihstro::ShaderInfo info; | ||||
|     GraphicsVertexShaderWidget* par; | ||||
|  | ||||
|     friend class GraphicsVertexShaderWidget; | ||||
| }; | ||||
|  | ||||
| class GraphicsVertexShaderWidget : public BreakPointObserverDock { | ||||
| @@ -43,9 +49,42 @@ private slots: | ||||
|     void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override; | ||||
|     void OnResumed() override; | ||||
|  | ||||
|     void OnInputAttributeChanged(int index); | ||||
|  | ||||
|     void OnCycleIndexChanged(int index); | ||||
|  | ||||
|     void DumpShader(); | ||||
|  | ||||
|     /** | ||||
|      * Reload widget based on the current PICA200 state | ||||
|      * @param replace_vertex_data If true, invalidate all current vertex data | ||||
|      * @param vertex_data New vertex data to use, as passed to OnBreakPointHit. May be nullptr to specify that no valid vertex data can be retrieved currently. Only used if replace_vertex_data is true. | ||||
|      */ | ||||
|     void Reload(bool replace_vertex_data = false, void* vertex_data = nullptr); | ||||
|  | ||||
|  | ||||
| signals: | ||||
|     void Update(); | ||||
|     // Call this to change the current command selection in the disassembly view | ||||
|     void SelectCommand(const QModelIndex&, QItemSelectionModel::SelectionFlags); | ||||
|  | ||||
| private: | ||||
|     QLabel* instruction_description; | ||||
|     QTreeView* binary_list; | ||||
|     GraphicsVertexShaderModel* model; | ||||
|  | ||||
|     /// TODO: Move these into a single struct | ||||
|     std::array<QLineEdit*, 4*16> input_data;  // A text box for each of the 4 components of up to 16 vertex attributes | ||||
|     std::array<QWidget*, 16> input_data_container; // QWidget containing the QLayout containing each vertex attribute | ||||
|     std::array<QLabel*, 16> input_data_mapping; // A QLabel denoting the shader input attribute which the vertex attribute maps to | ||||
|  | ||||
|     // Text to be shown when input vertex data is not retrievable | ||||
|     QLabel* breakpoint_warning; | ||||
|  | ||||
|     QSpinBox* cycle_index; | ||||
|  | ||||
|     nihstro::ShaderInfo info; | ||||
|     Pica::Shader::DebugData<true> debug_data; | ||||
|     Pica::Shader::InputVertex input_vertex; | ||||
|  | ||||
|     friend class GraphicsVertexShaderModel; | ||||
| }; | ||||
|   | ||||
| @@ -215,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|             unsigned int vertex_cache_pos = 0; | ||||
|             vertex_cache_ids.fill(-1); | ||||
|  | ||||
|             Shader::UnitState shader_unit; | ||||
|             Shader::UnitState<false> shader_unit; | ||||
|             Shader::Setup(shader_unit); | ||||
|  | ||||
|             for (unsigned int index = 0; index < regs.num_vertices; ++index) | ||||
|   | ||||
| @@ -14,6 +14,7 @@ | ||||
| #include <png.h> | ||||
| #endif | ||||
|  | ||||
| #include <nihstro/float24.h> | ||||
| #include <nihstro/shader_binary.h> | ||||
|  | ||||
| #include "common/assert.h" | ||||
| @@ -110,8 +111,7 @@ void GeometryDumper::Dump() { | ||||
| } | ||||
|  | ||||
|  | ||||
| void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||||
|                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes) | ||||
| void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes) | ||||
| { | ||||
|     struct StuffToWrite { | ||||
|         u8* pointer; | ||||
| @@ -131,11 +131,14 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||
|     // into shbin format (separate type and component mask). | ||||
|     union OutputRegisterInfo { | ||||
|         enum Type : u64 { | ||||
|             POSITION = 0, | ||||
|             COLOR = 2, | ||||
|             TEXCOORD0 = 3, | ||||
|             TEXCOORD1 = 5, | ||||
|             TEXCOORD2 = 6, | ||||
|             POSITION   = 0, | ||||
|             QUATERNION = 1, | ||||
|             COLOR      = 2, | ||||
|             TEXCOORD0  = 3, | ||||
|             TEXCOORD1  = 5, | ||||
|             TEXCOORD2  = 6, | ||||
|  | ||||
|             VIEW       = 8, | ||||
|         }; | ||||
|  | ||||
|         BitField< 0, 64, u64> hex; | ||||
| @@ -157,6 +160,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||
|                 { OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} }, | ||||
|                 { OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} }, | ||||
|                 { OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} }, | ||||
|                 { OutputAttributes::QUATERNION_X, { OutputRegisterInfo::QUATERNION, 1} }, | ||||
|                 { OutputAttributes::QUATERNION_Y, { OutputRegisterInfo::QUATERNION, 2} }, | ||||
|                 { OutputAttributes::QUATERNION_Z, { OutputRegisterInfo::QUATERNION, 4} }, | ||||
|                 { OutputAttributes::QUATERNION_W, { OutputRegisterInfo::QUATERNION, 8} }, | ||||
|                 { OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} }, | ||||
|                 { OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} }, | ||||
|                 { OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} }, | ||||
| @@ -166,7 +173,10 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||
|                 { OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} }, | ||||
|                 { OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} }, | ||||
|                 { OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} }, | ||||
|                 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} } | ||||
|                 { OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }, | ||||
|                 { OutputAttributes::VIEW_X, { OutputRegisterInfo::VIEW, 1} }, | ||||
|                 { OutputAttributes::VIEW_Y, { OutputRegisterInfo::VIEW, 2} }, | ||||
|                 { OutputAttributes::VIEW_Z, { OutputRegisterInfo::VIEW, 4} } | ||||
|             }; | ||||
|  | ||||
|             for (const auto& semantic : std::vector<OutputAttributes::Semantic>{ | ||||
| @@ -221,28 +231,69 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data | ||||
|  | ||||
|     // TODO: Reduce the amount of binary code written to relevant portions | ||||
|     dvlp.binary_offset = write_offset - dvlp_offset; | ||||
|     dvlp.binary_size_words = binary_size; | ||||
|     QueueForWriting((u8*)binary_data, binary_size * sizeof(u32)); | ||||
|     dvlp.binary_size_words = setup.program_code.size(); | ||||
|     QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32)); | ||||
|  | ||||
|     dvlp.swizzle_info_offset = write_offset - dvlp_offset; | ||||
|     dvlp.swizzle_info_num_entries = swizzle_size; | ||||
|     dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); | ||||
|     u32 dummy = 0; | ||||
|     for (unsigned int i = 0; i < swizzle_size; ++i) { | ||||
|         QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i])); | ||||
|     for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { | ||||
|         QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i])); | ||||
|         QueueForWriting((u8*)&dummy, sizeof(dummy)); | ||||
|     } | ||||
|  | ||||
|     dvle.main_offset_words = main_offset; | ||||
|     dvle.main_offset_words = config.main_offset; | ||||
|     dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; | ||||
|     dvle.output_register_table_size = static_cast<u32>(output_info_table.size()); | ||||
|     QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); | ||||
|  | ||||
|     // TODO: Create a label table for "main" | ||||
|  | ||||
|     std::vector<nihstro::ConstantInfo> constant_table; | ||||
|     for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) { | ||||
|         nihstro::ConstantInfo constant; | ||||
|         memset(&constant, 0, sizeof(constant)); | ||||
|         constant.type = nihstro::ConstantInfo::Bool; | ||||
|         constant.regid = i; | ||||
|         constant.b = setup.uniforms.b[i]; | ||||
|         constant_table.emplace_back(constant); | ||||
|     } | ||||
|     for (unsigned i = 0; i < setup.uniforms.i.size(); ++i) { | ||||
|         nihstro::ConstantInfo constant; | ||||
|         memset(&constant, 0, sizeof(constant)); | ||||
|         constant.type = nihstro::ConstantInfo::Int; | ||||
|         constant.regid = i; | ||||
|         constant.i.x = setup.uniforms.i[i].x; | ||||
|         constant.i.y = setup.uniforms.i[i].y; | ||||
|         constant.i.z = setup.uniforms.i[i].z; | ||||
|         constant.i.w = setup.uniforms.i[i].w; | ||||
|         constant_table.emplace_back(constant); | ||||
|     } | ||||
|     for (unsigned i = 0; i < sizeof(setup.uniforms.f) / sizeof(setup.uniforms.f[0]); ++i) { | ||||
|         nihstro::ConstantInfo constant; | ||||
|         memset(&constant, 0, sizeof(constant)); | ||||
|         constant.type = nihstro::ConstantInfo::Float; | ||||
|         constant.regid = i; | ||||
|         constant.f.x = nihstro::to_float24(setup.uniforms.f[i].x.ToFloat32()); | ||||
|         constant.f.y = nihstro::to_float24(setup.uniforms.f[i].y.ToFloat32()); | ||||
|         constant.f.z = nihstro::to_float24(setup.uniforms.f[i].z.ToFloat32()); | ||||
|         constant.f.w = nihstro::to_float24(setup.uniforms.f[i].w.ToFloat32()); | ||||
|  | ||||
|         // Store constant if it's different from zero.. | ||||
|         if (setup.uniforms.f[i].x.ToFloat32() != 0.0 || | ||||
|             setup.uniforms.f[i].y.ToFloat32() != 0.0 || | ||||
|             setup.uniforms.f[i].z.ToFloat32() != 0.0 || | ||||
|             setup.uniforms.f[i].w.ToFloat32() != 0.0) | ||||
|             constant_table.emplace_back(constant); | ||||
|     } | ||||
|     dvle.constant_table_offset = write_offset - dvlb.dvle_offset; | ||||
|     dvle.constant_table_size = constant_table.size(); | ||||
|     for (const auto& constant : constant_table) { | ||||
|         QueueForWriting((uint8_t*)&constant, sizeof(constant)); | ||||
|     } | ||||
|  | ||||
|     // Write data to file | ||||
|     static int dump_index = 0; | ||||
|     std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin"); | ||||
|     std::ofstream file(filename, std::ios_base::out | std::ios_base::binary); | ||||
|  | ||||
|     for (auto& chunk : writing_queue) { | ||||
|   | ||||
| @@ -158,7 +158,6 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g | ||||
| namespace DebugUtils { | ||||
|  | ||||
| #define PICA_DUMP_GEOMETRY 0 | ||||
| #define PICA_DUMP_SHADERS 0 | ||||
| #define PICA_DUMP_TEXTURES 0 | ||||
| #define PICA_LOG_TEV 0 | ||||
|  | ||||
| @@ -182,8 +181,8 @@ private: | ||||
|     std::vector<Face> faces; | ||||
| }; | ||||
|  | ||||
| void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, | ||||
|                 u32 main_offset, const Regs::VSOutputAttributes* output_attributes); | ||||
| void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, | ||||
|                 const State::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes); | ||||
|  | ||||
|  | ||||
| // Utility class to log Pica commands. | ||||
|   | ||||
| @@ -80,6 +80,11 @@ struct Regs { | ||||
|             POSITION_Z   =  2, | ||||
|             POSITION_W   =  3, | ||||
|  | ||||
|             QUATERNION_X =  4, | ||||
|             QUATERNION_Y =  5, | ||||
|             QUATERNION_Z =  6, | ||||
|             QUATERNION_W =  7, | ||||
|  | ||||
|             COLOR_R      =  8, | ||||
|             COLOR_G      =  9, | ||||
|             COLOR_B      = 10, | ||||
| @@ -89,6 +94,12 @@ struct Regs { | ||||
|             TEXCOORD0_V  = 13, | ||||
|             TEXCOORD1_U  = 14, | ||||
|             TEXCOORD1_V  = 15, | ||||
|  | ||||
|             // TODO: Not verified | ||||
|             VIEW_X       = 18, | ||||
|             VIEW_Y       = 19, | ||||
|             VIEW_Z       = 20, | ||||
|  | ||||
|             TEXCOORD2_U  = 22, | ||||
|             TEXCOORD2_V  = 23, | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,8 @@ | ||||
| #include <memory> | ||||
| #include <unordered_map> | ||||
|  | ||||
| #include <boost/range/algorithm/fill.hpp> | ||||
|  | ||||
| #include "common/hash.h" | ||||
| #include "common/make_unique.h" | ||||
| #include "common/profiler.h" | ||||
| @@ -30,7 +32,7 @@ static JitCompiler jit; | ||||
| static CompiledShader* jit_shader; | ||||
| #endif // ARCHITECTURE_x86_64 | ||||
|  | ||||
| void Setup(UnitState& state) { | ||||
| void Setup(UnitState<false>& state) { | ||||
| #ifdef ARCHITECTURE_x86_64 | ||||
|     if (VideoCore::g_shader_jit_enabled) { | ||||
|         u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ | ||||
| @@ -54,9 +56,8 @@ void Shutdown() { | ||||
|  | ||||
| static Common::Profiling::TimingCategory shader_category("Vertex Shader"); | ||||
|  | ||||
| OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { | ||||
| OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { | ||||
|     auto& config = g_state.regs.vs; | ||||
|     auto& setup = g_state.vs; | ||||
|  | ||||
|     Common::Profiling::ScopeTimer timer(shader_category); | ||||
|  | ||||
| @@ -67,6 +68,8 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||
|     // Setup input register table | ||||
|     const auto& attribute_register_map = config.input_register_map; | ||||
|  | ||||
|     // TODO: Instead of this cumbersome logic, just load the input data directly like | ||||
|     // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } | ||||
|     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; | ||||
|     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; | ||||
|     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; | ||||
| @@ -96,12 +99,6 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||
|     RunInterpreter(state); | ||||
| #endif // ARCHITECTURE_x86_64 | ||||
|  | ||||
| #if PICA_DUMP_SHADERS | ||||
|     DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), | ||||
|         state.debug.max_opdesc_id, config.main_offset, | ||||
|         g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here | ||||
| #endif | ||||
|  | ||||
|     // Setup output data | ||||
|     OutputVertex ret; | ||||
|     // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||||
| @@ -132,14 +129,52 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) | ||||
|             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); | ||||
|     } | ||||
|  | ||||
|     LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||
|     LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | ||||
|         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | ||||
|         ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), | ||||
|         ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | ||||
|         ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); | ||||
|  | ||||
|     return ret; | ||||
| } | ||||
|  | ||||
| // Runs the shader interpreter once with full debugging enabled and returns the collected debug data. | ||||
| // Execution starts at config.main_offset. The first num_attributes entries of input.attr (at most 16) | ||||
| // are loaded into the input registers selected by config.input_register_map; all other input | ||||
| // registers are filled from a zeroed dummy register. | ||||
| // NOTE(review): `setup` is not consulted here, and the RunInterpreter shown alongside this change reads | ||||
| // program code, swizzle data and uniforms from g_state.vs - confirm whether `setup` should be honoured. | ||||
| DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { | ||||
|     UnitState<true> state; | ||||
|  | ||||
|     state.program_counter = config.main_offset; | ||||
|     state.debug.max_offset = 0; | ||||
|     state.debug.max_opdesc_id = 0; | ||||
|  | ||||
|     // Setup input register table | ||||
|     const auto& attribute_register_map = config.input_register_map; | ||||
|  | ||||
|     // The placeholder is handed over exactly like &input.attr[N].x below, i.e. as a pointer to the first | ||||
|     // of four consecutive components. It therefore has to provide four initialized float24 values; a | ||||
|     // single uninitialized float24 would be read out of bounds. | ||||
|     const float24 zero = float24::FromFloat32(0.f); | ||||
|     float24 dummy_register[4] = { zero, zero, zero, zero }; | ||||
|     boost::fill(state.registers.input, &dummy_register[0]); | ||||
|  | ||||
|     if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; | ||||
|     if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; | ||||
|     if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; | ||||
|     if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; | ||||
|     if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; | ||||
|     if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; | ||||
|     if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; | ||||
|     if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; | ||||
|     if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; | ||||
|     if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; | ||||
|     if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; | ||||
|     if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; | ||||
|     if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; | ||||
|     if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; | ||||
|     if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; | ||||
|     if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; | ||||
|  | ||||
|     // Both comparison flags start out cleared | ||||
|     state.conditional_code[0] = false; | ||||
|     state.conditional_code[1] = false; | ||||
|  | ||||
|     RunInterpreter(state); | ||||
|     return state.debug; | ||||
| } | ||||
|  | ||||
| } // namespace Shader | ||||
|  | ||||
| } // namespace Pica | ||||
|   | ||||
| @@ -4,7 +4,10 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <vector> | ||||
|  | ||||
| #include <boost/container/static_vector.hpp> | ||||
|  | ||||
| #include <nihstro/shader_binary.h> | ||||
|  | ||||
| #include "common/common_funcs.h" | ||||
| @@ -72,12 +75,185 @@ struct OutputVertex { | ||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||
| static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); | ||||
|  | ||||
|  | ||||
| // Helper structure used to keep track of data useful for inspection of shader emulation | ||||
| template<bool full_debugging> | ||||
| struct DebugData; | ||||
|  | ||||
| template<> | ||||
| struct DebugData<false> { | ||||
|     // TODO: Hide these behind an interface and move them to DebugData<true> | ||||
|     u32 max_offset; // one past the highest program counter reached (the interpreter stores 1 + program_counter) | ||||
|     u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||||
| }; | ||||
|  | ||||
| template<> | ||||
| struct DebugData<true> { | ||||
|     // A Record holds the values captured for one executed instruction; a field is only meaningful if its Type bit is set in `mask`. | ||||
|     struct Record { | ||||
|         enum Type { | ||||
|             // Floating point arithmetic operands (stored in src1/src2/src3) | ||||
|             SRC1         = 0x1, | ||||
|             SRC2         = 0x2, | ||||
|             SRC3         = 0x4, | ||||
|  | ||||
|             // Initial and final output operand value (stored in dest_in/dest_out) | ||||
|             DEST_IN      = 0x8, | ||||
|             DEST_OUT     = 0x10, | ||||
|  | ||||
|             // Current and next instruction offset (in words) | ||||
|             CUR_INSTR    = 0x20, | ||||
|             NEXT_INSTR   = 0x40, | ||||
|  | ||||
|             // Output address register value (stored in address_registers) | ||||
|             ADDR_REG_OUT = 0x80, | ||||
|  | ||||
|             // Result of a comparison instruction (stored in conditional_code) | ||||
|             CMP_RESULT   = 0x100, | ||||
|  | ||||
|             // Input values for conditional flow control instructions (stored in cond_bool/cond_cmp) | ||||
|             COND_BOOL_IN = 0x200, | ||||
|             COND_CMP_IN  = 0x400, | ||||
|  | ||||
|             // Input values for a loop (stored in loop_int) | ||||
|             LOOP_INT_IN  = 0x800, | ||||
|         }; | ||||
|  | ||||
|         Math::Vec4<float24> src1; | ||||
|         Math::Vec4<float24> src2; | ||||
|         Math::Vec4<float24> src3; | ||||
|  | ||||
|         Math::Vec4<float24> dest_in; | ||||
|         Math::Vec4<float24> dest_out; | ||||
|  | ||||
|         s32 address_registers[2]; | ||||
|         bool conditional_code[2]; | ||||
|         bool cond_bool; | ||||
|         bool cond_cmp[2]; | ||||
|         Math::Vec4<u8> loop_int; | ||||
|  | ||||
|         u32 instruction_offset; | ||||
|         u32 next_instruction; | ||||
|  | ||||
|         // Bitwise OR of the Type flags whose fields have been written so far; starts out empty | ||||
|         unsigned mask = 0; | ||||
|     }; | ||||
|  | ||||
|     u32 max_offset; // one past the highest program counter reached (the interpreter stores 1 + program_counter) | ||||
|     u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||||
|  | ||||
|     // List of records for each executed shader instruction; the interpreter indexes it by its iteration counter | ||||
|     std::vector<DebugData<true>::Record> records; | ||||
| }; | ||||
|  | ||||
| // Type alias for better readability | ||||
| using DebugDataRecord = DebugData<true>::Record; | ||||
|  | ||||
| // Stores `value` in the DebugDataRecord field selected by the `type` template parameter. | ||||
| // Only a declaration is given for the generic case; each supported combination of record type | ||||
| // and value type is provided as an explicit specialization below. | ||||
| template<DebugDataRecord::Type type, typename ValueType> | ||||
| inline void SetField(DebugDataRecord& record, ValueType value); | ||||
|  | ||||
| // SRC1: copies the four components pointed to by `value` into src1 | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::SRC1>(DebugDataRecord& record, float24* value) { | ||||
|     auto& operand = record.src1; | ||||
|     operand.x = value[0]; | ||||
|     operand.y = value[1]; | ||||
|     operand.z = value[2]; | ||||
|     operand.w = value[3]; | ||||
| } | ||||
|  | ||||
| // SRC2: copies the four components pointed to by `value` into src2 | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::SRC2>(DebugDataRecord& record, float24* value) { | ||||
|     auto& operand = record.src2; | ||||
|     operand.x = value[0]; | ||||
|     operand.y = value[1]; | ||||
|     operand.z = value[2]; | ||||
|     operand.w = value[3]; | ||||
| } | ||||
|  | ||||
| // SRC3: copies the four components pointed to by `value` into src3 | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::SRC3>(DebugDataRecord& record, float24* value) { | ||||
|     auto& operand = record.src3; | ||||
|     operand.x = value[0]; | ||||
|     operand.y = value[1]; | ||||
|     operand.z = value[2]; | ||||
|     operand.w = value[3]; | ||||
| } | ||||
|  | ||||
| // DEST_IN: copies the four components pointed to by `value` into dest_in | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::DEST_IN>(DebugDataRecord& record, float24* value) { | ||||
|     auto& target = record.dest_in; | ||||
|     target.x = value[0]; | ||||
|     target.y = value[1]; | ||||
|     target.z = value[2]; | ||||
|     target.w = value[3]; | ||||
| } | ||||
|  | ||||
| // DEST_OUT: copies the four components pointed to by `value` into dest_out | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::DEST_OUT>(DebugDataRecord& record, float24* value) { | ||||
|     auto& target = record.dest_out; | ||||
|     target.x = value[0]; | ||||
|     target.y = value[1]; | ||||
|     target.z = value[2]; | ||||
|     target.w = value[3]; | ||||
| } | ||||
|  | ||||
| // ADDR_REG_OUT: copies both address register values | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::ADDR_REG_OUT>(DebugDataRecord& record, s32* value) { | ||||
|     for (int i = 0; i < 2; ++i) | ||||
|         record.address_registers[i] = value[i]; | ||||
| } | ||||
|  | ||||
| // CMP_RESULT: copies both conditional code flags | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::CMP_RESULT>(DebugDataRecord& record, bool* value) { | ||||
|     for (int i = 0; i < 2; ++i) | ||||
|         record.conditional_code[i] = value[i]; | ||||
| } | ||||
|  | ||||
| // COND_BOOL_IN: stores the boolean input of a conditional flow control instruction | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::COND_BOOL_IN>(DebugDataRecord& record, bool value) { | ||||
|     record.cond_bool = value; | ||||
| } | ||||
|  | ||||
| // COND_CMP_IN: copies both comparison flags used as flow control input | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::COND_CMP_IN>(DebugDataRecord& record, bool* value) { | ||||
|     for (int i = 0; i < 2; ++i) | ||||
|         record.cond_cmp[i] = value[i]; | ||||
| } | ||||
|  | ||||
| // LOOP_INT_IN: stores the integer vector used as loop input | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::LOOP_INT_IN>(DebugDataRecord& record, Math::Vec4<u8> value) { | ||||
|     record.loop_int = value; | ||||
| } | ||||
|  | ||||
| // CUR_INSTR: stores the offset of the current instruction | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::CUR_INSTR>(DebugDataRecord& record, u32 value) { | ||||
|     record.instruction_offset = value; | ||||
| } | ||||
|  | ||||
| // NEXT_INSTR: stores the offset of the next instruction | ||||
| template<> | ||||
| inline void SetField<DebugDataRecord::NEXT_INSTR>(DebugDataRecord& record, u32 value) { | ||||
|     record.next_instruction = value; | ||||
| } | ||||
|  | ||||
| // Records a debug value for the shader iteration identified by `offset`. | ||||
| // Overload for DebugData<false>: debugging is disabled, so the value is dropped. | ||||
| template<DebugDataRecord::Type type, typename ValueType> | ||||
| inline void Record(DebugData<false>& debug_data, u32 offset, ValueType value) { | ||||
|     // Intentionally empty | ||||
| } | ||||
|  | ||||
| // Overload for DebugData<true>: grows the record list if `offset` is not covered yet, stores | ||||
| // `value` via SetField and marks the field as valid in the record's mask. | ||||
| template<DebugDataRecord::Type type, typename ValueType> | ||||
| inline void Record(DebugData<true>& debug_data, u32 offset, ValueType value) { | ||||
|     auto& records = debug_data.records; | ||||
|     if (records.size() <= offset) | ||||
|         records.resize(offset + 1); | ||||
|  | ||||
|     DebugDataRecord& entry = records[offset]; | ||||
|     SetField<type, ValueType>(entry, value); | ||||
|     entry.mask |= type; | ||||
| } | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * This structure contains the state information that needs to be unique for a shader unit. The 3DS | ||||
|  * has four shader units that process shaders in parallel. At the present, Citra only implements a | ||||
|  * single shader unit that processes all shaders serially. Putting the state information in a struct | ||||
|  * here will make it easier for us to parallelize the shader processing later. | ||||
|  */ | ||||
| template<bool Debug> | ||||
| struct UnitState { | ||||
|     struct Registers { | ||||
|         // The registers are accessed by the shader JIT using SSE instructions, and are therefore | ||||
| @@ -111,10 +287,7 @@ struct UnitState { | ||||
|     // TODO: Is there a maximal size for this? | ||||
|     boost::container::static_vector<CallStackElement, 16> call_stack; | ||||
|  | ||||
|     struct { | ||||
|         u32 max_offset; // maximum program counter ever reached | ||||
|         u32 max_opdesc_id; // maximum swizzle pattern index ever used | ||||
|     } debug; | ||||
|     DebugData<Debug> debug; | ||||
|  | ||||
|     static int InputOffset(const SourceRegister& reg) { | ||||
|         switch (reg.GetRegisterType()) { | ||||
| @@ -150,7 +323,7 @@ struct UnitState { | ||||
|  * vertex, which would happen within the `Run` function). | ||||
|  * @param state Shader unit state, must be setup per shader and per shader unit | ||||
|  */ | ||||
| void Setup(UnitState& state); | ||||
| void Setup(UnitState<false>& state); | ||||
|  | ||||
| /// Performs any cleanup when the emulator is shutdown | ||||
| void Shutdown(); | ||||
| @@ -162,7 +335,17 @@ void Shutdown(); | ||||
|  * @param num_attributes The number of vertex shader attributes | ||||
|  * @return The output vertex, after having been processed by the vertex shader | ||||
|  */ | ||||
| OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); | ||||
| OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); | ||||
|  | ||||
| /** | ||||
|  * Produce debug information based on the given shader and input vertex | ||||
|  * | ||||
|  * The shader is executed once through the interpreter on a fresh UnitState<true>, starting at | ||||
|  * config.main_offset, and the debug data gathered during that run is returned. | ||||
|  * | ||||
|  * @param input Input vertex into the shader | ||||
|  * @param num_attributes The number of vertex shader attributes; only the first num_attributes | ||||
|  *                       entries of input.attr (at most 16) are loaded into input registers | ||||
|  * @param config Configuration object for the shader pipeline (provides the entry point and | ||||
|  *               the attribute-to-input-register mapping) | ||||
|  * @param setup Setup object for the shader pipeline | ||||
|  * @return Debug information for this shader with regards to the given vertex | ||||
|  */ | ||||
| DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); | ||||
|  | ||||
| } // namespace Shader | ||||
|  | ||||
|   | ||||
| @@ -21,7 +21,8 @@ namespace Pica { | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| void RunInterpreter(UnitState& state) { | ||||
| template<bool Debug> | ||||
| void RunInterpreter(UnitState<Debug>& state) { | ||||
|     const auto& uniforms = g_state.vs.uniforms; | ||||
|     const auto& swizzle_data = g_state.vs.swizzle_data; | ||||
|     const auto& program_code = g_state.vs.program_code; | ||||
| @@ -29,7 +30,9 @@ void RunInterpreter(UnitState& state) { | ||||
|     // Placeholder for invalid inputs | ||||
|     static float24 dummy_vec4_float24[4]; | ||||
|  | ||||
|     while (true) { | ||||
|     unsigned iteration = 0; | ||||
|     bool exit_loop = false; | ||||
|     while (!exit_loop) { | ||||
|         if (!state.call_stack.empty()) { | ||||
|             auto& top = state.call_stack.back(); | ||||
|             if (state.program_counter == top.final_address) { | ||||
| @@ -47,16 +50,19 @@ void RunInterpreter(UnitState& state) { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         bool exit_loop = false; | ||||
|         const Instruction instr = { program_code[state.program_counter] }; | ||||
|         const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; | ||||
|  | ||||
|         static auto call = [](UnitState& state, u32 offset, u32 num_instructions, | ||||
|         static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, | ||||
|                               u32 return_offset, u8 repeat_count, u8 loop_increment) { | ||||
|             state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset | ||||
|             ASSERT(state.call_stack.size() < state.call_stack.capacity()); | ||||
|             state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); | ||||
|         }; | ||||
|         Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); | ||||
|         if (iteration > 0) | ||||
|             Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); | ||||
|  | ||||
|         state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); | ||||
|  | ||||
|         auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { | ||||
| @@ -123,58 +129,78 @@ void RunInterpreter(UnitState& state) { | ||||
|             switch (instr.opcode.Value().EffectiveOpCode()) { | ||||
|             case OpCode::Id::ADD: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = src1[i] + src2[i]; | ||||
|                 } | ||||
|  | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::MUL: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = src1[i] * src2[i]; | ||||
|                 } | ||||
|  | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::FLR: | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::MAX: | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = std::max(src1[i], src2[i]); | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::MIN: | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = std::min(src1[i], src2[i]); | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::DP3: | ||||
|             case OpCode::Id::DP4: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 float24 dot = float24::FromFloat32(0.f); | ||||
|                 int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4; | ||||
|                 for (int i = 0; i < num_components; ++i) | ||||
| @@ -186,12 +212,15 @@ void RunInterpreter(UnitState& state) { | ||||
|  | ||||
|                     dest[i] = dot; | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             // Reciprocal | ||||
|             case OpCode::Id::RCP: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
| @@ -200,13 +229,15 @@ void RunInterpreter(UnitState& state) { | ||||
|                     // TODO: I think this might be wrong... we should only use one component here | ||||
|                     dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32()); | ||||
|                 } | ||||
|  | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             // Reciprocal Square Root | ||||
|             case OpCode::Id::RSQ: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
| @@ -215,12 +246,13 @@ void RunInterpreter(UnitState& state) { | ||||
|                     // TODO: I think this might be wrong... we should only use one component here | ||||
|                     dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32())); | ||||
|                 } | ||||
|  | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::MOVA: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 for (int i = 0; i < 2; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
| @@ -228,32 +260,41 @@ void RunInterpreter(UnitState& state) { | ||||
|                     // TODO: Figure out how the rounding is done on hardware | ||||
|                     state.address_registers[i] = static_cast<s32>(src1[i].ToFloat32()); | ||||
|                 } | ||||
|  | ||||
|                 Record<DebugDataRecord::ADDR_REG_OUT>(state.debug, iteration, state.address_registers); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::MOV: | ||||
|             { | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = src1[i]; | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::SLT: | ||||
|             case OpCode::Id::SLTI: | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::CMP: | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 for (int i = 0; i < 2; ++i) { | ||||
|                     // TODO: Can you restrict to one compare via dest masking? | ||||
|  | ||||
| @@ -261,27 +302,27 @@ void RunInterpreter(UnitState& state) { | ||||
|                     auto op = (i == 0) ? compare_op.x.Value() : compare_op.y.Value(); | ||||
|  | ||||
|                     switch (op) { | ||||
|                         case compare_op.Equal: | ||||
|                         case Instruction::Common::CompareOpType::Equal: | ||||
|                             state.conditional_code[i] = (src1[i] == src2[i]); | ||||
|                             break; | ||||
|  | ||||
|                         case compare_op.NotEqual: | ||||
|                         case Instruction::Common::CompareOpType::NotEqual: | ||||
|                             state.conditional_code[i] = (src1[i] != src2[i]); | ||||
|                             break; | ||||
|  | ||||
|                         case compare_op.LessThan: | ||||
|                         case Instruction::Common::CompareOpType::LessThan: | ||||
|                             state.conditional_code[i] = (src1[i] <  src2[i]); | ||||
|                             break; | ||||
|  | ||||
|                         case compare_op.LessEqual: | ||||
|                         case Instruction::Common::CompareOpType::LessEqual: | ||||
|                             state.conditional_code[i] = (src1[i] <= src2[i]); | ||||
|                             break; | ||||
|  | ||||
|                         case compare_op.GreaterThan: | ||||
|                         case Instruction::Common::CompareOpType::GreaterThan: | ||||
|                             state.conditional_code[i] = (src1[i] >  src2[i]); | ||||
|                             break; | ||||
|  | ||||
|                         case compare_op.GreaterEqual: | ||||
|                         case Instruction::Common::CompareOpType::GreaterEqual: | ||||
|                             state.conditional_code[i] = (src1[i] >= src2[i]); | ||||
|                             break; | ||||
|  | ||||
| @@ -290,6 +331,7 @@ void RunInterpreter(UnitState& state) { | ||||
|                             break; | ||||
|                     } | ||||
|                 } | ||||
|                 Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code); | ||||
|                 break; | ||||
|  | ||||
|             default: | ||||
| @@ -359,12 +401,17 @@ void RunInterpreter(UnitState& state) { | ||||
|                             : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] | ||||
|                             : dummy_vec4_float24; | ||||
|  | ||||
|                 Record<DebugDataRecord::SRC1>(state.debug, iteration, src1); | ||||
|                 Record<DebugDataRecord::SRC2>(state.debug, iteration, src2); | ||||
|                 Record<DebugDataRecord::SRC3>(state.debug, iteration, src3); | ||||
|                 Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest); | ||||
|                 for (int i = 0; i < 4; ++i) { | ||||
|                     if (!swizzle.DestComponentEnabled(i)) | ||||
|                         continue; | ||||
|  | ||||
|                     dest[i] = src1[i] * src2[i] + src3[i]; | ||||
|                 } | ||||
|                 Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest); | ||||
|             } else { | ||||
|                 LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x", | ||||
|                           (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex); | ||||
| @@ -374,7 +421,7 @@ void RunInterpreter(UnitState& state) { | ||||
|  | ||||
|         default: | ||||
|         { | ||||
|             static auto evaluate_condition = [](const UnitState& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | ||||
|             static auto evaluate_condition = [](const UnitState<Debug>& state, bool refx, bool refy, Instruction::FlowControlType flow_control) { | ||||
|                 bool results[2] = { refx == state.conditional_code[0], | ||||
|                                     refy == state.conditional_code[1] }; | ||||
|  | ||||
| @@ -400,12 +447,14 @@ void RunInterpreter(UnitState& state) { | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::JMPC: | ||||
|                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||
|                     state.program_counter = instr.flow_control.dest_offset - 1; | ||||
|                 } | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::JMPU: | ||||
|                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||
|                     state.program_counter = instr.flow_control.dest_offset - 1; | ||||
|                 } | ||||
| @@ -419,6 +468,7 @@ void RunInterpreter(UnitState& state) { | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::CALLU: | ||||
|                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||
|                     call(state, | ||||
|                         instr.flow_control.dest_offset, | ||||
| @@ -428,6 +478,7 @@ void RunInterpreter(UnitState& state) { | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::CALLC: | ||||
|                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||
|                     call(state, | ||||
|                         instr.flow_control.dest_offset, | ||||
| @@ -440,6 +491,7 @@ void RunInterpreter(UnitState& state) { | ||||
|                 break; | ||||
|  | ||||
|             case OpCode::Id::IFU: | ||||
|                 Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); | ||||
|                 if (uniforms.b[instr.flow_control.bool_uniform_id]) { | ||||
|                     call(state, | ||||
|                          state.program_counter + 1, | ||||
| @@ -458,6 +510,7 @@ void RunInterpreter(UnitState& state) { | ||||
|             { | ||||
|                 // TODO: Do we need to consider swizzlers here? | ||||
|  | ||||
|                 Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); | ||||
|                 if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { | ||||
|                     call(state, | ||||
|                          state.program_counter + 1, | ||||
| @@ -475,14 +528,19 @@ void RunInterpreter(UnitState& state) { | ||||
|  | ||||
|             case OpCode::Id::LOOP: | ||||
|             { | ||||
|                 state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; | ||||
|                 Math::Vec4<u8> loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, | ||||
|                                           uniforms.i[instr.flow_control.int_uniform_id].y, | ||||
|                                           uniforms.i[instr.flow_control.int_uniform_id].z, | ||||
|                                           uniforms.i[instr.flow_control.int_uniform_id].w); | ||||
|                 state.address_registers[2] = loop_param.y; | ||||
|  | ||||
|                 Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); | ||||
|                 call(state, | ||||
|                      state.program_counter + 1, | ||||
|                      instr.flow_control.dest_offset - state.program_counter + 1, | ||||
|                      instr.flow_control.dest_offset + 1, | ||||
|                      uniforms.i[instr.flow_control.int_uniform_id].x, | ||||
|                      uniforms.i[instr.flow_control.int_uniform_id].z); | ||||
|                      loop_param.x, | ||||
|                      loop_param.z); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
| @@ -497,12 +555,14 @@ void RunInterpreter(UnitState& state) { | ||||
|         } | ||||
|  | ||||
|         ++state.program_counter; | ||||
|  | ||||
|         if (exit_loop) | ||||
|             break; | ||||
|         ++iteration; | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Explicit instantiation | ||||
| template void RunInterpreter(UnitState<false>& state); | ||||
| template void RunInterpreter(UnitState<true>& state); | ||||
|  | ||||
| } // namespace | ||||
|  | ||||
| } // namespace | ||||
|   | ||||
| @@ -12,7 +12,8 @@ namespace Pica { | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| void RunInterpreter(UnitState& state); | ||||
| template<bool Debug> | ||||
| void RunInterpreter(UnitState<Debug>& state); | ||||
|  | ||||
| } // namespace | ||||
|  | ||||
|   | ||||
| @@ -141,7 +141,7 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source | ||||
|         src_offset = src_reg.GetIndex() * sizeof(float24) * 4; | ||||
|     } else { | ||||
|         src_ptr = REGISTERS; | ||||
|         src_offset = UnitState::InputOffset(src_reg); | ||||
|         src_offset = UnitState<false>::InputOffset(src_reg); | ||||
|     } | ||||
|  | ||||
|     unsigned operand_desc_id; | ||||
| @@ -217,11 +217,11 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||
|     // If all components are enabled, write the result to the destination register | ||||
|     if (swiz.dest_mask == NO_DEST_REG_MASK) { | ||||
|         // Store dest back to memory | ||||
|         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), src); | ||||
|         MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), src); | ||||
|  | ||||
|     } else { | ||||
|         // Not all components are enabled, so mask the result when storing to the destination register... | ||||
|         MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState::OutputOffset(dest))); | ||||
|         MOVAPS(SCRATCH, MDisp(REGISTERS, UnitState<false>::OutputOffset(dest))); | ||||
|  | ||||
|         if (Common::GetCPUCaps().sse4_1) { | ||||
|             u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); | ||||
| @@ -240,7 +240,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) { | ||||
|         } | ||||
|  | ||||
|         // Store dest back to memory | ||||
|         MOVAPS(MDisp(REGISTERS, UnitState::OutputOffset(dest)), SCRATCH); | ||||
|         MOVAPS(MDisp(REGISTERS, UnitState<false>::OutputOffset(dest)), SCRATCH); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Tony Wasserka
					Tony Wasserka