mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-11-04 00:49:02 -06:00 
			
		
		
		
	Merge pull request #3409 from ReinUsesLisp/host-queries
query_cache: Implement a query cache and query 21 (samples passed)
This commit is contained in:
		@@ -4,6 +4,7 @@
 | 
			
		||||
 | 
			
		||||
#include <cinttypes>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include "common/assert.h"
 | 
			
		||||
#include "core/core.h"
 | 
			
		||||
#include "core/core_timing.h"
 | 
			
		||||
@@ -16,6 +17,8 @@
 | 
			
		||||
 | 
			
		||||
namespace Tegra::Engines {
 | 
			
		||||
 | 
			
		||||
using VideoCore::QueryType;
 | 
			
		||||
 | 
			
		||||
/// First register id that is actually a Macro call.
 | 
			
		||||
constexpr u32 MacroRegistersStart = 0xE00;
 | 
			
		||||
 | 
			
		||||
@@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 | 
			
		||||
        ProcessQueryCondition();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(counter_reset): {
 | 
			
		||||
        ProcessCounterReset();
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case MAXWELL3D_REG_INDEX(sync_info): {
 | 
			
		||||
        ProcessSyncPoint();
 | 
			
		||||
        break;
 | 
			
		||||
@@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() {
 | 
			
		||||
               "Units other than CROP are unimplemented");
 | 
			
		||||
 | 
			
		||||
    switch (regs.query.query_get.operation) {
 | 
			
		||||
    case Regs::QueryOperation::Release: {
 | 
			
		||||
        const u64 result = regs.query.query_sequence;
 | 
			
		||||
        StampQueryResult(result, regs.query.query_get.short_query == 0);
 | 
			
		||||
    case Regs::QueryOperation::Release:
 | 
			
		||||
        StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Acquire: {
 | 
			
		||||
        // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
 | 
			
		||||
        // to write a value that matches the current payload.
 | 
			
		||||
    case Regs::QueryOperation::Acquire:
 | 
			
		||||
        // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
 | 
			
		||||
        // matches the current payload.
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Counter: {
 | 
			
		||||
        u64 result{};
 | 
			
		||||
        switch (regs.query.query_get.select) {
 | 
			
		||||
        case Regs::QuerySelect::Zero:
 | 
			
		||||
            result = 0;
 | 
			
		||||
            break;
 | 
			
		||||
        default:
 | 
			
		||||
            result = 1;
 | 
			
		||||
            UNIMPLEMENTED_MSG("Unimplemented query select type {}",
 | 
			
		||||
                              static_cast<u32>(regs.query.query_get.select.Value()));
 | 
			
		||||
    case Regs::QueryOperation::Counter:
 | 
			
		||||
        if (const std::optional<u64> result = GetQueryResult()) {
 | 
			
		||||
            // If the query returns an empty optional it means it's cached and deferred.
 | 
			
		||||
            // In this case we have a non-empty result, so we stamp it immediately.
 | 
			
		||||
            StampQueryResult(*result, regs.query.query_get.short_query == 0);
 | 
			
		||||
        }
 | 
			
		||||
        StampQueryResult(result, regs.query.query_get.short_query == 0);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::QueryOperation::Trap: {
 | 
			
		||||
    case Regs::QueryOperation::Trap:
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    default: {
 | 
			
		||||
    default:
 | 
			
		||||
        UNIMPLEMENTED_MSG("Unknown query operation");
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
@@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::ResNonZero: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::Equal: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on =
 | 
			
		||||
            cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case Regs::ConditionMode::NotEqual: {
 | 
			
		||||
        Regs::QueryCompare cmp;
 | 
			
		||||
        memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
 | 
			
		||||
        execute_on =
 | 
			
		||||
            cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
 | 
			
		||||
        break;
 | 
			
		||||
@@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Handles a write to the counter_reset register.
/// Dispatches on the value currently held in regs.counter_reset: SampleCnt asks the rasterizer to
/// reset its SamplesPassed counter; every other value is only reported through a warning log.
void Maxwell3D::ProcessCounterReset() {
 | 
			
		||||
    switch (regs.counter_reset) {
 | 
			
		||||
    case Regs::CounterReset::SampleCnt:
 | 
			
		||||
        rasterizer.ResetCounter(QueryType::SamplesPassed);
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        // No reset is performed for any other counter; the raw register value is logged instead.
        // NOTE(review): this is logged under Render_OpenGL although the code lives in
        // Tegra::Engines - confirm that this is the intended log class.
        LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
 | 
			
		||||
                    static_cast<int>(regs.counter_reset));
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessSyncPoint() {
 | 
			
		||||
    const u32 sync_point = regs.sync_info.sync_point.Value();
 | 
			
		||||
    const u32 increment = regs.sync_info.increment.Value();
 | 
			
		||||
@@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Resolves the value for the query selected in regs.query.query_get.select.
/// @returns 0 for QuerySelect::Zero, an empty optional for QuerySelect::SamplesPassed (the query
///          is handed to the rasterizer instead of being answered here), and 1 for any other
///          select type after reporting it as unimplemented.
std::optional<u64> Maxwell3D::GetQueryResult() {
 | 
			
		||||
    switch (regs.query.query_get.select) {
 | 
			
		||||
    case Regs::QuerySelect::Zero:
 | 
			
		||||
        return 0;
 | 
			
		||||
    case Regs::QuerySelect::SamplesPassed:
 | 
			
		||||
        // Deferred: the rasterizer receives the query address and the current GPU ticks, and no
        // value is returned from here.
        rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
 | 
			
		||||
                         system.GPU().GetTicks());
 | 
			
		||||
        return {};
 | 
			
		||||
    default:
 | 
			
		||||
        // Unhandled select types are reported and answered with a placeholder value of 1.
        UNIMPLEMENTED_MSG("Unimplemented query select type {}",
 | 
			
		||||
                          static_cast<u32>(regs.query.query_get.select.Value()));
 | 
			
		||||
        return 1;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
 | 
			
		||||
    // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
 | 
			
		||||
    auto& shader = state.shader_stages[stage_index];
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,7 @@
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
#include <optional>
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include <unordered_map>
 | 
			
		||||
#include <vector>
 | 
			
		||||
@@ -409,6 +410,27 @@ public:
 | 
			
		||||
            Linear = 1,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        /// Values accepted by the counter_reset register (the type of that register field).
        /// The UnkXX entries are presumably values whose meaning has not been identified yet -
        /// TODO confirm.
        enum class CounterReset : u32 {
 | 
			
		||||
            SampleCnt = 0x01,
 | 
			
		||||
            Unk02 = 0x02,
 | 
			
		||||
            Unk03 = 0x03,
 | 
			
		||||
            Unk04 = 0x04,
 | 
			
		||||
            EmittedPrimitives = 0x10, // Not tested
 | 
			
		||||
            Unk11 = 0x11,
 | 
			
		||||
            Unk12 = 0x12,
 | 
			
		||||
            Unk13 = 0x13,
 | 
			
		||||
            Unk15 = 0x15,
 | 
			
		||||
            Unk16 = 0x16,
 | 
			
		||||
            Unk17 = 0x17,
 | 
			
		||||
            Unk18 = 0x18,
 | 
			
		||||
            Unk1A = 0x1A,
 | 
			
		||||
            Unk1B = 0x1B,
 | 
			
		||||
            Unk1C = 0x1C,
 | 
			
		||||
            Unk1D = 0x1D,
 | 
			
		||||
            Unk1E = 0x1E,
 | 
			
		||||
            GeneratedPrimitives = 0x1F,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        struct Cull {
 | 
			
		||||
            enum class FrontFace : u32 {
 | 
			
		||||
                ClockWise = 0x0900,
 | 
			
		||||
@@ -857,7 +879,7 @@ public:
 | 
			
		||||
                    BitField<7, 1, u32> c7;
 | 
			
		||||
                } clip_distance_enabled;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x1);
 | 
			
		||||
                u32 samplecnt_enable;
 | 
			
		||||
 | 
			
		||||
                float point_size;
 | 
			
		||||
 | 
			
		||||
@@ -865,7 +887,11 @@ public:
 | 
			
		||||
 | 
			
		||||
                u32 point_sprite_enable;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x5);
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x3);
 | 
			
		||||
 | 
			
		||||
                CounterReset counter_reset;
 | 
			
		||||
 | 
			
		||||
                INSERT_UNION_PADDING_WORDS(0x1);
 | 
			
		||||
 | 
			
		||||
                u32 zeta_enable;
 | 
			
		||||
 | 
			
		||||
@@ -1412,12 +1438,15 @@ private:
 | 
			
		||||
    /// Handles a write to the QUERY_GET register.
 | 
			
		||||
    void ProcessQueryGet();
 | 
			
		||||
 | 
			
		||||
    // Writes the query result accordingly
 | 
			
		||||
    /// Writes the query result accordingly.
 | 
			
		||||
    void StampQueryResult(u64 payload, bool long_query);
 | 
			
		||||
 | 
			
		||||
    // Handles Conditional Rendering
 | 
			
		||||
    /// Handles conditional rendering.
 | 
			
		||||
    void ProcessQueryCondition();
 | 
			
		||||
 | 
			
		||||
    /// Handles counter resets.
 | 
			
		||||
    void ProcessCounterReset();
 | 
			
		||||
 | 
			
		||||
    /// Handles writes to syncing register.
 | 
			
		||||
    void ProcessSyncPoint();
 | 
			
		||||
 | 
			
		||||
@@ -1434,6 +1463,9 @@ private:
 | 
			
		||||
 | 
			
		||||
    // Handles an instance drawcall from MME
 | 
			
		||||
    void StepInstance(MMEDrawMode expected_mode, u32 count);
 | 
			
		||||
 | 
			
		||||
    /// Returns a query's value or an empty object if the value will be deferred through a cache.
 | 
			
		||||
    std::optional<u64> GetQueryResult();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define ASSERT_REG_POSITION(field_name, position)                                                  \
 | 
			
		||||
@@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
 | 
			
		||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
 | 
			
		||||
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 | 
			
		||||
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
 | 
			
		||||
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 | 
			
		||||
ASSERT_REG_POSITION(point_size, 0x546);
 | 
			
		||||
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 | 
			
		||||
ASSERT_REG_POSITION(counter_reset, 0x54C);
 | 
			
		||||
ASSERT_REG_POSITION(zeta_enable, 0x54E);
 | 
			
		||||
ASSERT_REG_POSITION(multisample_control, 0x54F);
 | 
			
		||||
ASSERT_REG_POSITION(condition, 0x554);
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user