mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-11-03 16:39:01 -06:00 
			
		
		
		
	shader: Add NVN storage buffer fallbacks
When the SSBO origin of a global memory instruction cannot be tracked, leave it as a global memory operation, assume its pointer lies within one of the NVN storage buffer slots, and resolve the target buffer with a linear search over those slots at shader runtime.
This commit is contained in:
		@@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
 | 
			
		||||
    DefineTextures(program.info, binding);
 | 
			
		||||
    DefineImages(program.info, binding);
 | 
			
		||||
    DefineAttributeMemAccess(program.info);
 | 
			
		||||
    DefineGlobalMemoryFunctions(program.info);
 | 
			
		||||
    DefineLabels(program);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
 | 
			
		||||
    if (!info.uses_global_memory) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    using DefPtr = Id StorageDefinitions::*;
 | 
			
		||||
    const Id zero{u32_zero_value};
 | 
			
		||||
    const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
 | 
			
		||||
                               auto&& callback) {
 | 
			
		||||
        AddLabel();
 | 
			
		||||
        const size_t num_buffers{info.storage_buffers_descriptors.size()};
 | 
			
		||||
        for (size_t index = 0; index < num_buffers; ++index) {
 | 
			
		||||
            const auto& ssbo{info.storage_buffers_descriptors[index]};
 | 
			
		||||
            const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
 | 
			
		||||
            const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
 | 
			
		||||
            const Id ssbo_addr_pointer{OpAccessChain(
 | 
			
		||||
                uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
 | 
			
		||||
            const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
 | 
			
		||||
                                                     zero, ssbo_size_cbuf_offset)};
 | 
			
		||||
 | 
			
		||||
            const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
 | 
			
		||||
            const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
 | 
			
		||||
            const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
 | 
			
		||||
            const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
 | 
			
		||||
                                       OpULessThan(U1, addr, ssbo_end))};
 | 
			
		||||
            const Id then_label{OpLabel()};
 | 
			
		||||
            const Id else_label{OpLabel()};
 | 
			
		||||
            OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
 | 
			
		||||
            OpBranchConditional(cond, then_label, else_label);
 | 
			
		||||
            AddLabel(then_label);
 | 
			
		||||
            const Id ssbo_id{ssbos[index].*ssbo_member};
 | 
			
		||||
            const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
 | 
			
		||||
            const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
 | 
			
		||||
            const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
 | 
			
		||||
            callback(ssbo_pointer);
 | 
			
		||||
            AddLabel(else_label);
 | 
			
		||||
        }
 | 
			
		||||
    }};
 | 
			
		||||
    const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
 | 
			
		||||
        const Id function_type{TypeFunction(type, U64)};
 | 
			
		||||
        const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
 | 
			
		||||
        const Id addr{OpFunctionParameter(U64)};
 | 
			
		||||
        define_body(ssbo_member, addr, element_pointer, shift,
 | 
			
		||||
                    [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
 | 
			
		||||
        OpReturnValue(ConstantNull(type));
 | 
			
		||||
        OpFunctionEnd();
 | 
			
		||||
        return func_id;
 | 
			
		||||
    }};
 | 
			
		||||
    const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
 | 
			
		||||
        const Id function_type{TypeFunction(void_id, U64, type)};
 | 
			
		||||
        const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
 | 
			
		||||
        const Id addr{OpFunctionParameter(U64)};
 | 
			
		||||
        const Id data{OpFunctionParameter(type)};
 | 
			
		||||
        define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
 | 
			
		||||
            OpStore(ssbo_pointer, data);
 | 
			
		||||
            OpReturn();
 | 
			
		||||
        });
 | 
			
		||||
        OpReturn();
 | 
			
		||||
        OpFunctionEnd();
 | 
			
		||||
        return func_id;
 | 
			
		||||
    }};
 | 
			
		||||
    const auto define{
 | 
			
		||||
        [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
 | 
			
		||||
            const Id element_type{type_def.element};
 | 
			
		||||
            const u32 shift{static_cast<u32>(std::countr_zero(size))};
 | 
			
		||||
            const Id load_func{define_load(ssbo_member, element_type, type, shift)};
 | 
			
		||||
            const Id write_func{define_write(ssbo_member, element_type, type, shift)};
 | 
			
		||||
            return std::make_pair(load_func, write_func);
 | 
			
		||||
        }};
 | 
			
		||||
    std::tie(load_global_func_u32, write_global_func_u32) =
 | 
			
		||||
        define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
 | 
			
		||||
    std::tie(load_global_func_u32x2, write_global_func_u32x2) =
 | 
			
		||||
        define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
 | 
			
		||||
    std::tie(load_global_func_u32x4, write_global_func_u32x4) =
 | 
			
		||||
        define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
 | 
			
		||||
    if (info.constant_buffer_descriptors.empty()) {
 | 
			
		||||
        return;
 | 
			
		||||
 
 | 
			
		||||
@@ -224,6 +224,13 @@ public:
 | 
			
		||||
    Id f32x2_min_cas{};
 | 
			
		||||
    Id f32x2_max_cas{};
 | 
			
		||||
 | 
			
		||||
    Id load_global_func_u32{};
 | 
			
		||||
    Id load_global_func_u32x2{};
 | 
			
		||||
    Id load_global_func_u32x4{};
 | 
			
		||||
    Id write_global_func_u32{};
 | 
			
		||||
    Id write_global_func_u32x2{};
 | 
			
		||||
    Id write_global_func_u32x4{};
 | 
			
		||||
 | 
			
		||||
    Id input_position{};
 | 
			
		||||
    std::array<Id, 32> input_generics{};
 | 
			
		||||
 | 
			
		||||
@@ -255,6 +262,7 @@ private:
 | 
			
		||||
    void DefineTextures(const Info& info, u32& binding);
 | 
			
		||||
    void DefineImages(const Info& info, u32& binding);
 | 
			
		||||
    void DefineAttributeMemAccess(const Info& info);
 | 
			
		||||
    void DefineGlobalMemoryFunctions(const Info& info);
 | 
			
		||||
    void DefineLabels(IR::Program& program);
 | 
			
		||||
 | 
			
		||||
    void DefineInputs(const Info& info);
 | 
			
		||||
 
 | 
			
		||||
@@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobalS8(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobalU16(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobalS16(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobal32(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobal64(EmitContext& ctx);
 | 
			
		||||
void EmitLoadGlobal128(EmitContext& ctx);
 | 
			
		||||
Id EmitLoadGlobal32(EmitContext& ctx, Id address);
 | 
			
		||||
Id EmitLoadGlobal64(EmitContext& ctx, Id address);
 | 
			
		||||
Id EmitLoadGlobal128(EmitContext& ctx, Id address);
 | 
			
		||||
void EmitWriteGlobalU8(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobalS8(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobalU16(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobalS16(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobal32(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobal64(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobal128(EmitContext& ctx);
 | 
			
		||||
void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
 | 
			
		||||
void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
 | 
			
		||||
void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
 | 
			
		||||
Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 | 
			
		||||
Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 | 
			
		||||
Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 | 
			
		||||
@@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value);
 | 
			
		||||
Id EmitFPIsNan32(EmitContext& ctx, Id value);
 | 
			
		||||
Id EmitFPIsNan64(EmitContext& ctx, Id value);
 | 
			
		||||
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
 | 
			
		||||
void EmitIAdd64(EmitContext& ctx);
 | 
			
		||||
Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
 | 
			
		||||
Id EmitISub32(EmitContext& ctx, Id a, Id b);
 | 
			
		||||
void EmitISub64(EmitContext& ctx);
 | 
			
		||||
Id EmitISub64(EmitContext& ctx, Id a, Id b);
 | 
			
		||||
Id EmitIMul32(EmitContext& ctx, Id a, Id b);
 | 
			
		||||
Id EmitINeg32(EmitContext& ctx, Id value);
 | 
			
		||||
Id EmitINeg64(EmitContext& ctx, Id value);
 | 
			
		||||
 
 | 
			
		||||
@@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
 | 
			
		||||
    return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitIAdd64(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
 | 
			
		||||
    return ctx.OpIAdd(ctx.U64, a, b);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Emits a 32-bit integer subtraction of a and b and returns the id of the result.
Id EmitISub32(EmitContext& ctx, Id a, Id b) {
    const Id difference = ctx.OpISub(ctx.U32[1], a, b);
    return difference;
}
 | 
			
		||||
 | 
			
		||||
void EmitISub64(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
Id EmitISub64(EmitContext& ctx, Id a, Id b) {
 | 
			
		||||
    return ctx.OpISub(ctx.U64, a, b);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
 | 
			
		||||
 
 | 
			
		||||
@@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitLoadGlobal32(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
 | 
			
		||||
    return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitLoadGlobal64(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
 | 
			
		||||
    return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitLoadGlobal128(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
 | 
			
		||||
    return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitWriteGlobalU8(EmitContext&) {
 | 
			
		||||
@@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitWriteGlobal32(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
 | 
			
		||||
    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitWriteGlobal64(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
 | 
			
		||||
    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void EmitWriteGlobal128(EmitContext&) {
 | 
			
		||||
    throw NotImplementedException("SPIR-V Instruction");
 | 
			
		||||
void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
 | 
			
		||||
    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
 | 
			
		||||
 
 | 
			
		||||
@@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) {
 | 
			
		||||
        }();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Appends the NVN storage buffer slots to the program's storage buffer descriptors.
///
/// Does nothing unless the program uses global memory. Otherwise 16 descriptors of 0x10
/// bytes each are assumed to live in constant buffer 0, starting at a stage dependent base
/// offset. Every slot that is not already described is added as a written buffer with a
/// count of one, so the global memory fallback functions can search it at runtime.
///
/// Throws InvalidArgument when the program stage is not one of the handled stages.
void AddNVNStorageBuffers(IR::Program& program) {
    if (!program.info.uses_global_memory) {
        return;
    }
    const u32 driver_cbuf{0};
    const u32 descriptor_size{0x10};
    const u32 num_buffers{16};
    // Offset of the first storage buffer descriptor for the current stage
    const u32 base{[&] {
        switch (program.stage) {
        case Stage::VertexA:
        case Stage::VertexB:
            return 0x110u;
        case Stage::TessellationControl:
            return 0x210u;
        case Stage::TessellationEval:
            return 0x310u;
        case Stage::Geometry:
            return 0x410u;
        case Stage::Fragment:
            return 0x510u;
        case Stage::Compute:
            return 0x310u;
        }
        throw InvalidArgument("Invalid stage {}", program.stage);
    }()};
    auto& descs{program.info.storage_buffers_descriptors};
    for (u32 index = 0; index < num_buffers; ++index) {
        const u32 offset{base + index * descriptor_size};
        // A descriptor is identified by its constant buffer index and offset together. Matching
        // on the offset alone would let a tracked buffer from another constant buffer hide
        // the NVN slot that happens to share its offset.
        const bool already_described{
            std::ranges::any_of(descs, [&](const StorageBufferDescriptor& desc) {
                return desc.cbuf_index == driver_cbuf && desc.cbuf_offset == offset;
            })};
        if (already_described) {
            continue;
        }
        // Assume these are written for now
        descs.push_back({
            .cbuf_index = driver_cbuf,
            .cbuf_offset = offset,
            .count = 1,
            .is_written = true,
        });
    }
}
 | 
			
		||||
} // Anonymous namespace
 | 
			
		||||
 | 
			
		||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
 | 
			
		||||
@@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
 | 
			
		||||
    Optimization::VerificationPass(program);
 | 
			
		||||
    Optimization::CollectShaderInfoPass(env, program);
 | 
			
		||||
    CollectInterpolationInfo(env, program);
 | 
			
		||||
    AddNVNStorageBuffers(program);
 | 
			
		||||
    return program;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 | 
			
		||||
    case IR::Opcode::FPUnordGreaterThanEqual16:
 | 
			
		||||
    case IR::Opcode::FPIsNan16:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAddF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMinF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMaxF16x2:
 | 
			
		||||
    case IR::Opcode::StorageAtomicAddF16x2:
 | 
			
		||||
    case IR::Opcode::StorageAtomicMinF16x2:
 | 
			
		||||
    case IR::Opcode::StorageAtomicMaxF16x2:
 | 
			
		||||
@@ -373,12 +375,63 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 | 
			
		||||
    case IR::Opcode::StorageAtomicAnd64:
 | 
			
		||||
    case IR::Opcode::StorageAtomicOr64:
 | 
			
		||||
    case IR::Opcode::StorageAtomicXor64:
 | 
			
		||||
    case IR::Opcode::StorageAtomicExchange64:
 | 
			
		||||
        info.uses_int64 = true;
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::LoadGlobalU8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalU16:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS16:
 | 
			
		||||
    case IR::Opcode::LoadGlobal32:
 | 
			
		||||
    case IR::Opcode::LoadGlobal64:
 | 
			
		||||
    case IR::Opcode::LoadGlobal128:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalS8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU16:
 | 
			
		||||
    case IR::Opcode::WriteGlobalS16:
 | 
			
		||||
    case IR::Opcode::WriteGlobal32:
 | 
			
		||||
    case IR::Opcode::WriteGlobal64:
 | 
			
		||||
    case IR::Opcode::WriteGlobal128:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicIAdd32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicSMin32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicUMin32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicSMax32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicUMax32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicInc32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicDec32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAnd32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicOr32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicXor32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicExchange32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicIAdd64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicSMin64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicUMin64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicSMax64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicUMax64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAnd64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicOr64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicXor64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicExchange64:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAddF32:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAddF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicAddF32x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMinF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMinF32x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMaxF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMaxF32x2:
 | 
			
		||||
        info.uses_int64 = true;
 | 
			
		||||
        info.uses_global_memory = true;
 | 
			
		||||
        info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
 | 
			
		||||
        info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::DemoteToHelperInvocation:
 | 
			
		||||
        info.uses_demote_to_helper_invocation = true;
 | 
			
		||||
        break;
 | 
			
		||||
 
 | 
			
		||||
@@ -11,6 +11,7 @@
 | 
			
		||||
#include <boost/container/flat_set.hpp>
 | 
			
		||||
#include <boost/container/small_vector.hpp>
 | 
			
		||||
 | 
			
		||||
#include "common/alignment.h"
 | 
			
		||||
#include "shader_recompiler/frontend/ir/basic_block.h"
 | 
			
		||||
#include "shader_recompiler/frontend/ir/breadth_first_search.h"
 | 
			
		||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
 | 
			
		||||
@@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
 | 
			
		||||
           storage_buffer.offset < bias.offset_end;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Discards a global memory operation, reads return zero and writes are ignored
 | 
			
		||||
void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
 | 
			
		||||
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
 | 
			
		||||
    const IR::Value zero{u32{0}};
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::LoadGlobalS8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalU8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS16:
 | 
			
		||||
    case IR::Opcode::LoadGlobalU16:
 | 
			
		||||
    case IR::Opcode::LoadGlobal32:
 | 
			
		||||
        inst.ReplaceUsesWith(zero);
 | 
			
		||||
        break;
 | 
			
		||||
    case IR::Opcode::LoadGlobal64:
 | 
			
		||||
        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)});
 | 
			
		||||
        break;
 | 
			
		||||
    case IR::Opcode::LoadGlobal128:
 | 
			
		||||
        inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)});
 | 
			
		||||
        break;
 | 
			
		||||
    case IR::Opcode::WriteGlobalS8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalS16:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU16:
 | 
			
		||||
    case IR::Opcode::WriteGlobal32:
 | 
			
		||||
    case IR::Opcode::WriteGlobal64:
 | 
			
		||||
    case IR::Opcode::WriteGlobal128:
 | 
			
		||||
        inst.Invalidate();
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        throw LogicError("Invalid opcode to discard its global memory operation {}",
 | 
			
		||||
                         inst.GetOpcode());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct LowAddrInfo {
 | 
			
		||||
    IR::U32 value;
 | 
			
		||||
    s32 imm_offset;
 | 
			
		||||
@@ -350,6 +318,10 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
 | 
			
		||||
            .index{index.U32()},
 | 
			
		||||
            .offset{offset.U32()},
 | 
			
		||||
        };
 | 
			
		||||
        if (!Common::IsAligned(storage_buffer.offset, 16)) {
 | 
			
		||||
            // The SSBO pointer has to be aligned
 | 
			
		||||
            return std::nullopt;
 | 
			
		||||
        }
 | 
			
		||||
        if (bias && !MeetsBias(storage_buffer, *bias)) {
 | 
			
		||||
            // We have to blacklist some addresses in case we wrongly
 | 
			
		||||
            // point to them
 | 
			
		||||
@@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 | 
			
		||||
    // Track the low address of the instruction
 | 
			
		||||
    const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
 | 
			
		||||
    if (!low_addr_info) {
 | 
			
		||||
        DiscardGlobalMemory(block, inst);
 | 
			
		||||
        // Failed to track the low address, use NVN fallbacks
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    // First try to find storage buffers in the NVN address
 | 
			
		||||
    const IR::U32 low_addr{low_addr_info->value};
 | 
			
		||||
    std::optional storage_buffer{Track(low_addr, &nvn_bias)};
 | 
			
		||||
    std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
 | 
			
		||||
    if (!storage_buffer) {
 | 
			
		||||
        // If it fails, track without a bias
 | 
			
		||||
        storage_buffer = Track(low_addr, nullptr);
 | 
			
		||||
        if (!storage_buffer) {
 | 
			
		||||
            // If that also failed, drop the global memory usage
 | 
			
		||||
            // LOG_ERROR
 | 
			
		||||
            DiscardGlobalMemory(block, inst);
 | 
			
		||||
            // If that also fails, use NVN fallbacks
 | 
			
		||||
            return;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -162,6 +162,7 @@ struct Info {
 | 
			
		||||
    bool uses_atomic_f32x2_min{};
 | 
			
		||||
    bool uses_atomic_f32x2_max{};
 | 
			
		||||
    bool uses_int64_bit_atomics{};
 | 
			
		||||
    bool uses_global_memory{};
 | 
			
		||||
 | 
			
		||||
    IR::Type used_constant_buffer_types{};
 | 
			
		||||
    IR::Type used_storage_buffer_types{};
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user