gl_shader_cache: Specialize local memory size for compute shaders
Local memory size in compute shaders was stubbed with an arbitrary size. This commit specializes local memory size from guest GPU parameters.
This commit is contained in:
parent
dbeb523879
commit
287ae2b9e8
@ -178,7 +178,12 @@ public:
|
|||||||
BitField<24, 5, u32> gpr_alloc;
|
BitField<24, 5, u32> gpr_alloc;
|
||||||
};
|
};
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x11);
|
union {
|
||||||
|
BitField<0, 20, u32> local_crs_alloc;
|
||||||
|
BitField<24, 5, u32> sass_version;
|
||||||
|
};
|
||||||
|
|
||||||
|
INSERT_PADDING_WORDS(0x10);
|
||||||
} launch_description{};
|
} launch_description{};
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
@ -731,7 +731,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
|
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
|
||||||
launch_desc.block_dim_z, launch_desc.shared_alloc);
|
launch_desc.block_dim_z, launch_desc.shared_alloc,
|
||||||
|
launch_desc.local_pos_alloc);
|
||||||
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
|
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant);
|
||||||
state.draw.program_pipeline = 0;
|
state.draw.program_pipeline = 0;
|
||||||
|
|
||||||
|
@ -329,6 +329,11 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy
|
|||||||
source += fmt::format("shared uint smem[{}];",
|
source += fmt::format("shared uint smem[{}];",
|
||||||
Common::AlignUp(variant.shared_memory_size, 4) / 4);
|
Common::AlignUp(variant.shared_memory_size, 4) / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (variant.local_memory_size > 0) {
|
||||||
|
source += fmt::format("#define LOCAL_MEMORY_SIZE {}",
|
||||||
|
Common::AlignUp(variant.local_memory_size, 4) / 4);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
source += '\n';
|
source += '\n';
|
||||||
|
@ -510,10 +510,14 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void DeclareLocalMemory() {
|
void DeclareLocalMemory() {
|
||||||
// TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
|
if (stage == ProgramType::Compute) {
|
||||||
// specialization time.
|
code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
|
||||||
const u64 local_memory_size =
|
code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
|
||||||
stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
|
code.AddLine("#endif");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u64 local_memory_size = header.GetLocalMemorySize();
|
||||||
if (local_memory_size == 0) {
|
if (local_memory_size == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -851,9 +855,6 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||||
if (stage == ProgramType::Compute) {
|
|
||||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
|
||||||
}
|
|
||||||
return {
|
return {
|
||||||
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
@ -1228,9 +1229,6 @@ private:
|
|||||||
}
|
}
|
||||||
target = std::move(*output);
|
target = std::move(*output);
|
||||||
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
|
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
|
||||||
if (stage == ProgramType::Compute) {
|
|
||||||
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
|
|
||||||
}
|
|
||||||
target = {
|
target = {
|
||||||
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
|
@ -52,11 +52,11 @@ struct BindlessSamplerKey {
|
|||||||
Tegra::Engines::SamplerDescriptor sampler{};
|
Tegra::Engines::SamplerDescriptor sampler{};
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr u32 NativeVersion = 8;
|
constexpr u32 NativeVersion = 9;
|
||||||
|
|
||||||
// Making sure sizes doesn't change by accident
|
// Making sure sizes doesn't change by accident
|
||||||
static_assert(sizeof(BaseBindings) == 16);
|
static_assert(sizeof(BaseBindings) == 16);
|
||||||
static_assert(sizeof(ProgramVariant) == 32);
|
static_assert(sizeof(ProgramVariant) == 36);
|
||||||
|
|
||||||
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
ShaderCacheVersionHash GetShaderCacheVersionHash() {
|
||||||
ShaderCacheVersionHash hash{};
|
ShaderCacheVersionHash hash{};
|
||||||
|
@ -64,10 +64,10 @@ struct ProgramVariant final {
|
|||||||
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
|
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {}
|
||||||
|
|
||||||
/// Compute constructor.
|
/// Compute constructor.
|
||||||
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z,
|
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
|
||||||
u32 shared_memory_size) noexcept
|
u32 local_memory_size) noexcept
|
||||||
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
|
: block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
|
||||||
shared_memory_size{shared_memory_size} {}
|
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
|
||||||
|
|
||||||
// Graphics specific parameters.
|
// Graphics specific parameters.
|
||||||
BaseBindings base_bindings{};
|
BaseBindings base_bindings{};
|
||||||
@ -78,12 +78,13 @@ struct ProgramVariant final {
|
|||||||
u16 block_y{};
|
u16 block_y{};
|
||||||
u16 block_z{};
|
u16 block_z{};
|
||||||
u32 shared_memory_size{};
|
u32 shared_memory_size{};
|
||||||
|
u32 local_memory_size{};
|
||||||
|
|
||||||
bool operator==(const ProgramVariant& rhs) const noexcept {
|
bool operator==(const ProgramVariant& rhs) const noexcept {
|
||||||
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
|
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z,
|
||||||
shared_memory_size) == std::tie(rhs.base_bindings, rhs.primitive_mode,
|
shared_memory_size, local_memory_size) ==
|
||||||
rhs.block_x, rhs.block_y, rhs.block_z,
|
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y,
|
||||||
rhs.shared_memory_size);
|
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator!=(const ProgramVariant& rhs) const noexcept {
|
bool operator!=(const ProgramVariant& rhs) const noexcept {
|
||||||
@ -133,7 +134,8 @@ struct hash<OpenGL::ProgramVariant> {
|
|||||||
static_cast<std::size_t>(variant.block_x) ^
|
static_cast<std::size_t>(variant.block_x) ^
|
||||||
(static_cast<std::size_t>(variant.block_y) << 32) ^
|
(static_cast<std::size_t>(variant.block_y) << 32) ^
|
||||||
(static_cast<std::size_t>(variant.block_z) << 48) ^
|
(static_cast<std::size_t>(variant.block_z) << 48) ^
|
||||||
(static_cast<std::size_t>(variant.shared_memory_size) << 16);
|
(static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
|
||||||
|
(static_cast<std::size_t>(variant.local_memory_size) << 36);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user