diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 0184289eb..4ff588851 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -157,6 +157,9 @@ endif()
 add_library(stb stb/stb_dxt.cpp)
 target_include_directories(stb PUBLIC ./stb)
 
+add_library(bc_decoder bc_decoder/bc_decoder.cpp) # BCn block decoder library, built from a single source file
+target_include_directories(bc_decoder PUBLIC ./bc_decoder) # PUBLIC: targets that link bc_decoder also get this include path
+
 if (ANDROID)
    if (ARCHITECTURE_arm64)
        add_subdirectory(libadrenotools)
diff --git a/externals/bc_decoder/bc_decoder.cpp b/externals/bc_decoder/bc_decoder.cpp
new file mode 100644
index 000000000..536c44f34
--- /dev/null
+++ b/externals/bc_decoder/bc_decoder.cpp
@@ -0,0 +1,1522 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
+
+// This BCn Decoder is directly derivative of Swiftshader's BCn Decoder found at: https://github.com/google/swiftshader/blob/d070309f7d154d6764cbd514b1a5c8bfcef61d06/src/Device/BC_Decoder.cpp
+// This file does not follow the Skyline code conventions but has certain Skyline specific code
+// There are a lot of implicit and narrowing conversions in this file due to this (Warnings are disabled as a result)
+
+#include <array>
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+namespace {
+    constexpr int BlockWidth = 4;  // Texels per block row; bounds the inner (x) loop of the block decoders
+    constexpr int BlockHeight = 4;  // Texel rows per block; bounds the outer (y) loop of the block decoders
+
+    struct BC_color {  // 8-byte color block: two 565 endpoints (c0, c1) followed by sixteen 2-bit palette indices (idx)
+        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, bool hasAlphaChannel, bool hasSeparateAlpha) const {  // dst: top-left texel of this block; x/y: block origin, only used to clip against dstW/dstH
+            Color c[4];  // 4-entry palette; every entry starts as opaque black
+            c[0].extract565(c0);
+            c[1].extract565(c1);
+            if (hasSeparateAlpha || (c0 > c1)) {  // 4-color mode: two extra colors at 1/3 and 2/3 between the endpoints
+                c[2] = ((c[0] * 2) + c[1]) / 3;
+                c[3] = ((c[1] * 2) + c[0]) / 3;
+            } else {  // 3-color mode: midpoint, plus a black entry that becomes transparent when the format has alpha
+                c[2] = (c[0] + c[1]) >> 1;
+                if (hasAlphaChannel) {
+                    c[3].clearAlpha();
+                }
+            }
+
+            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {  // rows and columns outside dstW x dstH are skipped
+                size_t dstOffset = j * dstPitch;
+                size_t idxOffset = j * BlockWidth;  // BlockWidth indices are stored per block row
+                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, idxOffset++, dstOffset += dstBpp) {
+                    *reinterpret_cast<unsigned int *>(dst + dstOffset) = c[getIdx(idxOffset)].pack8888();  // NOTE(review): 4-byte store per texel, presumably needs dstBpp >= 4 and a suitably aligned dst - confirm with callers
+                }
+            }
+        }
+
+      private:
+        struct Color {  // Palette entry; channels are ints so weighted sums of up to three entries do not overflow
+            Color() {
+                c[0] = c[1] = c[2] = 0;
+                c[3] = static_cast<int>(0xFF000000u);  // Opaque alpha, kept pre-shifted into bits 24-31
+            }
+
+            void extract565(const unsigned int c565) {  // Widens the 5/6/5-bit fields to 8 bits by replicating their top bits; c[3] is untouched
+                c[0] = ((c565 & 0x0000001F) << 3) | ((c565 & 0x0000001C) >> 2);
+                c[1] = ((c565 & 0x000007E0) >> 3) | ((c565 & 0x00000600) >> 9);
+                c[2] = ((c565 & 0x0000F800) >> 8) | ((c565 & 0x0000E000) >> 13);
+            }
+
+            unsigned int pack8888() const {  // Packs c[2] into bits 0-7, c[1] into bits 8-15, c[0] into bits 16-23 and ORs in the pre-shifted alpha
+                return ((c[0] & 0xFF) << 16) | ((c[1] & 0xFF) << 8) | (c[2] & 0xFF) | c[3];
+            }
+
+            void clearAlpha() {  // Makes the entry fully transparent
+                c[3] = 0;
+            }
+
+            Color operator*(int factor) const {  // The arithmetic operators act on all four channels, alpha included
+                Color res;
+                for (int i = 0; i < 4; ++i) {
+                    res.c[i] = c[i] * factor;
+                }
+                return res;
+            }
+
+            Color operator/(int factor) const {
+                Color res;
+                for (int i = 0; i < 4; ++i) {
+                    res.c[i] = c[i] / factor;
+                }
+                return res;
+            }
+
+            Color operator>>(int shift) const {
+                Color res;
+                for (int i = 0; i < 4; ++i) {
+                    res.c[i] = c[i] >> shift;
+                }
+                return res;
+            }
+
+            Color operator+(Color const &obj) const {
+                Color res;
+                for (int i = 0; i < 4; ++i) {
+                    res.c[i] = c[i] + obj.c[i];
+                }
+                return res;
+            }
+
+          private:
+            int c[4];  // Three color channels plus the pre-shifted alpha in c[3]
+        };
+
+        size_t getIdx(size_t i) const {  // Returns the 2-bit palette index of texel i (0..15)
+            size_t offset = i << 1;  // 2 bits per index
+            return (idx & (0x3u << offset)) >> offset;  // Unsigned mask: 0x3 << 30 would overflow a signed int for texel 15
+        }
+
+        unsigned short c0;  // First 565 endpoint
+        unsigned short c1;  // Second 565 endpoint
+        unsigned int idx;  // Sixteen 2-bit palette indices, texel 0 in the lowest bits
+    };
+    static_assert(sizeof(BC_color) == 8, "BC_color must be 8 bytes");
+
+    struct BC_channel {
+        // Decodes one single-channel block (two 8-bit endpoints, sixteen 3-bit selectors) into byte `channel` of each texel
+        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, size_t channel, bool isSigned) const {
+            // The endpoints occupy the two lowest bytes and are read as signed or unsigned 8-bit values
+            const uint64_t rawFirst = data & 0xFF;
+            const uint64_t rawSecond = (data & 0xFF00) >> 8;
+            const int first = isSigned ? static_cast<signed char>(rawFirst) : static_cast<uint8_t>(rawFirst);
+            const int second = isSigned ? static_cast<signed char>(rawSecond) : static_cast<uint8_t>(rawSecond);
+
+            // first > second: six interpolated values; otherwise four, followed by the range minimum and maximum
+            const int steps = (first > second) ? 7 : 5;
+            int palette[8] = {first, second, 0, 0, 0, 0, 0, 0};
+            for (int k = 1; k < steps; ++k) {
+                palette[k + 1] = ((steps - k) * first + k * second) / steps;
+            }
+            if (steps == 5) {
+                palette[6] = isSigned ? -128 : 0;
+                palette[7] = isSigned ? 127 : 255;
+            }
+
+            // Texels outside the dstW x dstH area are skipped
+            for (size_t row = 0; row < BlockHeight && (y + row) < dstH; row++) {
+                const size_t rowBase = channel + (row * dstPitch);
+                for (size_t col = 0; col < BlockWidth && (x + col) < dstW; col++) {
+                    dst[rowBase + (col * dstBpp)] = static_cast<uint8_t>(palette[getIdx((row * BlockHeight) + col)]);
+                }
+            }
+        }
+
+      private:
+        // Returns the 3-bit selector of texel i; the selectors start at bit 16, right after the endpoints
+        uint8_t getIdx(int i) const {
+            const int shift = 16 + 3 * i;
+            return static_cast<uint8_t>((data >> shift) & 0x7ull);
+        }
+
+        // Raw block bits
+        uint64_t data;
+    };
+    static_assert(sizeof(BC_channel) == 8, "BC_channel must be 8 bytes");
+
+    struct BC_alpha {
+        // Expands the sixteen 4-bit alpha values of this block into byte 3 of every destination texel inside dstW x dstH
+        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp) const {
+            for (size_t row = 0; row < BlockHeight && (y + row) < dstH; row++) {
+                for (size_t col = 0; col < BlockWidth && (x + col) < dstW; col++) {
+                    dst[(row * dstPitch) + (col * dstBpp) + 3] = getAlpha(row * BlockHeight + col);  // +3: alpha byte of the texel
+                }
+            }
+        }
+
+      private:
+        // Returns the 4-bit alpha of texel i widened to 8 bits by copying the nibble into both halves of the byte
+        uint8_t getAlpha(int i) const {
+            const int shift = i * 4;
+            const int nibble = static_cast<int>((data >> shift) & 0xFull);
+            return static_cast<uint8_t>((nibble << 4) | nibble);
+        }
+
+        uint64_t data;
+    };
+    static_assert(sizeof(BC_alpha) == 8, "BC_alpha must be 8 bytes");
+
+    namespace BC6H {
+        static constexpr int MaxPartitions = 64;  // Row count of PartitionTable2 and entry count of AnchorTable2
+
+        // @fmt:off
+
+        static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {  // [partition][entry]: sixteen 0/1 values per row - presumably the subset of each texel of the 4x4 block, TODO confirm against the decoder
+            { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
+            { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 },
+            { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
+            { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 },
+            { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 },
+            { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 },
+            { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 },
+            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 },
+            { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 },
+            { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
+            { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
+            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
+            { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 },
+            { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
+            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
+            { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 },
+            { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 },
+            { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 },
+            { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+            { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 },
+            { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
+            { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
+            { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 },
+            { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 },
+            { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 },
+            { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 },
+            { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 },
+            { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
+            { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 },
+            { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 },
+            { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
+            { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 },
+            { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 },
+            { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 },
+            { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
+            { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 },
+            { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
+            { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
+            { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
+            { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 },
+            { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 },
+            { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 },
+            { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
+            { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
+            { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 },
+            { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
+            { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 },
+            { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
+            { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 },
+            { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
+            { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
+            { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 },
+            { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 },
+        };
+
+        static constexpr uint8_t AnchorTable2[MaxPartitions] = {  // One value per partition, each in 0x2..0xf - presumably the anchor texel index of the second subset, TODO confirm against the decoder
+            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+            0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
+            0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
+            0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
+            0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
+            0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
+            0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
+        };
+
+    // @fmt:on
+
+        // 1.0f in half-precision floating point format; used as the fixed alpha of every decoded texel
+        static constexpr uint16_t halfFloat1 = 0x3C00;
+        union Color {  // One output texel: four 16-bit values, addressable by name (rgba) or by index (channel)
+            struct RGBA {
+                uint16_t r = 0;
+                uint16_t g = 0;
+                uint16_t b = 0;
+                uint16_t a = halfFloat1;
+
+                RGBA(uint16_t r, uint16_t g, uint16_t b)
+                    : r(r), g(g), b(b) {
+                }
+
+                RGBA &operator=(const RGBA &other) {  // Copies r, g, b only; alpha is always reset to 1.0
+                    this->r = other.r;
+                    this->g = other.g;
+                    this->b = other.b;
+                    this->a = halfFloat1;
+
+                    return *this;
+                }
+            };
+
+            Color(uint16_t r, uint16_t g, uint16_t b)
+                : rgba(r, g, b) {
+            }
+
+            Color(int r, int g, int b)  // Convenience overload; each value is truncated to 16 bits
+                : rgba(static_cast<uint16_t>(r), static_cast<uint16_t>(g), static_cast<uint16_t>(b)) {
+            }
+
+            Color() : rgba(0, 0, 0) {}  // Opaque black; previously every channel, alpha included, was left indeterminate
+
+            Color(const Color &other)  // Constructs rgba directly instead of assigning into a not-yet-constructed member
+                : rgba(other.rgba.r, other.rgba.g, other.rgba.b) {
+            }
+
+            Color &operator=(const Color &other) {  // Same contract as RGBA::operator=: copies r, g, b and resets alpha
+                this->rgba = other.rgba;
+
+                return *this;
+            }
+
+            RGBA rgba;
+            uint16_t channel[4];  // Same storage as rgba, indexed in r, g, b, a order
+        };
+        static_assert(sizeof(Color) == 8, "BC6h::Color must be 8 bytes long");
+
+        inline int32_t extendSign(int32_t val, size_t size) {
+            // Sign-extends the low `size` bits of val: they are read as a
+            // `size`-bit two's complement number whose value is returned.
+            //
+            // The trick: XOR with the sign-bit mask toggles the sign bit, and
+            // subtracting the same mask afterwards either restores that bit or
+            // borrows through it and every bit above it.
+            //
+            // Example with size = 2, so the mask is 0b0010:
+            //    sign bit set:    0b001B ^ mask = 0b000B, then 0b000B - mask = 0b111B
+            //    sign bit clear:  0b000B ^ mask = 0b001B, then 0b001B - mask = 0b000B
+            //
+            // As in the example, the bits of val above the sign bit are taken
+            // to be zero on entry.
+            //
+            // The mask is held in a uint16_t, so a size above 16 yields a zero
+            // mask and val comes back unchanged.
+            const uint16_t signBit = 1u << (size - 1);
+            const int32_t flipped = val ^ signBit;
+            return flipped - signBit;
+        }
+
+        static int constexpr RGBfChannels = 3;  // Number of channels stored in an RGBf
+        struct RGBf {  // One endpoint: three channel values plus the bit width each value currently occupies
+            uint16_t channel[RGBfChannels];  // Channel values
+            size_t size[RGBfChannels];  // Bit width of each channel value
+            bool isSigned;  // Selects the signed variants of delta resolution and unquantization
+
+            RGBf() {  // Zeroes all channels and sizes and marks the endpoint unsigned
+                static_assert(RGBfChannels == 3, "RGBf must have exactly 3 channels");
+                static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels");
+                static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes");
+
+                for (int i = 0; i < RGBfChannels; i++) {
+                    channel[i] = 0;
+                    size[i] = 0;
+                }
+
+                isSigned = false;
+            }
+
+            void extendSign() {  // Sign-extends every channel from its own bit width; the result is stored back truncated to 16 bits
+                for (int i = 0; i < RGBfChannels; i++) {
+                    channel[i] = BC6H::extendSign(channel[i], size[i]);
+                }
+            }
+
+            // Assuming this is the delta, take the base-endpoint and transform this into
+            // a proper endpoint.
+            //
+            // The final computed endpoint is truncated to the base-endpoint's size;
+            void resolveDelta(RGBf base) {
+                for (int i = 0; i < RGBfChannels; i++) {
+                    size[i] = base.size[i];
+                    channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1);  // Add, then keep only the low base.size[i] bits
+                }
+
+                // Per the spec:
+                // "For signed formats, the results of the delta calculation must be sign
+                // extended as well."
+                if (isSigned) {
+                    extendSign();
+                }
+            }
+
+            void unquantize() {  // Dispatches to the signed or unsigned unquantization according to isSigned
+                if (isSigned) {
+                    unquantizeSigned();
+                } else {
+                    unquantizeUnsigned();
+                }
+            }
+
+            void unquantizeUnsigned() {  // Rescales each channel from size[i] bits to the 16-bit range
+                for (int i = 0; i < RGBfChannels; i++) {
+                    if (size[i] >= 15 || channel[i] == 0) {  // Left untouched, including size[i]
+                        continue;
+                    } else if (channel[i] == ((1u << size[i]) - 1)) {  // All bits set maps to the 16-bit maximum
+                        channel[i] = 0xFFFFu;
+                    } else {
+                        // Need 32 bits to avoid overflow
+                        uint32_t tmp = channel[i];
+                        channel[i] = (uint16_t) (((tmp << 16) + 0x8000) >> size[i]);
+                    }
+                    size[i] = 16;
+                }
+            }
+
+            void unquantizeSigned() {  // Rescales the magnitude of each channel to 15 bits and re-applies the sign
+                for (int i = 0; i < RGBfChannels; i++) {
+                    if (size[i] >= 16 || channel[i] == 0) {  // Left untouched, including size[i]
+                        continue;
+                    }
+
+                    int16_t value = (int16_t)channel[i];
+                    int32_t result = value;
+                    bool signBit = value < 0;
+                    if (signBit) {
+                        value = -value;  // Work on the magnitude; the sign is restored below
+                    }
+
+                    if (value >= ((1 << (size[i] - 1)) - 1)) {  // Largest magnitude maps to the 15-bit maximum
+                        result = 0x7FFF;
+                    } else {
+                        // Need 32 bits to avoid overflow
+                        int32_t tmp = value;
+                        result = (((tmp << 15) + 0x4000) >> (size[i] - 1));
+                    }
+
+                    if (signBit) {
+                        result = -result;
+                    }
+
+                    channel[i] = (uint16_t) result;
+                    size[i] = 16;
+                }
+            }
+        };
+
+        struct Data {  // The 128 bits of a block, consumed from the least significant bit upwards
+            uint64_t low64;
+            uint64_t high64;
+
+            Data() = default;
+
+            Data(uint64_t low64, uint64_t high64)
+                : low64(low64), high64(high64) {
+            }
+
+            // Consumes the lowest N bits from low64 (refilled from high64) where N is:
+            //      abs(MSB - LSB) + 1
+            // MSB and LSB come from the block description of the BC6h spec and specify
+            // the location of the bits in the returned bitstring.
+            //
+            // If MSB < LSB, then the bits are reversed. Otherwise, the bitstring is read and
+            // shifted without further modification.
+            // The result is shifted left by min(MSB, LSB). N must be below 32.
+            uint32_t consumeBits(uint32_t MSB, uint32_t LSB) {
+                const bool reversed = MSB < LSB;
+                if (reversed) {
+                    std::swap(MSB, LSB);
+                }
+                assert(MSB - LSB + 1 < sizeof(uint32_t) * 8);
+
+                const uint32_t numBits = MSB - LSB + 1;
+                const uint32_t mask = (1u << numBits) - 1;  // Unsigned shift: 1 << 31 would overflow a signed int
+                // Read the low N bits
+                uint32_t bits = static_cast<uint32_t>(low64 & mask);
+
+                low64 >>= numBits;
+                // Put the low N bits of high64 into the high N bits of low64
+                low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits);
+                high64 >>= numBits;
+
+                if (reversed) {
+                    uint32_t tmp = 0;
+                    for (uint32_t numSwaps = 0; numSwaps < numBits; numSwaps++) {
+                        tmp <<= 1;
+                        tmp |= (bits & 1);
+                        bits >>= 1;
+                    }
+
+                    bits = tmp;
+                }
+
+                return bits << LSB;
+            }
+        };
+
+        struct IndexInfo {  // Interpolation index of one texel, as consumed by interpolate()
+            uint64_t value;  // The index itself; interpolate() uses it to look up a weight
+            int numBits;  // Width of the index in bits; interpolate() has weight tables for 3 and 4 only
+        };
+
+// Interpolates between two endpoints, then does a final unquantization step. The returned alpha is always 1.0.
+        Color interpolate(RGBf e0, RGBf e1, const IndexInfo &index, bool isSigned) {
+            static constexpr uint32_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
+            static constexpr uint32_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
+                                                    34, 38, 43, 47, 51, 55, 60, 64};
+            static constexpr uint32_t const *weightsN[] = {nullptr, nullptr, nullptr, weights3, weights4};
+            // Only 3- and 4-bit indices have a weight table; validate before indexing so a bad width is caught first
+            assert(index.numBits == 3 || index.numBits == 4);
+            const uint32_t *weights = weightsN[index.numBits];
+            assert(index.value < (1ull << index.numBits));
+            Color color(0, 0, 0);  // Explicit init: alpha stays defined even when the return copy is elided
+            uint32_t e0Weight = 64 - weights[index.value];
+            uint32_t e1Weight = weights[index.value];
+
+            for (int i = 0; i < RGBfChannels; i++) {
+                int32_t e0Channel = e0.channel[i];
+                int32_t e1Channel = e1.channel[i];
+
+                if (isSigned) {
+                    e0Channel = extendSign(e0Channel, 16);
+                    e1Channel = extendSign(e1Channel, 16);
+                }
+
+                int32_t e0Value = e0Channel * static_cast<int32_t>(e0Weight);  // Weights are at most 64, so the products fit in 32 bits
+                int32_t e1Value = e1Channel * static_cast<int32_t>(e1Weight);
+
+                uint32_t tmp = ((e0Value + e1Value + 32) >> 6);  // Weighted sum divided by 64, rounded
+
+                // Need to unquantize value to limit it to the legal range of half-precision
+                // floats. We do this by scaling by 31/32 or 31/64 depending on if the value
+                // is signed or unsigned.
+                if (isSigned) {
+                    tmp = ((tmp & 0x80000000) != 0) ? (((~tmp + 1) * 31) >> 5) | 0x8000 : (tmp * 31) >> 5;
+                    // Don't return -0.0f, just normalize it to 0.0f.
+                    if (tmp == 0x8000)
+                        tmp = 0;
+                } else {
+                    tmp = (tmp * 31) >> 6;
+                }
+
+                color.channel[i] = static_cast<uint16_t>(tmp);
+            }
+
+            return color;
+        }
+
+        enum DataType {  // What a BlockDesc entry describes
+            // Endpoints
+            EP0 = 0,
+            EP1 = 1,
+            EP2 = 2,
+            EP3 = 3,
+            Mode,  // Entry that carries the ModeDesc of the row
+            Partition,  // Bits of the partition number
+            End,  // Terminates a row of the description table
+        };
+
+        enum Channel {  // Color channel a BlockDesc entry refers to
+            R = 0,
+            G = 1,
+            B = 2,
+            None,  // Used by entries that are not endpoint bits (Mode, Partition, End)
+        };
+
+        struct DeltaBits {
+            // Bit widths of the delta-encoded endpoint channels, stored in r, g, b order
+            size_t channel[3];
+
+            // Stores the three widths as given
+            constexpr DeltaBits(size_t r, size_t g, size_t b)
+                : channel{r, g, b} {}
+
+            // All widths zero
+            constexpr DeltaBits() : DeltaBits(0, 0, 0) {}
+        };
+
+        struct ModeDesc {
+            int number;  // Mode value as read from the block; -1 in a placeholder
+            bool hasDelta;  // True when the endpoints after the first use deltaBits as their widths
+            int partitionCount;  // Partition count of the mode
+            int endpointBits;  // Width of the first endpoint's channels (and of all endpoints when hasDelta is false)
+            DeltaBits deltaBits;  // Per-channel widths of the remaining endpoints when hasDelta is true
+
+            // Fully specified mode description
+            constexpr ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits)
+                : number(number), hasDelta(hasDelta), partitionCount(partitionCount), endpointBits(endpointBits), deltaBits(deltaBits) {}
+
+            // Placeholder: number -1, no delta, every count and width zero
+            constexpr ModeDesc()
+                : ModeDesc(-1, false, 0, 0, DeltaBits()) {}
+        };
+
+        struct BlockDesc {
+            DataType type;  // Which endpoint the bits belong to, or Mode / Partition / End
+            Channel channel;  // Channel within the endpoint; None for non-endpoint entries
+            int MSB;  // Together with LSB: size, position and bit order of the field
+            int LSB;
+            ModeDesc modeDesc;  // Only meaningful on Mode entries; a placeholder otherwise
+
+            // Entry that also carries a mode description
+            constexpr BlockDesc(const DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc)
+                : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc(modeDesc) {}
+
+            // Plain bitfield entry; modeDesc keeps its placeholder value
+            constexpr BlockDesc(DataType type, Channel channel, int MSB, int LSB)
+                : BlockDesc(type, channel, MSB, LSB, ModeDesc()) {}
+
+            // Default entries are End markers with no channel and zeroed bit positions
+            constexpr BlockDesc()
+                : BlockDesc(End, None, 0, 0) {}
+        };
+
+// Maps a mode value to the index of its row in the BlockDesc table.
+// Returns -1 for modes that are illegal or reserved.
+        static int modeToIndex(uint8_t mode) {
+            // The four lowest modes map to themselves
+            if (mode <= 3) {
+                return mode;
+            }
+            // Every remaining legal mode has bit 1 set
+            const bool bit1Set = (mode & 0x2) != 0;
+            if (bit1Set && mode <= 18) {
+                return (mode >> 1) + (mode & 0x1) + 1;  // 6 -> 4, 7 -> 5, 10 -> 6, ...
+            }
+            if (mode == 22 || mode == 26 || mode == 30) {
+                return (mode >> 2) + 6;  // 22 -> 11, 26 -> 12, 30 -> 13
+            }
+            return -1;
+        }
+
+// Returns a description of the bitfields for each mode from the LSB
+// to the MSB before the index data starts.
+//
+// The numbers come from the BC6h block description. Each BlockDesc has the form
+//   {Type, Channel, MSB, LSB}
+//   * Type describes which endpoint this is, or if this is a mode, a partition
+//     number, or the end of the block description.
+//   * Channel describes one of the 3 color channels within an endpoint
+//   * MSB and LSB specify:
+//      * The size of the bitfield being read
+//      * The position of the bitfield within the variable it is being read to
+//      * If the bitfield is stored in reverse bit order
+//     If MSB < LSB then the bitfield is stored in reverse order. The size of
+//     the bitfield is abs(MSB-LSB)+1. And the position of the bitfield within
+//     the variable is min(LSB, MSB).
+//
+// Invalid or reserved modes return an empty list.
+        static constexpr int NumBlocks = 14;  // One row per legal mode, in the index order produced by modeToIndex()
+// The largest number of descriptions within a row, counting its Mode and End entries.
+        static constexpr int MaxBlockDescIndex = 26;
+        static constexpr BlockDesc blockDescs[NumBlocks][MaxBlockDescIndex] = {  // Rows shorter than MaxBlockDescIndex are padded with default-constructed (End) entries
+// @fmt:off
+// Mode 0, Index 0
+{
+{ Mode, None, 1, 0, { 0, true, 2, 10, { 5, 5, 5 } } },
+{ EP2, G, 4, 4 }, { EP2, B, 4, 4 }, { EP3, B, 4, 4 },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
+{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
+{ EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 1, Index 1
+{
+{ Mode, None, 1, 0, { 1, true, 2, 7, { 6, 6, 6 } } },
+{ EP2, G, 5, 5 }, { EP3, G, 5, 4 }, { EP0, R, 6, 0 },
+{ EP3, B, 1, 0 }, { EP2, B, 4, 4 }, { EP0, G, 6, 0 },
+{ EP2, B, 5, 5 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
+{ EP0, B, 6, 0 }, { EP3, B, 3, 3 }, { EP3, B, 5, 5 },
+{ EP3, B, 4, 4 }, { EP1, R, 5, 0 }, { EP2, G, 3, 0 },
+{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 5, 0 },
+{ EP2, B, 3, 0 }, { EP2, R, 5, 0 }, { EP3, R, 5, 0 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 2, Index 2
+{
+{ Mode, None, 4, 0, { 2, true, 2, 11, { 5, 4, 4 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 4, 0 }, { EP0, R, 10, 10 }, { EP2, G, 3, 0 },
+{ EP1, G, 3, 0 }, { EP0, G, 10, 10 }, { EP3, B, 0, 0 },
+{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
+{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 3, Index 3
+{
+{ Mode, None, 4, 0, { 3, false, 1, 10, { 0, 0, 0 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 9, 0 }, { EP1, G, 9, 0 }, { EP1, B, 9, 0 },
+{ End, None, 0, 0},
+},
+// Mode 6, Index 4
+{
+{ Mode, None, 4, 0, { 6, true, 2, 11, { 4, 5, 4 } } }, // 1 1
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP3, G, 4, 4 },
+{ EP2, G, 3, 0 }, { EP1, G, 4, 0 }, { EP0, G, 10, 10 },
+{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
+{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
+{ EP3, B, 0, 0 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 }, // 18 19
+{ EP2, G, 4, 4 }, { EP3, B, 3, 3 }, // 2 21
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 7, Index 5
+{
+{ Mode, None, 4, 0, { 7, true, 1, 11, { 9, 9, 9 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 8, 0 }, { EP0, R, 10, 10 }, { EP1, G, 8, 0 },
+{ EP0, G, 10, 10 }, { EP1, B, 8, 0 }, { EP0, B, 10, 10 },
+{ End, None, 0, 0},
+},
+// Mode 10, Index 6
+{
+{ Mode, None, 4, 0, { 10, true, 2, 11, { 4, 4, 5 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP2, B, 4, 4 },
+{ EP2, G, 3, 0 }, { EP1, G, 3, 0 }, { EP0, G, 10, 10 },
+{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+{ EP0, B, 10, 10 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
+{ EP3, B, 1, 1 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 },
+{ EP3, B, 4, 4 }, { EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 11, Index 7
+{
+{ Mode, None, 4, 0, { 11, true, 1, 12, { 8, 8, 8 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 7, 0 }, { EP0, R, 10, 11 }, { EP1, G, 7, 0 },
+{ EP0, G, 10, 11 }, { EP1, B, 7, 0 }, { EP0, B, 10, 11 },
+{ End, None, 0, 0},
+},
+// Mode 14, Index 8
+{
+{ Mode, None, 4, 0, { 14, true, 2, 9, { 5, 5, 5 } } },
+{ EP0, R, 8, 0 }, { EP2, B, 4, 4 }, { EP0, G, 8, 0 },
+{ EP2, G, 4, 4 }, { EP0, B, 8, 0 }, { EP3, B, 4, 4 },
+{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
+{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
+{ EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 15, Index 9
+{
+{ Mode, None, 4, 0, { 15, true, 1, 16, { 4, 4, 4 } } },
+{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
+{ EP1, R, 3, 0 }, { EP0, R, 10, 15 }, { EP1, G, 3, 0 },
+{ EP0, G, 10, 15 }, { EP1, B, 3, 0 }, { EP0, B, 10, 15 },
+{ End, None, 0, 0},
+},
+// Mode 18, Index 10
+{
+{ Mode, None, 4, 0, { 18, true, 2, 8, { 6, 5, 5 } } },
+{ EP0, R, 7, 0 }, { EP3, G, 4, 4 }, { EP2, B, 4, 4 },
+{ EP0, G, 7, 0 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
+{ EP0, B, 7, 0 }, { EP3, B, 3, 3 }, { EP3, B, 4, 4 },
+{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 4, 0 },
+{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 5, 0 },
+{ EP3, R, 5, 0 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 22, Index 11
+{
+{ Mode, None, 4, 0, { 22, true, 2, 8, { 5, 6, 5 } } },
+{ EP0, R, 7, 0 }, { EP3, B, 0, 0 }, { EP2, B, 4, 4 },
+{ EP0, G, 7, 0 }, { EP2, G, 5, 5 }, { EP2, G, 4, 4 },
+{ EP0, B, 7, 0 }, { EP3, G, 5, 5 }, { EP3, B, 4, 4 },
+{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
+{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 26, Index 12
+{
+{ Mode, None, 4, 0, { 26, true, 2, 8, { 5, 5, 6 } } },
+{ EP0, R, 7, 0 }, { EP3, B, 1, 1 }, { EP2, B, 4, 4 },
+{ EP0, G, 7, 0 }, { EP2, B, 5, 5 }, { EP2, G, 4, 4 },
+{ EP0, B, 7, 0 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
+{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
+{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
+{ EP1, B, 5, 0 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
+{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+},
+// Mode 30, Index 13
+{
+{ Mode, None, 4, 0, { 30, false, 2, 6, { 0, 0, 0 } } },
+{ EP0, R, 5, 0 }, { EP3, G, 4, 4 }, { EP3, B, 0, 0 },
+{ EP3, B, 1, 1 }, { EP2, B, 4, 4 }, { EP0, G, 5, 0 },
+{ EP2, G, 5, 5 }, { EP2, B, 5, 5 }, { EP3, B, 2, 2 },
+{ EP2, G, 4, 4 }, { EP0, B, 5, 0 }, { EP3, G, 5, 5 },
+{ EP3, B, 3, 3 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
+{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 5, 0 },
+{ EP3, G, 3, 0 }, { EP1, B, 5, 0 }, { EP2, B, 3, 0 },
+{ EP2, R, 5, 0 }, { EP3, R, 5, 0 },
+{ Partition, None, 4, 0 },
+{ End, None, 0, 0},
+}
+// @fmt:on
+        };
+
+        /**
+         * @brief A single 128-bit BC6H block, reinterpreted directly from the compressed source bytes
+         */
+        struct Block {
+            uint64_t low64; //!< The first 8 bytes of the block, handed to Data as the low word
+            uint64_t high64; //!< The last 8 bytes of the block, handed to Data as the high word
+
+            /**
+             * @brief Decodes this block into up to 4x4 texels
+             * @param dst Address the block's first texel is written to, texel (x, y) of the block lands at `dst + dstBpp * x + dstPitch * y`
+             * @param dstX Horizontal texel position of the block, only used to clip against dstWidth
+             * @param dstY Vertical texel position of the block, only used to clip against dstHeight
+             * @param dstWidth Texels at or beyond this horizontal position are not written
+             * @param dstHeight Texels at or beyond this vertical position are not written
+             * @param dstPitch The amount of bytes between two consecutive output rows
+             * @param dstBpp The amount of bytes per output texel, must equal sizeof(Color)
+             * @param isSigned If the block holds signed data, this controls endpoint sign extension and is forwarded to interpolate()
+             */
+            void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch, size_t dstBpp, bool isSigned) const {
+                uint8_t mode = 0;
+                Data data(low64, high64);
+                assert(dstBpp == sizeof(Color));
+
+                // Bit 1 of the low word selects between the short and the long mode field
+                // NOTE(review): consumeBits(msb, lsb) is defined elsewhere, presumably these read 2 and 5 bits respectively
+                if ((data.low64 & 0x2) == 0) {
+                    mode = data.consumeBits(1, 0);
+                } else {
+                    mode = data.consumeBits(4, 0);
+                }
+
+                int blockIndex = modeToIndex(mode);
+                // Handle illegal or reserved mode
+                if (blockIndex == -1) {
+                    // Only the texels that fall inside the destination bounds are cleared
+                    for (int y = 0; y < 4 && y + dstY < dstHeight; y++) {
+                        for (int x = 0; x < 4 && x + dstX < dstWidth; x++) {
+                            auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
+                            out->rgba = {0, 0, 0};
+                        }
+                    }
+                    return;
+                }
+                const BlockDesc *blockDesc = blockDescs[blockIndex];
+
+                RGBf e[4];
+                e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned;
+
+                int partition = 0;
+                ModeDesc modeDesc;
+                // Walk the descriptor list of the mode in order, every entry consumes the next field of the bitstream
+                for (int index = 0; blockDesc[index].type != End; index++) {
+                    const BlockDesc desc = blockDesc[index];
+
+                    switch (desc.type) {
+                        case Mode:
+                            modeDesc = desc.modeDesc;
+                            assert(modeDesc.number == mode);
+
+                            // The base endpoint always uses the full endpoint precision
+                            e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits;
+                            // The remaining endpoints are either per-channel deltas or full precision endpoints
+                            for (int i = 0; i < RGBfChannels; i++) {
+                                if (modeDesc.hasDelta) {
+                                    e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i];
+                                } else {
+                                    e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.endpointBits;
+                                }
+                            }
+                            break;
+                        case Partition:
+                            partition |= data.consumeBits(desc.MSB, desc.LSB);
+                            break;
+                        case EP0:
+                        case EP1:
+                        case EP2:
+                        case EP3:
+                            // The descriptor type doubles as the endpoint index, this relies on EP0..EP3 enumerating 0..3 (declared elsewhere)
+                            // A channel can be split across several descriptors, so the pieces are OR'd together
+                            e[desc.type].channel[desc.channel] |= data.consumeBits(desc.MSB, desc.LSB);
+                            break;
+                        default:
+                            assert(false);
+                            return;
+                    }
+                }
+
+                // Sign extension
+                if (isSigned) {
+                    for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
+                        e[ep].extendSign();
+                    }
+                } else if (modeDesc.hasDelta) {
+                    // Don't sign-extend the base endpoint in an unsigned format.
+                    for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
+                        e[ep].extendSign();
+                    }
+                }
+
+                // Turn the deltas into endpoints
+                if (modeDesc.hasDelta) {
+                    for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
+                        e[ep].resolveDelta(e[0]);
+                    }
+                }
+
+                for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
+                    e[ep].unquantize();
+                }
+
+                // Get the indices, calculate final colors, and output
+                for (int y = 0; y < 4; y++) {
+                    for (int x = 0; x < 4; x++) {
+                        int pixelNum = x + y * 4;
+                        IndexInfo idx;
+                        bool isAnchor = false;
+                        int firstEndpoint = 0;
+                        // Bc6H can have either 1 or 2 partitions depending on the mode.
+                        // The number of partitions affects the number of indices with implicit
+                        // leading 0 bits and the number of bits per index.
+                        if (modeDesc.partitionCount == 1) {
+                            idx.numBits = 4;
+                            // There's an implicit leading 0 bit for the first idx
+                            isAnchor = (pixelNum == 0);
+                        } else {
+                            idx.numBits = 3;
+                            // There are 2 indices with implicit leading 0-bits.
+                            isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
+                            // Each partition owns a pair of consecutive endpoints
+                            firstEndpoint = PartitionTable2[partition][pixelNum] * 2;
+                        }
+
+                        // Anchor indices are stored with one bit less as their top bit is implicitly zero
+                        idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0);
+
+                        // Don't exit the loop early, we need to consume these index bits regardless if
+                        // we actually output them or not.
+                        if ((y + dstY >= dstHeight) || (x + dstX >= dstWidth)) {
+                            continue;
+                        }
+
+                        Color color = interpolate(e[firstEndpoint], e[firstEndpoint + 1], idx, isSigned);
+                        auto out = reinterpret_cast<Color *>(dst + dstBpp * x + dstPitch * y);
+                        *out = color;
+                    }
+                }
+            }
+        };
+
+    }  // namespace BC6H
+
+    namespace BC7 {
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt
+// https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format
+
+        /**
+         * @brief A contiguous run of bits inside a BC7 block, described by where it starts and how wide it is
+         */
+        struct Bitfield {
+            int offset; //!< Index of the first bit of the field
+            int count; //!< Width of the field in bits, zero when the field is absent
+
+            /**
+             * @param bits The width of the field to create
+             * @return The field of the supplied width that starts directly after the last bit of this field
+             * @note This is const-qualified so that it can be chained from const and constexpr fields
+             */
+            constexpr Bitfield Then(const int bits) const { return {offset + count, bits}; }
+
+            /**
+             * @return If both fields start at the same bit and have the same width
+             */
+            constexpr bool operator==(const Bitfield &rhs) const {
+                return offset == rhs.offset && count == rhs.count;
+            }
+        };
+
+        /**
+         * @brief The parameters of a single BC7 mode alongside helpers that locate every field of a block encoded in that mode
+         * @note Fields are laid out back to back in the order: mode bits, partition, rotation, index selection, red, green, blue and alpha endpoints, endpoint P-bits, shared P-bits and finally the index data
+         */
+        struct Mode {
+            const int IDX;  //!< Mode index
+            const int NS;   //!< Number of subsets in each partition
+            const int PB;   //!< Partition bits
+            const int RB;   //!< Rotation bits
+            const int ISB;  //!< Index selection bits
+            const int CB;   //!< Color bits
+            const int AB;   //!< Alpha bits
+            const int EPB;  //!< Endpoint P-bits
+            const int SPB;  //!< Shared P-bits
+            const int IB;   //!< Primary index bits per element
+            const int IBC;  //!< Primary index bits total
+            const int IB2;  //!< Secondary index bits per element
+
+            /**
+             * @return Element `idx` of an array of `width`-bit fields that begins directly after `prev`
+             */
+            static constexpr Bitfield ElementAfter(Bitfield prev, int width, int idx) {
+                return prev.Then(width * idx).Then(width);
+            }
+
+            /**
+             * @return The amount of endpoint colors stored in a block, every subset has two
+             */
+            constexpr int NumColors() const {
+                return 2 * NS;
+            }
+
+            /**
+             * @return The partition field, it follows the IDX + 1 bits that encode the mode
+             */
+            constexpr Bitfield Partition() const {
+                return Bitfield{IDX + 1, PB};
+            }
+
+            constexpr Bitfield Rotation() const {
+                return Partition().Then(RB);
+            }
+
+            constexpr Bitfield IndexSelection() const {
+                return Rotation().Then(ISB);
+            }
+
+            /**
+             * @return The red channel of endpoint `idx`, all red values are stored before any green value
+             */
+            constexpr Bitfield Red(int idx) const {
+                return ElementAfter(IndexSelection(), CB, idx);
+            }
+
+            /**
+             * @return The green channel of endpoint `idx`, located after the red channel of the last endpoint
+             */
+            constexpr Bitfield Green(int idx) const {
+                return ElementAfter(Red(NumColors() - 1), CB, idx);
+            }
+
+            /**
+             * @return The blue channel of endpoint `idx`, located after the green channel of the last endpoint
+             */
+            constexpr Bitfield Blue(int idx) const {
+                return ElementAfter(Green(NumColors() - 1), CB, idx);
+            }
+
+            /**
+             * @return The alpha channel of endpoint `idx`, located after the blue channel of the last endpoint
+             */
+            constexpr Bitfield Alpha(int idx) const {
+                return ElementAfter(Blue(NumColors() - 1), AB, idx);
+            }
+
+            /**
+             * @return The P-bit of endpoint `idx`, located after the alpha channel of the last endpoint
+             */
+            constexpr Bitfield EndpointPBit(int idx) const {
+                return ElementAfter(Alpha(NumColors() - 1), EPB, idx);
+            }
+
+            constexpr Bitfield SharedPBit0() const {
+                return EndpointPBit(NumColors() - 1).Then(SPB);
+            }
+
+            constexpr Bitfield SharedPBit1() const {
+                return SharedPBit0().Then(SPB);
+            }
+
+            /**
+             * @param offset The amount of primary index bits to skip
+             * @param count The amount of bits to cover
+             * @return A field inside the primary index data, which starts directly after the last P-bit
+             */
+            constexpr Bitfield PrimaryIndex(int offset, int count) const {
+                return SharedPBit1().Then(offset).Then(count);
+            }
+
+            /**
+             * @param offset The amount of secondary index bits to skip
+             * @param count The amount of bits to cover
+             * @return A field inside the secondary index data, which starts IBC bits after the start of the primary index data
+             */
+            constexpr Bitfield SecondaryIndex(int offset, int count) const {
+                return SharedPBit1().Then(IBC + offset).Then(count);
+            }
+        };
+
+        // The parameters of every BC7 mode, indexed by mode number
+        // The final entry has an IDX of -1 and is what Block::mode() returns for a block with no valid mode bit set
+        static constexpr Mode Modes[] = {
+            //     IDX  NS   PB   RB   ISB  CB   AB   EPB  SPB  IB   IBC, IB2
+            /**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0},
+/**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0},
+/**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0},
+/**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0},
+/**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3},
+/**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2},
+/**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0},
+/**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0},
+/**/ {-1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x00, 0x0},
+        };
+
+        static constexpr int MaxPartitions = 64; //!< The amount of rows in each of the partition and anchor tables below
+        static constexpr int MaxSubsets = 3; //!< The largest amount of subsets any mode uses, this sizes the endpoint storage in Block::decode
+
+        // The subset (0 or 1) that every texel belongs to in modes with two subsets
+        // This is indexed as [partition][texel] where texel is y * 4 + x
+        static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
+            {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
+            {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
+            {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
+            {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
+            {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1},
+            {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1},
+            {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
+            {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1},
+            {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+            {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0},
+            {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0},
+            {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
+            {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
+            {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
+            {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1},
+            {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
+            {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
+            {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0},
+            {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0},
+            {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0},
+            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
+            {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0},
+            {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0},
+            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
+            {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
+            {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0},
+            {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0},
+            {0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
+            {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0},
+            {0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
+            {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1},
+            {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0},
+            {0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0},
+            {0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0},
+            {0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0},
+            {0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
+            {0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1},
+            {0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1},
+            {0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0},
+            {0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0},
+            {0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0},
+            {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0},
+            {0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0},
+            {0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1},
+            {0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1},
+            {0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0},
+            {0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0},
+            {0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1},
+            {0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1},
+            {0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1},
+            {0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1},
+            {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
+            {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
+            {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0},
+            {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1},
+        };
+
+        // The subset (0, 1 or 2) that every texel belongs to in modes with three subsets
+        // This is indexed as [partition][texel] where texel is y * 4 + x
+        static constexpr uint8_t PartitionTable3[MaxPartitions][16] = {
+            {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2},
+            {0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1},
+            {0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1},
+            {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2},
+            {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2},
+            {0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1},
+            {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1},
+            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2},
+            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2},
+            {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2},
+            {0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2},
+            {0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2},
+            {0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2},
+            {0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2},
+            {0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0},
+            {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2},
+            {0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0},
+            {0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2},
+            {0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1},
+            {0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2},
+            {0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1},
+            {0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2},
+            {0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0},
+            {0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0},
+            {0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2},
+            {0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0},
+            {0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1},
+            {0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2},
+            {0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2},
+            {0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1},
+            {0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1},
+            {0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2},
+            {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1},
+            {0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2},
+            {0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0},
+            {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
+            {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
+            {0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0},
+            {0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1},
+            {0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1},
+            {0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2},
+            {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1},
+            {0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2},
+            {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1},
+            {0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1},
+            {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1},
+            {0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1},
+            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2},
+            {0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1},
+            {0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2},
+            {0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2},
+            {0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2},
+            {0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2},
+            {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2},
+            {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2},
+            {0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2},
+            {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2},
+            {0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2},
+            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2},
+            {0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1},
+            {0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2},
+            {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
+            {0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0},
+        };
+
+        // The texel index of the anchor of subset 1 in modes with two subsets, indexed by partition
+        // The index of an anchor texel is stored with one bit less than every other index
+        static constexpr uint8_t AnchorTable2[MaxPartitions] = {
+// @fmt:off
+0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
+0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
+0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
+0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
+0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
+0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
+// @fmt:on
+        };
+
+        // The texel index of the anchor of subset 1 in modes with three subsets, indexed by partition
+        static constexpr uint8_t AnchorTable3a[MaxPartitions] = {
+// @fmt:off
+0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf,
+0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3,
+0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa,
+0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf,
+0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf,
+0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf,
+0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa,
+0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3,
+// @fmt:on
+        };
+
+        // The texel index of the anchor of subset 2 in modes with three subsets, indexed by partition
+        static constexpr uint8_t AnchorTable3b[MaxPartitions] = {
+// @fmt:off
+0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8,
+0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8,
+0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8,
+0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8,
+0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa,
+0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8,
+0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8,
+// @fmt:on
+        };
+
+        /**
+         * @brief A 4-byte texel, the color channels are stored in blue, green, red order and are followed by alpha
+         */
+        struct Color {
+            /**
+             * @brief The three color channels alongside channel-wise arithmetic, every result is truncated back to 8 bits per channel
+             */
+            struct RGB {
+                RGB() = default;
+
+                RGB(uint8_t red, uint8_t green, uint8_t blue)
+                    : b{blue}, g{green}, r{red} {}
+
+                //! Only the low byte of every supplied channel is kept
+                RGB(int red, int green, int blue)
+                    : RGB(static_cast<uint8_t>(red), static_cast<uint8_t>(green), static_cast<uint8_t>(blue)) {}
+
+                RGB operator<<(int shift) const {
+                    return RGB(r << shift, g << shift, b << shift);
+                }
+
+                RGB operator>>(int shift) const {
+                    return RGB(r >> shift, g >> shift, b >> shift);
+                }
+
+                //! ORs the same bits into every channel
+                RGB operator|(int bits) const {
+                    return RGB(r | bits, g | bits, b | bits);
+                }
+
+                RGB operator|(const RGB &rhs) const {
+                    return RGB(r | rhs.r, g | rhs.g, b | rhs.b);
+                }
+
+                //! Adds channel-wise, sums wrap around at 8 bits
+                RGB operator+(const RGB &rhs) const {
+                    return RGB(r + rhs.r, g + rhs.g, b + rhs.b);
+                }
+
+                uint8_t b;
+                uint8_t g;
+                uint8_t r;
+            };
+
+            RGB rgb;
+            uint8_t a;
+        };
+
+        static_assert(sizeof(Color) == 4, "Color size must be 4 bytes");
+
+        /**
+         * @brief A single 128-bit BC7 block alongside the logic required to decode it into up to 4x4 texels with 8 bits per channel
+         */
+        struct Block {
+            /**
+             * @brief Reads a bitfield out of the 128-bit block
+             * @param bf The field to read, it may straddle the boundary between the low and high words
+             * @return The bits of the field, right-aligned, a field without any bits reads as zero
+             */
+            constexpr uint64_t Get(const Bitfield &bf) const {
+                if (bf.count <= 0) {
+                    // Absent fields are zero, returning early also keeps an empty field located at bit 64 from shifting the low word by 64 bits
+                    return 0;
+                }
+
+                uint64_t mask = (1ULL << bf.count) - 1;
+                if (bf.offset + bf.count <= 64) {
+                    return (low >> bf.offset) & mask;
+                }
+                if (bf.offset >= 64) {
+                    return (high >> (bf.offset - 64)) & mask;
+                }
+                // The field starts in the low word and ends in the high word
+                return ((low >> bf.offset) | (high << (64 - bf.offset))) & mask;
+            }
+
+            /**
+             * @return The mode of this block, it is selected by the position of the lowest set bit in the first byte
+             * @note The entry with a negative IDX is returned when none of the low 8 bits are set
+             */
+            const Mode &mode() const {
+                for (int bit = 0; bit < 8; bit++) {
+                    if (((low >> bit) & 1) != 0) {
+                        return Modes[bit];
+                    }
+                }
+                return Modes[8];  // Invalid mode
+            }
+
+            /**
+             * @brief An interpolation index alongside the full width it was encoded with
+             */
+            struct IndexInfo {
+                uint64_t value; //!< The index itself
+                int numBits; //!< The nominal width of the index, this selects the weight table
+            };
+
+            /**
+             * @brief Blends two 8-bit endpoint values using the weight selected by the supplied index
+             * @param index Must have a width of 2, 3 or 4 bits
+             */
+            uint8_t interpolate(uint8_t e0, uint8_t e1, const IndexInfo &index) const {
+                static constexpr uint16_t weights2[] = {0, 21, 43, 64};
+                static constexpr uint16_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
+                static constexpr uint16_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
+                                                        34, 38, 43, 47, 51, 55, 60, 64};
+                static constexpr uint16_t const *weightsN[] = {
+                    nullptr, nullptr, weights2, weights3, weights4
+                };
+                auto weights = weightsN[index.numBits];
+                assert(weights != nullptr);
+                // Weights are in 1/64ths, 32 is added prior to the shift to round to nearest
+                return static_cast<uint8_t>(((64 - weights[index.value]) * uint16_t(e0) + weights[index.value] * uint16_t(e1) + 32) >> 6);
+            }
+
+            /**
+             * @brief Decodes this block into up to 4x4 texels
+             * @param dst Address the block's first texel is written to, texel (x, y) of the block lands at `dst + sizeof(Color) * x + dstPitch * y`
+             * @param dstX Horizontal texel position of the block, only used to clip against dstWidth
+             * @param dstY Vertical texel position of the block, only used to clip against dstHeight
+             * @param dstWidth Texels at or beyond this horizontal position are not written
+             * @param dstHeight Texels at or beyond this vertical position are not written
+             * @param dstPitch The amount of bytes between two consecutive output rows
+             * @note A block with an invalid mode clears all of its in-bounds texels to zero
+             */
+            void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch) const {
+                auto const &mode = this->mode();
+
+                if (mode.IDX < 0)  // Invalid mode:
+                {
+                    for (size_t y = 0; y < 4 && y + dstY < dstHeight; y++) {
+                        for (size_t x = 0; x < 4 && x + dstX < dstWidth; x++) {
+                            auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
+                            out->rgb = {0, 0, 0};
+                            out->a = 0;
+                        }
+                    }
+                    return;
+                }
+
+                using Endpoint = std::array<Color, 2>;
+                std::array<Endpoint, MaxSubsets> subsets;
+
+                // Read the raw endpoints, modes without alpha bits are fully opaque
+                for (int i = 0; i < mode.NS; i++) {
+                    auto &subset = subsets[i];
+                    subset[0].rgb.r = Get(mode.Red(i * 2 + 0));
+                    subset[0].rgb.g = Get(mode.Green(i * 2 + 0));
+                    subset[0].rgb.b = Get(mode.Blue(i * 2 + 0));
+                    subset[0].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 0)) : 255;
+
+                    subset[1].rgb.r = Get(mode.Red(i * 2 + 1));
+                    subset[1].rgb.g = Get(mode.Green(i * 2 + 1));
+                    subset[1].rgb.b = Get(mode.Blue(i * 2 + 1));
+                    subset[1].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 1)) : 255;
+                }
+
+                // A shared P-bit is appended as the lowest bit of both endpoints of a subset
+                // The only mode in the table with shared P-bits has two subsets and no alpha
+                if (mode.SPB > 0) {
+                    auto pbit0 = Get(mode.SharedPBit0());
+                    auto pbit1 = Get(mode.SharedPBit1());
+                    subsets[0][0].rgb = (subsets[0][0].rgb << 1) | pbit0;
+                    subsets[0][1].rgb = (subsets[0][1].rgb << 1) | pbit0;
+                    subsets[1][0].rgb = (subsets[1][0].rgb << 1) | pbit1;
+                    subsets[1][1].rgb = (subsets[1][1].rgb << 1) | pbit1;
+                }
+
+                // An endpoint P-bit is appended as the lowest bit of every channel of a single endpoint
+                if (mode.EPB > 0) {
+                    for (int i = 0; i < mode.NS; i++) {
+                        auto &subset = subsets[i];
+                        auto pbit0 = Get(mode.EndpointPBit(i * 2 + 0));
+                        auto pbit1 = Get(mode.EndpointPBit(i * 2 + 1));
+                        subset[0].rgb = (subset[0].rgb << 1) | pbit0;
+                        subset[1].rgb = (subset[1].rgb << 1) | pbit1;
+                        if (mode.AB > 0) {
+                            subset[0].a = (subset[0].a << 1) | pbit0;
+                            subset[1].a = (subset[1].a << 1) | pbit1;
+                        }
+                    }
+                }
+
+                auto const colorBits = mode.CB + mode.SPB + mode.EPB;
+                auto const alphaBits = mode.AB + mode.SPB + mode.EPB;
+
+                // Expand every endpoint to 8 bits by left-aligning it and replicating its top bits into the vacated low bits
+                for (int i = 0; i < mode.NS; i++) {
+                    auto &subset = subsets[i];
+                    subset[0].rgb = subset[0].rgb << (8 - colorBits);
+                    subset[1].rgb = subset[1].rgb << (8 - colorBits);
+                    subset[0].rgb = subset[0].rgb | (subset[0].rgb >> colorBits);
+                    subset[1].rgb = subset[1].rgb | (subset[1].rgb >> colorBits);
+
+                    if (mode.AB > 0) {
+                        subset[0].a = subset[0].a << (8 - alphaBits);
+                        subset[1].a = subset[1].a << (8 - alphaBits);
+                        subset[0].a = subset[0].a | (subset[0].a >> alphaBits);
+                        subset[1].a = subset[1].a | (subset[1].a >> alphaBits);
+                    }
+                }
+
+                // The partition and rotation are properties of the whole block, so they are only extracted once
+                auto const partitionIdx = Get(mode.Partition());
+                assert(partitionIdx < MaxPartitions);
+                auto const rotation = Get(mode.Rotation());
+
+                int colorIndexBitOffset = 0;
+                int alphaIndexBitOffset = 0;
+                for (int y = 0; y < 4; y++) {
+                    for (int x = 0; x < 4; x++) {
+                        auto texelIdx = y * 4 + x;
+                        auto subsetIdx = subsetIndex(mode, partitionIdx, texelIdx);
+                        assert(subsetIdx < MaxSubsets);
+                        auto const &subset = subsets[subsetIdx];
+
+                        auto anchorIdx = anchorIndex(mode, partitionIdx, subsetIdx);
+                        auto isAnchor = anchorIdx == texelIdx;
+                        auto colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset);
+                        auto alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset);
+
+                        if (y + dstY >= dstHeight || x + dstX >= dstWidth) {
+                            // Don't be tempted to skip early at the loops:
+                            // The calls to colorIndex() and alphaIndex() adjust bit
+                            // offsets that need to be carefully tracked.
+                            continue;
+                        }
+
+                        Color output;
+                        // Note: We flip r and b channels past this point as the texture storage is BGR while the output is RGB
+                        output.rgb.r = interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx);
+                        output.rgb.g = interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx);
+                        output.rgb.b = interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx);
+                        output.a = interpolate(subset[0].a, subset[1].a, alphaIdx);
+
+                        // Due to the flip above rgb.b holds the red value and rgb.r holds the blue value
+                        switch (rotation) {
+                            default:
+                                break;
+                            case 1:
+                                std::swap(output.a, output.rgb.b);
+                                break;
+                            case 2:
+                                std::swap(output.a, output.rgb.g);
+                                break;
+                            case 3:
+                                std::swap(output.a, output.rgb.r);
+                                break;
+                        }
+
+                        auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
+                        *out = output;
+                    }
+                }
+            }
+
+            /**
+             * @return The subset that the supplied texel belongs to, this is always zero for modes with a single subset
+             */
+            int subsetIndex(const Mode &mode, int partitionIdx, int texelIndex) const {
+                switch (mode.NS) {
+                    default:
+                        return 0;
+                    case 2:
+                        return PartitionTable2[partitionIdx][texelIndex];
+                    case 3:
+                        return PartitionTable3[partitionIdx][texelIndex];
+                }
+            }
+
+            /**
+             * @return The index of the texel whose interpolation index is stored with one bit less for the supplied subset
+             */
+            int anchorIndex(const Mode &mode, int partitionIdx, int subsetIdx) const {
+                // ARB_texture_compression_bptc states:
+                // "In partition zero, the anchor index is always index zero.
+                // In other partitions, the anchor index is specified by tables
+                // Table.A2 and Table.A3."
+                // Note: This is really confusing - I believe they meant subset instead
+                // of partition here.
+                switch (subsetIdx) {
+                    default:
+                        return 0;
+                    case 1:
+                        return mode.NS == 2 ? AnchorTable2[partitionIdx] : AnchorTable3a[partitionIdx];
+                    case 2:
+                        return AnchorTable3b[partitionIdx];
+                }
+            }
+
+            /**
+             * @brief Reads the next index out of the primary or secondary index data
+             * @param secondary If the secondary index data should be read rather than the primary
+             * @param isAnchor If the index belongs to an anchor texel, those are stored with one bit less
+             * @param indexBitOffset The amount of bits already read from the index data, it is advanced past the index that was read
+             */
+            IndexInfo readIndex(const Mode &mode, bool secondary, bool isAnchor, int &indexBitOffset) const {
+                auto numBits = secondary ? mode.IB2 : mode.IB;
+                auto numReadBits = numBits - (isAnchor ? 1 : 0);
+                auto index =
+                    Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits)
+                                  : mode.PrimaryIndex(indexBitOffset, numReadBits));
+                indexBitOffset += numReadBits;
+                return {index, numBits};
+            }
+
+            /**
+             * @brief Reads the index used to interpolate the color channels of the next texel
+             * @param indexBitOffset The running bit offset for color indices, it is advanced by the amount of bits read
+             */
+            IndexInfo colorIndex(const Mode &mode, bool isAnchor,
+                                 int &indexBitOffset) const {
+                // ARB_texture_compression_bptc states:
+                // "The index value for interpolating color comes from the secondary
+                // index for the texel if the format has an index selection bit and its
+                // value is one and from the primary index otherwise."
+                auto idx = Get(mode.IndexSelection());
+                assert(idx <= 1);
+                return readIndex(mode, idx == 1, isAnchor, indexBitOffset);
+            }
+
+            /**
+             * @brief Reads the index used to interpolate the alpha channel of the next texel
+             * @param indexBitOffset The running bit offset for alpha indices, it is advanced by the amount of bits read
+             */
+            IndexInfo alphaIndex(const Mode &mode, bool isAnchor,
+                                 int &indexBitOffset) const {
+                // ARB_texture_compression_bptc states:
+                // "The alpha index comes from the secondary index if the block has a
+                // secondary index and the block either doesn't have an index selection
+                // bit or that bit is zero and the primary index otherwise."
+                auto idx = Get(mode.IndexSelection());
+                assert(idx <= 1);
+                return readIndex(mode, (mode.IB2 != 0) && (idx == 0), isAnchor, indexBitOffset);
+            }
+
+            // Assumes little-endian
+            uint64_t low;
+            uint64_t high;
+        };
+
+    }  // namespace BC7
+}  // anonymous namespace
+
+namespace bcn {
+    constexpr size_t R8Bpp = 1; //!< The number of bytes per pixel in R8
+    constexpr size_t R8g8Bpp = 2; //!< The number of bytes per pixel in R8G8
+    constexpr size_t R8g8b8a8Bpp = 4; //!< The number of bytes per pixel in R8G8B8A8
+    constexpr size_t R16g16b16a16Bpp = 8; //!< The number of bytes per pixel in R16G16B16A16
+
+    // Every decoder below reinterprets `src` as a single compressed block and assumes that output rows are tightly packed, i.e. the pitch is `width` pixels
+
+    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
+        const size_t rowPitch = width * R8g8b8a8Bpp;
+        const auto &colorBlock = *reinterpret_cast<const BC_color *>(src);
+        colorBlock.decode(dst, x, y, width, height, rowPitch, R8g8b8a8Bpp, true, false);
+    }
+
+    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
+        const size_t rowPitch = width * R8g8b8a8Bpp;
+        // The alpha data occupies the first 8 bytes of the block and the color data the last 8 bytes
+        const auto &alphaBlock = *reinterpret_cast<const BC_alpha *>(src);
+        const auto &colorBlock = *reinterpret_cast<const BC_color *>(src + 8);
+        // Color is decoded prior to alpha, this order is preserved from the original implementation
+        colorBlock.decode(dst, x, y, width, height, rowPitch, R8g8b8a8Bpp, false, true);
+        alphaBlock.decode(dst, x, y, width, height, rowPitch, R8g8b8a8Bpp);
+    }
+
+    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
+        const size_t rowPitch = width * R8g8b8a8Bpp;
+        // The alpha data occupies the first 8 bytes of the block and the color data the last 8 bytes
+        const auto &alphaBlock = *reinterpret_cast<const BC_channel *>(src);
+        const auto &colorBlock = *reinterpret_cast<const BC_color *>(src + 8);
+        // Color is decoded prior to alpha, this order is preserved from the original implementation
+        colorBlock.decode(dst, x, y, width, height, rowPitch, R8g8b8a8Bpp, false, true);
+        alphaBlock.decode(dst, x, y, width, height, rowPitch, R8g8b8a8Bpp, 3, false);
+    }
+
+    void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
+        const size_t rowPitch = width * R8Bpp;
+        const auto &redBlock = *reinterpret_cast<const BC_channel *>(src);
+        redBlock.decode(dst, x, y, width, height, rowPitch, R8Bpp, 0, isSigned);
+    }
+
+    void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
+        const size_t rowPitch = width * R8g8Bpp;
+        // The red data occupies the first 8 bytes of the block and the green data the last 8 bytes
+        const auto &redBlock = *reinterpret_cast<const BC_channel *>(src);
+        const auto &greenBlock = *reinterpret_cast<const BC_channel *>(src + 8);
+        redBlock.decode(dst, x, y, width, height, rowPitch, R8g8Bpp, 0, isSigned);
+        greenBlock.decode(dst, x, y, width, height, rowPitch, R8g8Bpp, 1, isSigned);
+    }
+
+    void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
+        const size_t rowPitch = width * R16g16b16a16Bpp;
+        const auto &hdrBlock = *reinterpret_cast<const BC6H::Block *>(src);
+        hdrBlock.decode(dst, x, y, width, height, rowPitch, R16g16b16a16Bpp, isSigned);
+    }
+
+    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
+        const size_t rowPitch = width * R8g8b8a8Bpp;
+        const auto &bptcBlock = *reinterpret_cast<const BC7::Block *>(src);
+        bptcBlock.decode(dst, x, y, width, height, rowPitch);
+    }
+}
diff --git a/externals/bc_decoder/bc_decoder.h b/externals/bc_decoder/bc_decoder.h
new file mode 100644
index 000000000..4f0ead7d3
--- /dev/null
+++ b/externals/bc_decoder/bc_decoder.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+
+namespace bcn {
+    /**
+     * @brief Decodes a single BC1 block read from src to R8G8B8A8 texels written at dst
+     */
+    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
+
+    /**
+     * @brief Decodes a single BC2 block read from src to R8G8B8A8 texels written at dst
+     */
+    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
+
+    /**
+     * @brief Decodes a single BC3 block read from src to R8G8B8A8 texels written at dst
+     */
+    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
+
+    /**
+     * @brief Decodes a single BC4 block read from src to R8 texels written at dst, isSigned is forwarded to the channel decoder
+     */
+    void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+
+    /**
+     * @brief Decodes a single BC5 block read from src to R8G8 texels written at dst, isSigned is forwarded to both channel decoders
+     */
+    void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+
+    /**
+     * @brief Decodes a single BC6 block read from src to R16G16B16A16 texels written at dst, isSigned is forwarded to the block decoder
+     */
+    void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+
+    /**
+     * @brief Decodes a single BC7 block read from src to R8G8B8A8 texels written at dst
+     */
+    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
+}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e9e6f278d..3b2fe01da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -220,8 +220,8 @@ add_library(video_core STATIC
     surface.h
     texture_cache/accelerated_swizzle.cpp
     texture_cache/accelerated_swizzle.h
-    texture_cache/decode_bc4.cpp
-    texture_cache/decode_bc4.h
+    texture_cache/decode_bc.cpp
+    texture_cache/decode_bc.h
     texture_cache/descriptor_table.h
     texture_cache/formatter.cpp
     texture_cache/formatter.h
@@ -279,7 +279,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
+target_link_libraries(video_core PUBLIC glad shader_recompiler stb bc_decoder)
 
 if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID))
     add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 9a0b10568..a8540339d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -259,6 +259,26 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
             break;
         }
     }
+    // Transcode on hardware that doesn't support BCn natively
+    if (!device.IsOptimalBcnSupported() && VideoCore::Surface::IsPixelFormatBCn(pixel_format)) {
+        const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
+        if (pixel_format == PixelFormat::BC4_SNORM) {
+            tuple.format = VK_FORMAT_R8_SNORM;
+        } else if (pixel_format == PixelFormat::BC4_UNORM) {
+            tuple.format = VK_FORMAT_R8_UNORM;
+        } else if (pixel_format == PixelFormat::BC5_SNORM) {
+            tuple.format = VK_FORMAT_R8G8_SNORM;
+        } else if (pixel_format == PixelFormat::BC5_UNORM) {
+            tuple.format = VK_FORMAT_R8G8_UNORM;
+        } else if (pixel_format == PixelFormat::BC6H_SFLOAT ||
+                   pixel_format == PixelFormat::BC6H_UFLOAT) {
+            tuple.format = VK_FORMAT_R16G16B16A16_SFLOAT;
+        } else if (is_srgb) {
+            tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
+        } else {
+            tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+        }
+    }
     const bool attachable = (tuple.usage & Attachable) != 0;
     const bool storage = (tuple.usage & Storage) != 0;
 
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 268b955fb..f7c0d939a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -315,7 +315,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
     FlushWork();
     gpu_memory->FlushCaching();
 
+#if ANDROID
+    if (Settings::IsGPULevelHigh()) {
+        // This is problematic on Android, disable on GPU Normal.
+        query_cache.UpdateCounters();
+    }
+#else
     query_cache.UpdateCounters();
+#endif
 
     auto& regs = maxwell3d->regs;
     const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index ce6acc30c..8385b5509 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1279,6 +1279,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
         flags |= VideoCommon::ImageFlagBits::Converted;
         flags |= VideoCommon::ImageFlagBits::CostlyLoad;
     }
+    if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) {
+        flags |= VideoCommon::ImageFlagBits::Converted;
+        flags |= VideoCommon::ImageFlagBits::CostlyLoad;
+    }
     if (runtime->device.HasDebuggingToolAttached()) {
         original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
     }
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index cb51529e4..e16cd5e73 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -269,6 +269,28 @@ bool IsPixelFormatASTC(PixelFormat format) {
     }
 }
 
+bool IsPixelFormatBCn(PixelFormat format) { // True for every BC1-BC7 variant listed below
+    switch (format) {
+    case PixelFormat::BC1_RGBA_UNORM:
+    case PixelFormat::BC1_RGBA_SRGB:
+    case PixelFormat::BC2_UNORM:
+    case PixelFormat::BC2_SRGB:
+    case PixelFormat::BC3_UNORM:
+    case PixelFormat::BC3_SRGB:
+    case PixelFormat::BC4_UNORM:
+    case PixelFormat::BC4_SNORM:
+    case PixelFormat::BC5_UNORM:
+    case PixelFormat::BC5_SNORM:
+    case PixelFormat::BC6H_UFLOAT:
+    case PixelFormat::BC6H_SFLOAT:
+    case PixelFormat::BC7_UNORM:
+    case PixelFormat::BC7_SRGB:
+        return true;
+    default:
+        return false;
+    }
+}
+
 bool IsPixelFormatSRGB(PixelFormat format) {
     switch (format) {
     case PixelFormat::A8B8G8R8_SRGB:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0225d3287..9b9c4d9bc 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -501,6 +501,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format);
 
 bool IsPixelFormatASTC(PixelFormat format);
 
+bool IsPixelFormatBCn(PixelFormat format);
+
 bool IsPixelFormatSRGB(PixelFormat format);
 
 bool IsPixelFormatInteger(PixelFormat format);
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
new file mode 100644
index 000000000..3e26474a3
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -0,0 +1,129 @@
+// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+#include <array>
+#include <span>
+#include <bc_decoder.h>
+
+#include "common/common_types.h"
+#include "video_core/texture_cache/decode_bc.h"
+
+namespace VideoCommon {
+
+namespace {
+constexpr u32 BLOCK_SIZE = 4;
+
+using VideoCore::Surface::PixelFormat;
+
+/// Returns true when the bcn decoder for pixel_format takes a trailing is_signed argument.
+/// This holds for the unsigned BC4/BC5/BC6H variants as well: DecompressBlocks only uses the
+/// result to pick the call signature, while DecompressBCn supplies the actual signedness at
+/// runtime.
+constexpr bool IsSigned(PixelFormat pixel_format) {
+    const bool is_bc4 =
+        pixel_format == PixelFormat::BC4_SNORM || pixel_format == PixelFormat::BC4_UNORM;
+    const bool is_bc5 =
+        pixel_format == PixelFormat::BC5_SNORM || pixel_format == PixelFormat::BC5_UNORM;
+    const bool is_bc6h =
+        pixel_format == PixelFormat::BC6H_SFLOAT || pixel_format == PixelFormat::BC6H_UFLOAT;
+    return is_bc4 || is_bc5 || is_bc6h;
+}
+
+/// Size in bytes of one compressed block: 8 for BC1 and BC4, 16 for every other format.
+constexpr u32 BlockSize(PixelFormat pixel_format) {
+    const bool is_bc1 =
+        pixel_format == PixelFormat::BC1_RGBA_SRGB || pixel_format == PixelFormat::BC1_RGBA_UNORM;
+    const bool is_bc4 =
+        pixel_format == PixelFormat::BC4_SNORM || pixel_format == PixelFormat::BC4_UNORM;
+    if (is_bc1 || is_bc4) {
+        return 8;
+    }
+    return 16;
+}
+} // Anonymous namespace
+
+/// Bytes per texel of the format a BCn image is decoded to; despite the name this is a per
+/// texel size, DecompressBlocks multiplies it by the image width to get the row pitch.
+/// BC4 decodes to R8, BC5 to R8G8, BC6H to R16G16B16A16 and every other format to R8G8B8A8.
+u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
+    if (pixel_format == PixelFormat::BC4_SNORM || pixel_format == PixelFormat::BC4_UNORM) {
+        return 1;
+    }
+    if (pixel_format == PixelFormat::BC5_SNORM || pixel_format == PixelFormat::BC5_UNORM) {
+        return 2;
+    }
+    if (pixel_format == PixelFormat::BC6H_SFLOAT || pixel_format == PixelFormat::BC6H_UFLOAT) {
+        return 8;
+    }
+    return 4;
+}
+
+/// Decodes every block of every slice of input into tightly packed texels in output.
+template <auto decompress, PixelFormat pixel_format>
+void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+                      bool is_signed = false) {
+    const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
+    const u32 block_width = std::min(extent.width, BLOCK_SIZE);
+    const u32 block_height = std::min(extent.height, BLOCK_SIZE);
+    const u32 pitch = extent.width * out_bpp;
+    size_t input_offset = 0;
+    size_t output_offset = 0;
+    for (u32 slice = 0; slice < extent.depth; ++slice) {
+        for (u32 y = 0; y < extent.height; y += block_height) {
+            for (u32 x = 0; x < extent.width; x += block_width) {
+                const u8* src = input.data() + input_offset;
+                u8* const dst = output.data() + output_offset + static_cast<size_t>(x) * out_bpp;
+                if constexpr (IsSigned(pixel_format)) {
+                    decompress(src, dst, x, y, extent.width, extent.height, is_signed);
+                } else {
+                    decompress(src, dst, x, y, extent.width, extent.height);
+                }
+                input_offset += BlockSize(pixel_format);
+            }
+            // Advance by the rows really present so a partial last block row adds no padding
+            output_offset += static_cast<size_t>(std::min(block_height, extent.height - y)) * pitch;
+        }
+    }
+}
+
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+                   VideoCore::Surface::PixelFormat pixel_format) {
+    switch (pixel_format) { // Decode input into output with the decoder of the matching BCn family
+    case PixelFormat::BC1_RGBA_UNORM:
+    case PixelFormat::BC1_RGBA_SRGB: // sRGB variants take the same decode path as UNORM
+        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
+        break;
+    case PixelFormat::BC2_UNORM:
+    case PixelFormat::BC2_SRGB:
+        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
+        break;
+    case PixelFormat::BC3_UNORM:
+    case PixelFormat::BC3_SRGB:
+        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
+        break;
+    case PixelFormat::BC4_SNORM:
+    case PixelFormat::BC4_UNORM: // The last argument is is_signed, true only for SNORM
+        DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
+            input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
+        break;
+    case PixelFormat::BC5_SNORM:
+    case PixelFormat::BC5_UNORM: // is_signed is true only for SNORM
+        DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
+            input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
+        break;
+    case PixelFormat::BC6H_SFLOAT:
+    case PixelFormat::BC6H_UFLOAT: // is_signed is true only for SFLOAT
+        DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
+            input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
+        break;
+    case PixelFormat::BC7_SRGB:
+    case PixelFormat::BC7_UNORM:
+        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
+        break;
+    default: // Any other format only logs a warning, output is not written
+        LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
+    }
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc.h
similarity index 50%
rename from src/video_core/texture_cache/decode_bc4.h
rename to src/video_core/texture_cache/decode_bc.h
index ab2f735be..41d1ec0a3 100644
--- a/src/video_core/texture_cache/decode_bc4.h
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -6,10 +6,14 @@
 #include <span>
 
 #include "common/common_types.h"
+#include "video_core/surface.h"
 #include "video_core/texture_cache/types.h"
 
 namespace VideoCommon {
 
-void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output);
+[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format);
+
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+                   VideoCore::Surface::PixelFormat pixel_format);
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
deleted file mode 100644
index ef98afdca..000000000
--- a/src/video_core/texture_cache/decode_bc4.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <algorithm>
-#include <array>
-#include <span>
-
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "video_core/texture_cache/decode_bc4.h"
-#include "video_core/texture_cache/types.h"
-
-namespace VideoCommon {
-
-// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
-[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
-    const u32 code_offset = 16 + 3 * (4 * y + x);
-    const u32 code = (bits >> code_offset) & 7;
-    const u32 red0 = (bits >> 0) & 0xff;
-    const u32 red1 = (bits >> 8) & 0xff;
-    if (red0 > red1) {
-        switch (code) {
-        case 0:
-            return red0;
-        case 1:
-            return red1;
-        case 2:
-            return (6 * red0 + 1 * red1) / 7;
-        case 3:
-            return (5 * red0 + 2 * red1) / 7;
-        case 4:
-            return (4 * red0 + 3 * red1) / 7;
-        case 5:
-            return (3 * red0 + 4 * red1) / 7;
-        case 6:
-            return (2 * red0 + 5 * red1) / 7;
-        case 7:
-            return (1 * red0 + 6 * red1) / 7;
-        }
-    } else {
-        switch (code) {
-        case 0:
-            return red0;
-        case 1:
-            return red1;
-        case 2:
-            return (4 * red0 + 1 * red1) / 5;
-        case 3:
-            return (3 * red0 + 2 * red1) / 5;
-        case 4:
-            return (2 * red0 + 3 * red1) / 5;
-        case 5:
-            return (1 * red0 + 4 * red1) / 5;
-        case 6:
-            return 0;
-        case 7:
-            return 0xff;
-        }
-    }
-    return 0;
-}
-
-void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
-    UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
-    UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
-    static constexpr u32 BLOCK_SIZE = 4;
-    size_t input_offset = 0;
-    for (u32 slice = 0; slice < extent.depth; ++slice) {
-        for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
-            for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
-                u64 bits;
-                std::memcpy(&bits, &input[input_offset], sizeof(bits));
-                input_offset += sizeof(bits);
-
-                for (u32 y = 0; y < BLOCK_SIZE; ++y) {
-                    for (u32 x = 0; x < BLOCK_SIZE; ++x) {
-                        const u32 linear_z = slice;
-                        const u32 linear_y = block_y * BLOCK_SIZE + y;
-                        const u32 linear_x = block_x * BLOCK_SIZE + x;
-                        const u32 offset_z = linear_z * extent.width * extent.height;
-                        const u32 offset_y = linear_y * extent.width;
-                        const u32 offset_x = linear_x;
-                        const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
-                        const u32 color = DecompressBlock(bits, x, y);
-                        output[output_offset + 0] = static_cast<u8>(color);
-                        output[output_offset + 1] = 0;
-                        output[output_offset + 2] = 0;
-                        output[output_offset + 3] = 0xff;
-                    }
-                }
-            }
-        }
-    }
-}
-
-} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index f781cb7a0..9a618a57a 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -24,7 +24,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/surface.h"
-#include "video_core/texture_cache/decode_bc4.h"
+#include "video_core/texture_cache/decode_bc.h"
 #include "video_core/texture_cache/format_lookup_table.h"
 #include "video_core/texture_cache/formatter.h"
 #include "video_core/texture_cache/samples_helper.h"
@@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
 using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
 using VideoCore::Surface::SurfaceType;
 
-constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
-
 struct LevelInfo {
     Extent3D size;
     Extent3D block;
@@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
         }
         return output_size;
     }
-    return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
+    return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers *
+           ConvertedBytesPerBlock(info.format);
 }
 
 u32 CalculateLayerStride(const ImageInfo& info) noexcept {
@@ -945,7 +944,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
                 tile_size.height, output.subspan(output_offset));
 
             output_offset += copy.image_extent.width * copy.image_extent.height *
-                             copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+                             copy.image_subresource.num_layers *
+                             BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
         } else if (astc) {
             // BC1 uses 0.5 bytes per texel
             // BC3 uses 1 byte per texel
@@ -956,7 +956,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
 
             const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
             const u32 level_size = plane_dim * copy.image_extent.depth *
-                                   copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+                                   copy.image_subresource.num_layers *
+                                   BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
             decode_scratch.resize_destructive(level_size);
 
             Tegra::Texture::ASTC::Decompress(
@@ -976,10 +977,15 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
                 bpp_div;
             output_offset += static_cast<u32>(copy.buffer_size);
         } else {
-            DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
-
+            const Extent3D image_extent{
+                .width = copy.image_extent.width,
+                .height = copy.image_extent.height * copy.image_subresource.num_layers,
+                .depth = copy.image_extent.depth,
+            };
+            DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
             output_offset += copy.image_extent.width * copy.image_extent.height *
-                             copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
+                             copy.image_subresource.num_layers *
+                             ConvertedBytesPerBlock(info.format);
         }
     }
 }
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp
index 671212a49..16ddbe320 100644
--- a/src/video_core/textures/bcn.cpp
+++ b/src/video_core/textures/bcn.cpp
@@ -3,7 +3,6 @@
 
 #include <stb_dxt.h>
 #include <string.h>
-
 #include "common/alignment.h"
 #include "video_core/textures/bcn.h"
 #include "video_core/textures/workers.h"
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h
index 6464af885..d5d2a16c9 100644
--- a/src/video_core/textures/bcn.h
+++ b/src/video_core/textures/bcn.h
@@ -4,14 +4,13 @@
 #pragma once
 
 #include <span>
-#include <stdint.h>
+
+#include "common/common_types.h"
 
 namespace Tegra::Texture::BCN {
 
-void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
-                 std::span<uint8_t> output);
+void CompressBC1(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output);
 
-void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
-                 std::span<uint8_t> output);
+void CompressBC3(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output);
 
 } // namespace Tegra::Texture::BCN
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index e05d04db3..1f17265d5 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -293,6 +293,11 @@ public:
         return features.features.textureCompressionASTC_LDR;
     }
 
+    /// Returns true if BCn is natively supported, i.e. the textureCompressionBC feature is set.
+    bool IsOptimalBcnSupported() const {
+        return features.features.textureCompressionBC;
+    }
+
     /// Returns true if descriptor aliasing is natively supported.
     bool IsDescriptorAliasingSupported() const {
         return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
@@ -423,6 +428,11 @@ public:
         return extensions.sampler_filter_minmax;
     }
 
+    /// Returns true if the device supports VK_EXT_shader_stencil_export.
+    bool IsExtShaderStencilExportSupported() const {
+        return extensions.shader_stencil_export;
+    }
+
     /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
     bool IsExtDepthRangeUnrestrictedSupported() const {
         return extensions.depth_range_unrestricted;
@@ -492,11 +502,6 @@ public:
         return extensions.vertex_input_dynamic_state;
     }
 
-    /// Returns true if the device supports VK_EXT_shader_stencil_export.
-    bool IsExtShaderStencilExportSupported() const {
-        return extensions.shader_stencil_export;
-    }
-
     /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation
     bool IsExtShaderDemoteToHelperInvocationSupported() const {
         return extensions.shader_demote_to_helper_invocation;