diff --git a/src/android/.gitignore b/src/android/.gitignore
index 5edb4eeb0..4423a0b45 100644
--- a/src/android/.gitignore
+++ b/src/android/.gitignore
@@ -8,3 +8,21 @@
 /build
 /captures
 .externalNativeBuild
+
+# CXX compile cache
+app/.cxx
+
+# Google Services (e.g. APIs or Firebase)
+google-services.json
+
+# Freeline
+freeline.py
+freeline/
+freeline_project_description.json
+
+# fastlane
+fastlane/report.xml
+fastlane/Preview.html
+fastlane/screenshots
+fastlane/test_output
+fastlane/readme.md
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index 604b0593d..24bbbbf6f 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -345,7 +345,6 @@ void Source::GenerateFrame() {
             break;
         case InterpolationMode::Polyphase:
             // TODO(merry): Implement polyphase interpolation
-            LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear");
             AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
                                 current_frame, frame_position);
             break;
diff --git a/src/citra_qt/applets/mii_selector.cpp b/src/citra_qt/applets/mii_selector.cpp
index 3ee25805f..2099e675e 100644
--- a/src/citra_qt/applets/mii_selector.cpp
+++ b/src/citra_qt/applets/mii_selector.cpp
@@ -8,11 +8,7 @@
 #include <QString>
 #include <QVBoxLayout>
 #include "citra_qt/applets/mii_selector.h"
-#include "common/file_util.h"
 #include "common/string_util.h"
-#include "core/file_sys/archive_extsavedata.h"
-#include "core/file_sys/file_backend.h"
-#include "core/hle/service/ptm/ptm.h"
 
 QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_selector_)
     : QDialog(parent), mii_selector(mii_selector_) {
@@ -33,37 +29,9 @@ QtMiiSelectorDialog::QtMiiSelectorDialog(QWidget* parent, QtMiiSelector* mii_sel
 
     miis.push_back(HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);
     combobox->addItem(tr("Standard Mii"));
-
-    std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
-    FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);
-
-    auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
-    if (archive_result.Succeeded()) {
-        auto archive = std::move(archive_result).Unwrap();
-
-        FileSys::Path file_path = "/CFL_DB.dat";
-        FileSys::Mode mode{};
-        mode.read_flag.Assign(1);
-
-        auto file_result = archive->OpenFile(file_path, mode);
-        if (file_result.Succeeded()) {
-            auto file = std::move(file_result).Unwrap();
-
-            u32 saved_miis_offset = 0x8;
-            // The Mii Maker has a 100 Mii limit on the 3ds
-            for (int i = 0; i < 100; ++i) {
-                HLE::Applets::MiiData mii;
-                std::array<u8, sizeof(mii)> mii_raw;
-                file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
-                std::memcpy(&mii, mii_raw.data(), sizeof(mii));
-                if (mii.mii_id != 0) {
-                    std::string name = Common::UTF16BufferToUTF8(mii.mii_name);
-                    miis.push_back(mii);
-                    combobox->addItem(QString::fromStdString(name));
-                }
-                saved_miis_offset += sizeof(mii);
-            }
-        }
+    for (const auto& mii : Frontend::LoadMiis()) {
+        miis.push_back(mii);
+        combobox->addItem(QString::fromStdString(Common::UTF16BufferToUTF8(mii.mii_name)));
     }
 
     if (combobox->count() > static_cast<int>(config.initially_selected_mii_index)) {
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 90608157e..fdc702521 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -26,6 +26,10 @@
 
 namespace Log {
 
+Filter filter; ///< Global filter; FmtLogMessage (log.h) tests every message against it.
+void SetGlobalFilter(const Filter& f) {
+    filter = f; // stores a copy, so the caller's Filter object is not referenced afterwards
+}
 /**
  * Static state as a singleton.
  */
@@ -58,14 +62,6 @@ public:
         backends.erase(it, backends.end());
     }
 
-    const Filter& GetGlobalFilter() const {
-        return filter;
-    }
-
-    void SetGlobalFilter(const Filter& f) {
-        filter = f;
-    }
-
     Backend* GetBackend(std::string_view backend_name) {
         const auto it =
             std::find_if(backends.begin(), backends.end(),
@@ -144,6 +140,10 @@ void ColorConsoleBackend::Write(const Entry& entry) {
     PrintColoredMessage(entry);
 }
 
+void LogcatBackend::Write(const Entry& entry) {
+    PrintMessageToLogcat(entry); // does nothing unless ANDROID is defined
+}
+
 FileBackend::FileBackend(const std::string& filename) : bytes_written(0) {
     if (FileUtil::Exists(filename + ".old.txt")) {
         FileUtil::Delete(filename + ".old.txt");
@@ -283,10 +283,6 @@ const char* GetLevelName(Level log_level) {
     return "Invalid";
 }
 
-void SetGlobalFilter(const Filter& filter) {
-    Impl::Instance().SetGlobalFilter(filter);
-}
-
 void AddBackend(std::unique_ptr<Backend> backend) {
     Impl::Instance().AddBackend(std::move(backend));
 }
@@ -303,10 +299,6 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
                        unsigned int line_num, const char* function, const char* format,
                        const fmt::format_args& args) {
     auto& instance = Impl::Instance();
-    const auto& filter = instance.GetGlobalFilter();
-    if (!filter.CheckMessage(log_class, log_level))
-        return;
-
     instance.PushEntry(log_class, log_level, filename, line_num, function,
                        fmt::vformat(format, args));
 }
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index a6714ffd0..907c6a297 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -14,8 +14,6 @@
 
 namespace Log {
 
-class Filter;
-
 /**
  * A log entry. Log entries are store in a structured format to permit more varied output
  * formatting on different frontends, as well as facilitating filtering and aggregation.
@@ -83,6 +81,21 @@ public:
     void Write(const Entry& entry) override;
 };
 
+/**
+ * Backend that writes to the Android logcat; Write() does nothing unless ANDROID is defined
+ */
+class LogcatBackend : public Backend {
+public:
+    static const char* Name() {
+        return "logcat"; // identifier that GetName() reports for this backend
+    }
+
+    const char* GetName() const override {
+        return Name();
+    }
+    void Write(const Entry& entry) override;
+};
+
 /**
  * Backend that writes to a file passed into the constructor
  */
@@ -136,10 +149,4 @@ const char* GetLogClassName(Class log_class);
  */
 const char* GetLevelName(Level log_level);
 
-/**
- * The global filter will prevent any messages from even being processed if they are filtered. Each
- * backend can have a filter, but if the level is lower than the global filter, the backend will
- * never get the message
- */
-void SetGlobalFilter(const Filter& filter);
 } // namespace Log
diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h
index bbadbcba1..058c7b345 100644
--- a/src/common/logging/filter.h
+++ b/src/common/logging/filter.h
@@ -9,43 +9,4 @@
 #include <string_view>
 #include "common/logging/log.h"
 
-namespace Log {
-
-/**
- * Implements a log message filter which allows different log classes to have different minimum
- * severity levels. The filter can be changed at runtime and can be parsed from a string to allow
- * editing via the interface or loading from a configuration file.
- */
-class Filter {
-public:
-    /// Initializes the filter with all classes having `default_level` as the minimum level.
-    explicit Filter(Level default_level = Level::Info);
-
-    /// Resets the filter so that all classes have `level` as the minimum displayed level.
-    void ResetAll(Level level);
-    /// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
-    void SetClassLevel(Class log_class, Level level);
-
-    /**
-     * Parses a filter string and applies it to this filter.
-     *
-     * A filter string consists of a space-separated list of filter rules, each of the format
-     * `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
-     * `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
-     * a severity level name which will be set as the minimum logging level of the matched classes.
-     * Rules are applied left to right, with each rule overriding previous ones in the sequence.
-     *
-     * A few examples of filter rules:
-     *  - `*:Info` -- Resets the level of all classes to Info.
-     *  - `Service:Info` -- Sets the level of Service to Info.
-     *  - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
-     */
-    void ParseFilterString(std::string_view filter_view);
-
-    /// Matches class/level combination against the filter, returning true if it passed.
-    bool CheckMessage(Class log_class, Level level) const;
-
-private:
-    std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
-};
-} // namespace Log
+namespace Log {} // namespace Log
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index a14e2ff37..3b3810851 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -4,13 +4,14 @@
 
 #pragma once
 
+#include <array>
 #include <fmt/format.h>
 #include "common/common_types.h"
 
 namespace Log {
 
 // trims up to and including the last of ../, ..\, src/, src\ in a string
-constexpr const char* TrimSourcePath(std::string_view source) {
+inline const char* TrimSourcePath(std::string_view source) {
     const auto rfind = [source](const std::string_view match) {
         return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size());
     };
@@ -113,6 +114,47 @@ enum class Class : ClassType {
     Count              ///< Total number of logging classes
 };
 
+/**
+ * Implements a log message filter which allows different log classes to have different minimum
+ * severity levels. The filter can be changed at runtime and can be parsed from a string to allow
+ * editing via the interface or loading from a configuration file.
+ */
+class Filter {
+public:
+    /// Initializes the filter with all classes having `default_level` as the minimum level.
+    explicit Filter(Level default_level = Level::Info);
+
+    /// Resets the filter so that all classes have `level` as the minimum displayed level.
+    void ResetAll(Level level);
+    /// Sets the minimum level of `log_class` (and not of its subclasses) to `level`.
+    void SetClassLevel(Class log_class, Level level);
+
+    /**
+     * Parses a filter string and applies it to this filter.
+     *
+     * A filter string consists of a space-separated list of filter rules, each of the format
+     * `<class>:<level>`. `<class>` is a log class name, with subclasses separated using periods.
+     * `*` is allowed as a class name and will reset all filters to the specified level. `<level>`
+     * is a severity level name that becomes the minimum logging level of the matched classes.
+     * Rules are applied left to right, with each rule overriding previous ones in the sequence.
+     *
+     * A few examples of filter rules:
+     *  - `*:Info` -- Resets the level of all classes to Info.
+     *  - `Service:Info` -- Sets the level of Service to Info.
+     *  - `Service.FS:Trace` -- Sets the level of the Service.FS class to Trace.
+     */
+    void ParseFilterString(std::string_view filter_view);
+
+    /// Matches class/level combination against the filter, returning true if it passed.
+    bool CheckMessage(Class log_class, Level level) const;
+
+private:
+    std::array<Level, static_cast<std::size_t>(Class::Count)> class_levels;
+};
+extern Filter filter; ///< Global filter, defined in backend.cpp and read by FmtLogMessage.
+
+void SetGlobalFilter(const Filter& f); ///< Replaces the global filter with a copy of `f`.
+
 /// Logs a message to the global logger, using fmt
 void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
                        unsigned int line_num, const char* function, const char* format,
@@ -121,6 +163,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
 template <typename... Args>
 void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsigned int line_num,
                    const char* function, const char* format, const Args&... args) {
+    if (!filter.CheckMessage(log_class, log_level))
+        return; // rejected by the global filter: skip packing the arguments and formatting
+
     FmtLogMessageImpl(log_class, log_level, filename, line_num, function, format,
                       fmt::make_format_args(args...));
 }
diff --git a/src/common/logging/text_formatter.cpp b/src/common/logging/text_formatter.cpp
index aa0dbd0c6..3d919ab10 100644
--- a/src/common/logging/text_formatter.cpp
+++ b/src/common/logging/text_formatter.cpp
@@ -34,13 +34,7 @@ std::string FormatLogMessage(const Entry& entry) {
 
 void PrintMessage(const Entry& entry) {
     const auto str = FormatLogMessage(entry).append(1, '\n');
-#ifdef ANDROID
-    // Android's log level enum are offset by '2'
-    const int android_log_level = static_cast<int>(entry.log_level) + 2;
-    __android_log_print(android_log_level, "CitraNative", "%s", str.c_str());
-#else
     fputs(str.c_str(), stderr);
-#endif
 }
 
 void PrintColoredMessage(const Entry& entry) {
@@ -78,7 +72,7 @@ void PrintColoredMessage(const Entry& entry) {
     }
 
     SetConsoleTextAttribute(console_handle, color);
-#elif !defined(ANDROID)
+#else
 #define ESC "\x1b"
     const char* color = "";
     switch (entry.log_level) {
@@ -111,9 +105,40 @@ void PrintColoredMessage(const Entry& entry) {
 
 #ifdef _WIN32
     SetConsoleTextAttribute(console_handle, original_info.wAttributes);
-#elif !defined(ANDROID)
+#else
     fputs(ESC "[0m", stderr);
 #undef ESC
 #endif
 }
+
+void PrintMessageToLogcat(const Entry& entry) {
+#ifdef ANDROID
+    const auto str = FormatLogMessage(entry);
+    // Initialized so an unexpected log_level can never hand logcat an indeterminate priority.
+    android_LogPriority android_log_priority = ANDROID_LOG_INFO;
+    switch (entry.log_level) {
+    case Level::Trace:
+        android_log_priority = ANDROID_LOG_VERBOSE;
+        break;
+    case Level::Debug:
+        android_log_priority = ANDROID_LOG_DEBUG;
+        break;
+    case Level::Info:
+        android_log_priority = ANDROID_LOG_INFO;
+        break;
+    case Level::Warning:
+        android_log_priority = ANDROID_LOG_WARN;
+        break;
+    case Level::Error:
+        android_log_priority = ANDROID_LOG_ERROR;
+        break;
+    case Level::Critical:
+        android_log_priority = ANDROID_LOG_FATAL;
+        break;
+    case Level::Count:
+        UNREACHABLE();
+    }
+    __android_log_print(android_log_priority, "CitraNative", "%s", str.c_str());
+#endif // ANDROID
+}
 } // namespace Log
diff --git a/src/common/logging/text_formatter.h b/src/common/logging/text_formatter.h
index b6d9e57c8..13430951d 100644
--- a/src/common/logging/text_formatter.h
+++ b/src/common/logging/text_formatter.h
@@ -17,4 +17,6 @@ std::string FormatLogMessage(const Entry& entry);
 void PrintMessage(const Entry& entry);
 /// Prints the same message as `PrintMessage`, but colored according to the severity level.
 void PrintColoredMessage(const Entry& entry);
+/// Formats and prints a log entry to the android logcat.
+void PrintMessageToLogcat(const Entry& entry);
 } // namespace Log
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6ee110869..6b98a0d28 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -108,8 +108,8 @@ add_library(core STATIC
     frontend/framebuffer_layout.h
     frontend/image_interface.h
     frontend/input.h
-    frontend/mic.h
     frontend/mic.cpp
+    frontend/mic.h
     frontend/scope_acquire_context.cpp
     frontend/scope_acquire_context.h
     gdbstub/gdbstub.cpp
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 7ba67320b..72e1ebe06 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -953,6 +953,9 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
 #define INC_PC(l) ptr += sizeof(arm_inst) + l
 #define INC_PC_STUB ptr += sizeof(arm_inst)
 
+#ifdef ANDROID
+#define GDB_BP_CHECK
+#else
 #define GDB_BP_CHECK                                                                               \
     cpu->Cpsr &= ~(1 << 5);                                                                        \
     cpu->Cpsr |= cpu->TFlag << 5;                                                                  \
@@ -965,6 +968,7 @@ unsigned InterpreterMainLoop(ARMul_State* cpu) {
             goto END;                                                                              \
         }                                                                                          \
     }
+#endif
 
 // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a
 // clunky switch statement.
@@ -1652,11 +1656,13 @@ DISPATCH : {
             goto END;
     }
 
+#ifndef ANDROID
     // Find breakpoint if one exists within the block
     if (GDBStub::IsConnected()) {
         breakpoint_data =
             GDBStub::GetNextBreakpointFromAddress(cpu->Reg[15], GDBStub::BreakpointType::Execute);
     }
+#endif
 
     inst_base = (arm_inst*)&trans_cache_buf[ptr];
     GOTO_NEXT_INST;
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp
index 775618a8b..5e773b0e3 100644
--- a/src/core/arm/skyeye_common/armstate.cpp
+++ b/src/core/arm/skyeye_common/armstate.cpp
@@ -182,13 +182,16 @@ void ARMul_State::ResetMPCoreCP15Registers() {
     CP15[CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE] = 0x00000000;
     CP15[CP15_TLB_DEBUG_CONTROL] = 0x00000000;
 }
-
+#ifdef ANDROID // Android builds replace the memory-breakpoint check with an empty function
+static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {}
+#else
 static void CheckMemoryBreakpoint(u32 address, GDBStub::BreakpointType type) {
     if (GDBStub::IsServerEnabled() && GDBStub::CheckBreakpoint(address, type)) {
         LOG_DEBUG(Debug, "Found memory breakpoint @ {:08x}", address);
         GDBStub::Break(true);
     }
 }
+#endif // ANDROID
 
 u8 ARMul_State::ReadMemory8(u32 address) const {
     CheckMemoryBreakpoint(address, GDBStub::BreakpointType::Read);
diff --git a/src/core/frontend/applets/mii_selector.cpp b/src/core/frontend/applets/mii_selector.cpp
index 2ca23f1db..2fdfe3049 100644
--- a/src/core/frontend/applets/mii_selector.cpp
+++ b/src/core/frontend/applets/mii_selector.cpp
@@ -2,7 +2,12 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/file_util.h"
+#include "common/string_util.h"
+#include "core/file_sys/archive_extsavedata.h"
+#include "core/file_sys/file_backend.h"
 #include "core/frontend/applets/mii_selector.h"
+#include "core/hle/service/ptm/ptm.h"
 
 namespace Frontend {
 
@@ -10,6 +15,42 @@ void MiiSelector::Finalize(u32 return_code, HLE::Applets::MiiData mii) {
     data = {return_code, mii};
 }
 
+std::vector<HLE::Applets::MiiData> LoadMiis() {
+    std::vector<HLE::Applets::MiiData> miis; // stays empty if nothing can be opened
+
+    std::string nand_directory{FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)};
+    FileSys::ArchiveFactory_ExtSaveData extdata_archive_factory(nand_directory, true);
+
+    auto archive_result = extdata_archive_factory.Open(Service::PTM::ptm_shared_extdata_id, 0);
+    if (archive_result.Succeeded()) {
+        auto archive = std::move(archive_result).Unwrap();
+
+        FileSys::Path file_path = "/CFL_DB.dat";
+        FileSys::Mode mode{};
+        mode.read_flag.Assign(1);
+
+        auto file_result = archive->OpenFile(file_path, mode);
+        if (file_result.Succeeded()) {
+            auto file = std::move(file_result).Unwrap();
+
+            u32 saved_miis_offset = 0x8; // the first entry is read 8 bytes into the file
+            // The Mii Maker has a 100 Mii limit on the 3ds
+            for (int i = 0; i < 100; ++i) {
+                HLE::Applets::MiiData mii;
+                std::array<u8, sizeof(mii)> mii_raw{}; // zero-filled: a failed read gives id 0
+                file->Read(saved_miis_offset, sizeof(mii), mii_raw.data());
+                std::memcpy(&mii, mii_raw.data(), sizeof(mii));
+                if (mii.mii_id != 0) {
+                    miis.push_back(mii);
+                }
+                saved_miis_offset += sizeof(mii);
+            }
+        }
+    }
+
+    return miis;
+}
+
 void DefaultMiiSelector::Setup(const Frontend::MiiSelectorConfig& config) {
     MiiSelector::Setup(config);
     Finalize(0, HLE::Applets::MiiSelector::GetStandardMiiResult().selected_mii_data);
diff --git a/src/core/frontend/applets/mii_selector.h b/src/core/frontend/applets/mii_selector.h
index 53578282b..3a5633a52 100644
--- a/src/core/frontend/applets/mii_selector.h
+++ b/src/core/frontend/applets/mii_selector.h
@@ -50,6 +50,8 @@ protected:
     MiiSelectorData data;
 };
 
+std::vector<HLE::Applets::MiiData> LoadMiis(); ///< Loads saved Miis from CFL_DB.dat.
+
 class DefaultMiiSelector final : public MiiSelector {
 public:
     void Setup(const MiiSelectorConfig& config) override;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a6f9860eb..bb81f117c 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -54,6 +54,8 @@ add_library(video_core STATIC
     renderer_opengl/post_processing_opengl.h
     renderer_opengl/renderer_opengl.cpp
     renderer_opengl/renderer_opengl.h
+    renderer_opengl/texture_downloader_es.cpp
+    renderer_opengl/texture_downloader_es.h
     renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
     renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
     renderer_opengl/texture_filters/bicubic/bicubic.cpp
@@ -99,11 +101,12 @@ add_library(video_core STATIC
 )
 
 set(SHADER_FILES
+    renderer_opengl/depth_to_color.frag
+    renderer_opengl/depth_to_color.vert
+    renderer_opengl/ds_to_color.frag
     renderer_opengl/texture_filters/anime4k/refine.frag
-    renderer_opengl/texture_filters/anime4k/refine.vert
     renderer_opengl/texture_filters/anime4k/x_gradient.frag
     renderer_opengl/texture_filters/anime4k/y_gradient.frag
-    renderer_opengl/texture_filters/anime4k/y_gradient.vert
     renderer_opengl/texture_filters/bicubic/bicubic.frag
     renderer_opengl/texture_filters/scale_force/scale_force.frag
     renderer_opengl/texture_filters/tex_coord.vert
@@ -121,7 +124,7 @@ endforeach()
 
 add_custom_target(shaders
     BYPRODUCTS ${SHADER_HEADERS}
-    COMMAND cmake -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake
+    COMMAND "${CMAKE_COMMAND}" -P ${CMAKE_CURRENT_SOURCE_DIR}/generate_shaders.cmake
     SOURCES ${SHADER_FILES}
 )
 add_dependencies(video_core shaders)
diff --git a/src/video_core/renderer_opengl/depth_to_color.frag b/src/video_core/renderer_opengl/depth_to_color.frag
new file mode 100644
index 000000000..e69bed890
--- /dev/null
+++ b/src/video_core/renderer_opengl/depth_to_color.frag
@@ -0,0 +1,10 @@
+//? #version 320 es
+
+out highp uint color; // depth sample rescaled to an unsigned integer, see main()
+
+uniform highp sampler2D depth;
+uniform int lod; // mip level of `depth` that texelFetch reads
+
+void main() { // one texel per fragment, fetched at the fragment's own window coordinate
+    color = uint(texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x * (exp2(32.0) - 1.0));
+}
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert b/src/video_core/renderer_opengl/depth_to_color.vert
similarity index 60%
rename from src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert
rename to src/video_core/renderer_opengl/depth_to_color.vert
index 376a67b79..866d43b46 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.vert
+++ b/src/video_core/renderer_opengl/depth_to_color.vert
@@ -1,12 +1,8 @@
-//? #version 330
-out vec2 input_max;
-
-uniform sampler2D tex_size;
+//? #version 320 es
 
 const vec2 vertices[4] =
     vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
 
 void main() {
     gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
-    input_max = textureSize(tex_size, 0) * 2 - 1;
 }
diff --git a/src/video_core/renderer_opengl/ds_to_color.frag b/src/video_core/renderer_opengl/ds_to_color.frag
new file mode 100644
index 000000000..954217064
--- /dev/null
+++ b/src/video_core/renderer_opengl/ds_to_color.frag
@@ -0,0 +1,9 @@
+//? #version 320 es
+#extension GL_ARM_shader_framebuffer_fetch_depth_stencil : enable
+
+out highp uint color; // scaled depth shifted left by 8, stencil OR-ed into the low bits
+
+void main() {
+    color = uint(gl_LastFragDepthARM * (exp2(24.0) - 1.0)) << 8; // depth scaled by 2^24 - 1
+    color |= uint(gl_LastFragStencilARM);
+}
diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp
index 2175c62bd..ee842a859 100644
--- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp
+++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp
@@ -220,9 +220,175 @@ private:
     GLint d24s8_abgr_viewport_u_id;
 };
 
+class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
+public:
+    ShaderD24S8toRGBA8() {
+        constexpr std::string_view vs_source = R"(
+out vec2 dst_coord;
+
+uniform mediump ivec2 dst_size;
+
+const vec2 vertices[4] =
+    vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
+
+void main() {
+    gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
+    dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
+}
+)";
+
+        constexpr std::string_view fs_source = R"(
+in mediump vec2 dst_coord;
+
+out lowp vec4 frag_color;
+
+uniform highp sampler2D depth;
+uniform lowp usampler2D stencil;
+uniform mediump ivec2 dst_size;
+uniform mediump ivec2 src_size;
+uniform mediump ivec2 src_offset;
+
+void main() {
+    mediump ivec2 tex_coord;
+    if (src_size == dst_size) {
+        tex_coord = ivec2(dst_coord);
+    } else {
+        highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
+        mediump int y = tex_index / src_size.x;
+        tex_coord = ivec2(tex_index - y * src_size.x, y);
+    }
+    tex_coord -= src_offset;
+
+    highp uint depth_val =
+        uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
+    lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
+    highp uvec4 components =
+        uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
+    frag_color = vec4(components) / (exp2(8.0) - 1.0);
+}
+)";
+
+        program.Create(vs_source.data(), fs_source.data());
+        dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
+        src_size_loc = glGetUniformLocation(program.handle, "src_size");
+        src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
+        vao.Create();
+
+        auto state = OpenGLState::GetCurState();
+        auto cur_program = state.draw.shader_program;
+        state.draw.shader_program = program.handle;
+        state.Apply();
+        glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); // sampler -> unit 1
+        state.draw.shader_program = cur_program;
+        state.Apply();
+
+        // OES_texture_view doesn't seem to support D24S8 views, at least on adreno
+        // so instead it will do an intermediate copy before running through the shader
+        if (GLAD_GL_ARB_texture_view) {
+            texture_view_func = glTextureView;
+        } else {
+            LOG_INFO(Render_OpenGL,
+                     "Texture views are unsupported, reinterpretation will do intermediate copy");
+            temp_tex.Create();
+        }
+    }
+
+    void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
+                     GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
+                     GLuint draw_fb_handle) override {
+        OpenGLState prev_state = OpenGLState::GetCurState();
+        SCOPE_EXIT({ prev_state.Apply(); });
+
+        OpenGLState state;
+        state.texture_units[0].texture_2d = src_tex;
+
+        if (texture_view_func) {
+            temp_tex.Create(); // glTextureView defines the view but does not bind it
+            texture_view_func(temp_tex.handle, GL_TEXTURE_2D, src_tex, GL_DEPTH24_STENCIL8, 0, 1, 0,
+                              1);
+        } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
+            temp_tex.Release();
+            temp_tex.Create();
+            state.texture_units[1].texture_2d = temp_tex.handle;
+            state.Apply();
+            glActiveTexture(GL_TEXTURE1);
+            glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+            temp_rect = src_rect;
+        }
+
+        state.texture_units[1].texture_2d = temp_tex.handle;
+        state.draw.draw_framebuffer = draw_fb_handle;
+        state.draw.shader_program = program.handle;
+        state.draw.vertex_array = vao.handle;
+        state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
+                          static_cast<GLsizei>(dst_rect.GetWidth()),
+                          static_cast<GLsizei>(dst_rect.GetHeight())};
+        state.Apply();
+
+        glActiveTexture(GL_TEXTURE1);
+        if (texture_view_func) { // the view is bound only by Apply() above, so filter it here
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        } else {
+            glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
+                               temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
+                               src_rect.GetWidth(), src_rect.GetHeight(), 1);
+        }
+        glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
+
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
+                               0);
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+
+        glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
+        glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
+        glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
+        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+        if (texture_view_func) {
+            temp_tex.Release();
+        }
+    }
+
+private:
+    decltype(glTextureView) texture_view_func = nullptr; // stays null without ARB_texture_view
+    OGLProgram program{};
+    GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
+    OGLVertexArray vao{};
+    OGLTexture temp_tex{};
+    Common::Rectangle<u32> temp_rect{0, 0, 0, 0}; // rect temp_tex's storage was last sized for
+};
+
+class CopyImageSubData final : public FormatReinterpreterBase { // raw texture-to-texture copy
+    void Reinterpret(GLuint src_tex, const Common::Rectangle<u32>& src_rect, GLuint read_fb_handle,
+                     GLuint dst_tex, const Common::Rectangle<u32>& dst_rect,
+                     GLuint draw_fb_handle) override { // neither framebuffer handle is used
+        glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
+                           GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
+                           src_rect.GetHeight(), 1);
+    }
+};
+
 FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
-    reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
-                           std::make_unique<PixelBufferD24S8toABGR>());
+    std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
+    if (vendor.find("NVIDIA") != vendor.npos) {
+        reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
+                               std::make_unique<CopyImageSubData>());
+        // Nvidia bends the spec and allows direct copies between color and depth formats,
+        // so we might as well take advantage of it
+        LOG_INFO(Render_OpenGL, "Using glCopyImageSubData for D24S8 to RGBA8 reinterpretation");
+    } else if ((GLAD_GL_ARB_stencil_texturing && GLAD_GL_ARB_texture_storage) || GLES) {
+        reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
+                               std::make_unique<ShaderD24S8toRGBA8>());
+        LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation");
+    } else {
+        reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8},
+                               std::make_unique<PixelBufferD24S8toABGR>());
+        LOG_INFO(Render_OpenGL, "Using pbo for D24S8 to RGBA8 reinterpretation");
+    }
     reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4},
                            std::make_unique<RGBA4toRGB5A1>());
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4097d0cee..4a331c630 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -52,16 +52,17 @@ RasterizerOpenGL::RasterizerOpenGL()
     : is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
       uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
       index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
-      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
+      texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
+      texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
 
-    allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
-                   GLAD_GL_ARB_framebuffer_no_attachments;
+    allow_shadow = GLES || (GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
+                            GLAD_GL_ARB_framebuffer_no_attachments);
     if (!allow_shadow) {
         LOG_WARNING(Render_OpenGL,
                     "Shadow might not be able to render because of unsupported OpenGL extensions.");
     }
 
-    if (!GLAD_GL_ARB_copy_image) {
+    if (!GLAD_GL_ARB_copy_image && !GLES) {
         LOG_WARNING(Render_OpenGL,
                     "ARB_copy_image not supported. Some games might produce artifacts.");
     }
@@ -149,11 +150,15 @@ RasterizerOpenGL::RasterizerOpenGL()
     framebuffer.Create();
 
     // Allocate and bind texture buffer lut textures
+    texture_buffer_lut_lf.Create();
     texture_buffer_lut_rg.Create();
     texture_buffer_lut_rgba.Create();
+    state.texture_buffer_lut_lf.texture_buffer = texture_buffer_lut_lf.handle;
     state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle;
     state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
     state.Apply();
+    glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle());
     glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
     glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
     glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
@@ -777,7 +782,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
     }
 
     OGLTexture temp_tex;
-    if (need_duplicate_texture && GLAD_GL_ARB_copy_image) {
+    if (need_duplicate_texture && (GLAD_GL_ARB_copy_image || GLES)) {
         // The game is trying to use a surface as a texture and framebuffer at the same time
         // which causes unpredictable behavior on the host.
         // Making a copy to sample from eliminates this issue and seems to be fairly cheap.
@@ -821,6 +826,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
 
     // Sync the LUTs within the texture buffer
     SyncAndUploadLUTs();
+    SyncAndUploadLUTsLF();
 
     // Sync the uniform data
     UploadUniforms(accelerate);
@@ -942,6 +948,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
 
     // Blending
     case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
         SyncBlendEnabled();
         break;
     case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
@@ -1062,6 +1072,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
 
     // Logic op
     case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
+        if (GLES) {
+            // With GLES, we need this in the fragment shader to emulate logic operations
+            shader_dirty = true;
+        }
         SyncLogicOp();
         break;
 
@@ -1816,11 +1830,31 @@ void RasterizerOpenGL::SyncAlphaTest() {
 }
 
 void RasterizerOpenGL::SyncLogicOp() {
-    state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.framebuffer.output_merger.logic_op);
+    const auto& regs = Pica::g_state.regs;
+    state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
+
+    if (GLES) {
+        if (!regs.framebuffer.output_merger.alphablend_enable) {
+            if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
+                // Color output is disabled by logic operation. We use color write mask to skip
+                // color but allow depth write.
+                state.color_mask = {};
+            }
+        }
+    }
 }
 
 void RasterizerOpenGL::SyncColorWriteMask() {
     const auto& regs = Pica::g_state.regs;
+    if (GLES) {
+        if (!regs.framebuffer.output_merger.alphablend_enable) {
+            if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
+                // Color output is disabled by logic operation. We use color write mask to skip
+                // color but allow depth write. Return early to avoid overwriting this.
+                return;
+            }
+        }
+    }
 
     auto IsColorWriteEnabled = [&](u32 value) {
         return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
@@ -2005,18 +2039,11 @@ void RasterizerOpenGL::SyncShadowTextureBias() {
     }
 }
 
-void RasterizerOpenGL::SyncAndUploadLUTs() {
-    constexpr std::size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler +
-                                     sizeof(GLvec2) * 128 +     // fog
-                                     sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
-                                     sizeof(GLvec4) * 256 +     // proctex
-                                     sizeof(GLvec4) * 256;      // proctex diff
+void RasterizerOpenGL::SyncAndUploadLUTsLF() {
+    constexpr std::size_t max_size =
+        sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler + sizeof(GLvec2) * 128; // fog
 
-    if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty &&
-        !uniform_block_data.proctex_noise_lut_dirty &&
-        !uniform_block_data.proctex_color_map_dirty &&
-        !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
-        !uniform_block_data.proctex_diff_lut_dirty) {
+    if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty) {
         return;
     }
 
@@ -2024,8 +2051,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
     GLintptr offset;
     bool invalidate;
     std::size_t bytes_used = 0;
-    glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
-    std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(GLvec4));
 
     // Sync the lighting luts
     if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@@ -2050,8 +2077,8 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
                 uniform_block_data.lighting_lut_dirty[index] = false;
             }
         }
+        uniform_block_data.lighting_lut_dirty_any = false;
     }
-    uniform_block_data.lighting_lut_dirty_any = false;
 
     // Sync the fog lut
     if (uniform_block_data.fog_lut_dirty || invalidate) {
@@ -2073,6 +2100,28 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
         uniform_block_data.fog_lut_dirty = false;
     }
 
+    texture_lf_buffer.Unmap(bytes_used);
+}
+
+void RasterizerOpenGL::SyncAndUploadLUTs() {
+    constexpr std::size_t max_size = sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
+                                     sizeof(GLvec4) * 256 +     // proctex
+                                     sizeof(GLvec4) * 256;      // proctex diff
+
+    if (!uniform_block_data.proctex_noise_lut_dirty &&
+        !uniform_block_data.proctex_color_map_dirty &&
+        !uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
+        !uniform_block_data.proctex_diff_lut_dirty) {
+        return;
+    }
+
+    u8* buffer;
+    GLintptr offset;
+    bool invalidate;
+    std::size_t bytes_used = 0;
+    glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
+    std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
+
     // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
     auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
                                    const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a28e9bda1..4748655d5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -233,6 +233,9 @@ private:
 
-    /// Syncs and uploads the lighting, fog and proctex LUTs
+    /// Syncs and uploads the proctex LUTs (noise, color map, alpha map, LUT and diff LUT)
     void SyncAndUploadLUTs();
+
+    /// Syncs and uploads the lighting and fog LUTs
+    void SyncAndUploadLUTsLF();
 
     /// Upload the uniform blocks to the uniform buffer object
     void UploadUniforms(bool accelerate_draw);
@@ -303,6 +306,7 @@ private:
     OGLStreamBuffer uniform_buffer;
     OGLStreamBuffer index_buffer;
     OGLStreamBuffer texture_buffer;
+    OGLStreamBuffer texture_lf_buffer;
     OGLFramebuffer framebuffer;
     GLint uniform_buffer_alignment;
     std::size_t uniform_size_aligned_vs;
@@ -310,6 +314,7 @@ private:
 
     SamplerInfo texture_cube_sampler;
 
+    OGLTexture texture_buffer_lut_lf;
     OGLTexture texture_buffer_lut_rg;
     OGLTexture texture_buffer_lut_rgba;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index d7112126d..25e345098 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -6,6 +6,7 @@
 #include <array>
 #include <atomic>
 #include <bitset>
+#include <cmath>
 #include <cstring>
 #include <iterator>
 #include <memory>
@@ -36,6 +37,7 @@
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_vars.h"
+#include "video_core/renderer_opengl/texture_downloader_es.h"
 #include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
 #include "video_core/utils.h"
 #include "video_core/video_core.h"
@@ -64,13 +66,6 @@ static constexpr std::array<FormatTuple, 5> fb_format_tuples_oes = {{
     {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4},   // RGBA4
 }};
 
-static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
-    {},
-    {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT},   // D24
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
-}};
-
 const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
     if (type == SurfaceType::Color) {
@@ -87,79 +82,6 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
     return tex_tuple;
 }
 
-/**
- * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the
- * texture to a framebuffer.
- * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
- */
-static void GetTexImageOES(GLenum target, GLint level, GLenum format, GLenum type, GLint height,
-                           GLint width, GLint depth, GLubyte* pixels, std::size_t size) {
-    memset(pixels, 0x80, size);
-
-    OpenGLState cur_state = OpenGLState::GetCurState();
-    OpenGLState state;
-
-    GLenum texture_binding = GL_NONE;
-    switch (target) {
-    case GL_TEXTURE_2D:
-        texture_binding = GL_TEXTURE_BINDING_2D;
-        break;
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-        texture_binding = GL_TEXTURE_BINDING_CUBE_MAP;
-        break;
-    case GL_TEXTURE_3D_OES:
-        texture_binding = GL_TEXTURE_BINDING_3D_OES;
-    default:
-        return;
-    }
-
-    GLint texture = 0;
-    glGetIntegerv(texture_binding, &texture);
-    if (!texture) {
-        return;
-    }
-
-    OGLFramebuffer fbo;
-    fbo.Create();
-    state.draw.read_framebuffer = fbo.handle;
-    state.Apply();
-
-    switch (target) {
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture,
-                               level);
-        GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
-        if (status != GL_FRAMEBUFFER_COMPLETE) {
-            LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
-        }
-        glReadPixels(0, 0, width, height, format, type, pixels);
-        break;
-    }
-    case GL_TEXTURE_3D_OES:
-        for (int i = 0; i < depth; i++) {
-            glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_3D,
-                                   texture, level, i);
-            glReadPixels(0, 0, width, height, format, type, pixels + 4 * i * width * height);
-        }
-        break;
-    }
-
-    cur_state.Apply();
-
-    fbo.Release();
-}
-
 template <typename Map, typename Interval>
 static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
     return boost::make_iterator_range(map.equal_range(interval));
@@ -329,8 +251,14 @@ OGLTexture RasterizerCacheOpenGL::AllocateSurfaceTexture(const FormatTuple& form
     cur_state.Apply();
     glActiveTexture(GL_TEXTURE0);
 
-    glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
-                 format_tuple.format, format_tuple.type, nullptr);
+    if (GLES || GLAD_GL_ARB_texture_storage) {
+        // Runtime capability check. Allocate all possible mipmap levels upfront
+        const auto levels = static_cast<GLsizei>(std::log2(std::max(width, height))) + 1;
+        glTexStorage2D(GL_TEXTURE_2D, levels, format_tuple.internal_format, width, height);
+    } else {
+        glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
+                     format_tuple.format, format_tuple.type, nullptr);
+    }
 
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -352,17 +280,22 @@ static void AllocateTextureCube(GLuint texture, const FormatTuple& format_tuple,
     cur_state.texture_cube_unit.texture_cube = texture;
     cur_state.Apply();
     glActiveTexture(TextureUnits::TextureCube.Enum());
-
-    for (auto faces : {
-             GL_TEXTURE_CUBE_MAP_POSITIVE_X,
-             GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
-             GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
-             GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
-             GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
-             GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
-         }) {
-        glTexImage2D(faces, 0, format_tuple.internal_format, width, width, 0, format_tuple.format,
-                     format_tuple.type, nullptr);
+    if (GLES || GLAD_GL_ARB_texture_storage) {
+        // Runtime capability check. Allocate all mipmap levels in case the game uses them later
+        const auto levels = static_cast<GLsizei>(std::log2(width)) + 1;
+        glTexStorage2D(GL_TEXTURE_CUBE_MAP, levels, format_tuple.internal_format, width, width);
+    } else {
+        for (const auto face : {
+                 GL_TEXTURE_CUBE_MAP_POSITIVE_X,
+                 GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+                 GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
+                 GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+                 GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
+                 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
+             }) {
+            glTexImage2D(face, 0, format_tuple.internal_format, width, width, 0,
+                         format_tuple.format, format_tuple.type, nullptr);
+        }
     }
 
     // Restore previous texture bindings
@@ -775,23 +708,28 @@ void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
         LOG_INFO(Render_OpenGL, "Dumping texture to {}", dump_path);
         std::vector<u8> decoded_texture;
         decoded_texture.resize(width * height * 4);
-        glBindTexture(GL_TEXTURE_2D, target_tex);
+        OpenGLState state = OpenGLState::GetCurState();
+        GLuint old_texture = state.texture_units[0].texture_2d;
+        state.Apply();
         /*
            GetTexImageOES is used even if not using OpenGL ES to work around a small issue that
            happens if using custom textures with texture dumping at the same.
            Let's say there's 2 textures that are both 32x32 and one of them gets replaced with a
-           higher quality 256x256 texture. If the 256x256 texture is displayed first and the 32x32
-           texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture will
-           appear in the corner of the 256x256 texture.
-           If texture dumping is enabled and the 32x32 is undumped, Citra will attempt to dump it.
-           Since the underlying OpenGL texture is still 256x256, Citra crashes because it thinks the
-           texture is only 32x32.
+           higher quality 256x256 texture. If the 256x256 texture is displayed first and the
+           32x32 texture gets uploaded to the same underlying OpenGL texture, the 32x32 texture
+           will appear in the corner of the 256x256 texture. If texture dumping is enabled and
+           the 32x32 is undumped, Citra will attempt to dump it. Since the underlying OpenGL
+           texture is still 256x256, Citra crashes because it thinks the texture is only 32x32.
            GetTexImageOES conveniently only dumps the specified region, and works on both
            desktop and ES.
         */
-        GetTexImageOES(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, height, width, 0,
-                       &decoded_texture[0], decoded_texture.size());
-        glBindTexture(GL_TEXTURE_2D, 0);
+        // if the backend isn't OpenGL ES, this won't be initialized yet
+        if (!owner.texture_downloader_es)
+            owner.texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
+        owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE,
+                                                 height, width, &decoded_texture[0]);
+        state.texture_units[0].texture_2d = old_texture;
+        state.Apply();
         Common::FlipRGBA8Texture(decoded_texture, width, height);
         if (!image_interface->EncodePNG(dump_path, decoded_texture, width, height))
             LOG_ERROR(Render_OpenGL, "Failed to save decoded texture");
@@ -901,8 +839,9 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect, GLuint read_fb_
 MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
 void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint read_fb_handle,
                                       GLuint draw_fb_handle) {
-    if (type == SurfaceType::Fill)
+    if (type == SurfaceType::Fill) {
         return;
+    }
 
     MICROPROFILE_SCOPE(OpenGL_TextureDL);
 
@@ -941,9 +880,9 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
 
         glActiveTexture(GL_TEXTURE0);
         if (GLES) {
-            GetTexImageOES(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect.GetHeight(),
-                           rect.GetWidth(), 0, &gl_buffer[buffer_offset],
-                           gl_buffer.size() - buffer_offset);
+            owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
+                                                     rect.GetHeight(), rect.GetWidth(),
+                                                     &gl_buffer[buffer_offset]);
         } else {
             glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
         }
@@ -967,6 +906,20 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect, GLuint
             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
                                    texture.handle, 0);
         }
+        switch (glCheckFramebufferStatus(GL_READ_FRAMEBUFFER)) { // target read by glReadPixels
+        case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete attachment");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete dimensions");
+            break;
+        case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
+            LOG_WARNING(Render_OpenGL, "Framebuffer incomplete missing attachment");
+            break;
+        case GL_FRAMEBUFFER_UNSUPPORTED:
+            LOG_WARNING(Render_OpenGL, "Framebuffer unsupported");
+            break;
+        }
         glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
                      static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
                      tuple.format, tuple.type, &gl_buffer[buffer_offset]);
@@ -1083,13 +1036,18 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     texture_filterer = std::make_unique<TextureFilterer>(Settings::values.texture_filter_name,
                                                          resolution_scale_factor);
     format_reinterpreter = std::make_unique<FormatReinterpreterOpenGL>();
+    if (GLES)
+        texture_downloader_es = std::make_unique<TextureDownloaderES>(false);
 
     read_framebuffer.Create();
     draw_framebuffer.Create();
 }
 
 RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+#ifndef ANDROID
+    // This is for switching renderers, which is unsupported on Android, and costly on shutdown
     ClearAll(false);
+#endif
 }
 
 MICROPROFILE_DEFINE(OpenGL_BlitSurface, "OpenGL", "BlitSurface", MP_RGB(128, 192, 64));
@@ -1304,9 +1262,14 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Pica::Texture::TextureInf
                 width = surface->GetScaledWidth();
                 height = surface->GetScaledHeight();
             }
-            for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
-                glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
-                             height >> level, 0, format_tuple.format, format_tuple.type, nullptr);
+            // With immutable texture storage every mipmap level was already allocated upfront, so
+            // only the glTexImage2D fallback path has to allocate the missing levels here
+            if (!GLES && !GLAD_GL_ARB_texture_storage) {
+                for (u32 level = surface->max_level + 1; level <= max_level; ++level) {
+                    glTexImage2D(GL_TEXTURE_2D, level, format_tuple.internal_format, width >> level,
+                                 height >> level, 0, format_tuple.format, format_tuple.type,
+                                 nullptr);
+                }
             }
             if (surface->is_custom || !texture_filterer->IsNull()) {
                 // TODO: proper mipmap support for custom textures
@@ -1806,6 +1769,8 @@ void RasterizerCacheOpenGL::ClearAll(bool flush) {
 }
 
 void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u32 size, Surface flush_surface) {
+    std::lock_guard lock{mutex};
+
     if (size == 0)
         return;
 
@@ -1842,6 +1807,8 @@ void RasterizerCacheOpenGL::FlushAll() {
 }
 
 void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) {
+    std::lock_guard lock{mutex};
+
     if (size == 0)
         return;
 
@@ -1917,6 +1884,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
 }
 
 void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    std::lock_guard lock{mutex};
+
     if (surface->registered) {
         return;
     }
@@ -1926,6 +1895,8 @@ void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
 }
 
 void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    std::lock_guard lock{mutex};
+
     if (!surface->registered) {
         return;
     }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 108aa7bad..da795a968 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <list>
 #include <memory>
+#include <mutex>
 #include <set>
 #include <tuple>
 #ifdef __GNUC__
@@ -170,6 +171,8 @@ private:
     bool valid = false;
 };
 
+class RasterizerCacheOpenGL;
+
 struct CachedSurface : SurfaceParams, std::enable_shared_from_this<CachedSurface> {
     CachedSurface(RasterizerCacheOpenGL& owner) : owner{owner} {}
     ~CachedSurface();
@@ -266,6 +269,15 @@ struct CachedTextureCube {
     std::shared_ptr<SurfaceWatcher> nz;
 };
 
+inline constexpr std::array<FormatTuple, 4> depth_format_tuples = {{
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16
+    {},                                                            // placeholder entry
+    {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT},   // D24
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8
+}};
+
+class TextureDownloaderES;
+
 class RasterizerCacheOpenGL : NonCopyable {
 public:
     RasterizerCacheOpenGL();
@@ -365,11 +377,14 @@ private:
 
     std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
 
+    std::recursive_mutex mutex;
+
 public:
     OGLTexture AllocateSurfaceTexture(const FormatTuple& format_tuple, u32 width, u32 height);
 
     std::unique_ptr<TextureFilterer> texture_filterer;
     std::unique_ptr<FormatReinterpreterOpenGL> format_reinterpreter;
+    std::unique_ptr<TextureDownloaderES> texture_downloader_es;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c4a429ccb..f1516c4e3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -514,11 +514,21 @@ private:
             }
 
             case OpCode::Id::RCP: {
+                if (!sanitize_mul) {
+                    // When accurate multiplication is OFF, NaN are not really handled. This is a
+                    // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
+                    shader.AddLine("if ({}.x != 0.0)", src1);
+                }
                 SetDest(swizzle, dest_reg, fmt::format("(1.0 / {}.x)", src1), 4, 1);
                 break;
             }
 
             case OpCode::Id::RSQ: {
+                if (!sanitize_mul) {
+                    // When accurate multiplication is OFF, NaN are not really handled. This is a
+                    // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
+                    shader.AddLine("if ({}.x > 0.0)", src1);
+                }
                 SetDest(swizzle, dest_reg, fmt::format("inversesqrt({}.x)", src1), 4, 1);
                 break;
             }
@@ -807,6 +817,13 @@ private:
 
     void Generate() {
         if (sanitize_mul) {
+#ifdef ANDROID
+            // Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here. This seems
+            // sufficient at least for Ocarina of Time and Attack on Titan accurate multiplication
+            // bugs. The macro arguments are parenthesized so compound operands expand correctly.
+            shader.AddLine("#define sanitize_mul(lhs, rhs) "
+                           "mix((lhs) * (rhs), vec4(0.0), isnan((lhs) * (rhs)))");
+#else
             shader.AddLine("vec4 sanitize_mul(vec4 lhs, vec4 rhs) {{");
             ++shader.scope;
             shader.AddLine("vec4 product = lhs * rhs;");
@@ -814,6 +831,7 @@ private:
                            "isnan(lhs)), isnan(product));");
             --shader.scope;
             shader.AddLine("}}\n");
+#endif
         }
 
         // Add declarations for registers
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ee4cea9f2..8e1110b02 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -102,7 +102,9 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
         out += R"(
 out gl_PerVertex {
     vec4 gl_Position;
+#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
     float gl_ClipDistance[2];
+#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
 };
 )";
     }
@@ -127,6 +129,17 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
 
     state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
 
+    if (GLES) {
+        // With GLES, we need these in the fragment shader to emulate logic operations. Both are
+        // read from the `regs` argument so the config matches the registers it was built from.
+        state.alphablend_enable = regs.framebuffer.output_merger.alphablend_enable == 1;
+        state.logic_op = regs.framebuffer.output_merger.logic_op;
+    } else {
+        // We don't need these otherwise, reset them to avoid unnecessary shader generation
+        state.alphablend_enable = {};
+        state.logic_op = {};
+    }
+
     // Copy relevant tev stages fields.
     // We don't sync const_color here because of the high variance, it is a
     // shader uniform instead.
@@ -607,13 +620,15 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
     if (!IsPassThroughTevStage(stage)) {
         const std::string index_name = std::to_string(index);
 
-        out += fmt::format("vec3 color_results_{}[3] = vec3[3](", index_name);
+        out += fmt::format("vec3 color_results_{}_1 = ", index_name);
         AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
-        out += ", ";
+        out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
         AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
-        out += ", ";
+        out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
         AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
-        out += ");\n";
+        out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
+                           "color_results_{}_2, color_results_{}_3);\n",
+                           index_name, index_name, index_name, index_name);
 
         // Round the output of each TEV stage to maintain the PICA's 8 bits of precision
         out += fmt::format("vec3 color_output_{} = byteround(", index_name);
@@ -1216,14 +1231,21 @@ float ProcTexNoiseCoef(vec2 x) {
 ShaderDecompiler::ProgramResult GenerateFragmentShader(const PicaFSConfig& config,
                                                        bool separable_shader) {
     const auto& state = config.state;
+    std::string out;
 
-    std::string out = R"(
+    if (GLES) {
+        out += R"(
+#define ALLOW_SHADOW (defined(CITRA_GLES))
+)";
+    } else {
+        out += R"(
 #extension GL_ARB_shader_image_load_store : enable
 #extension GL_ARB_shader_image_size : enable
 #define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
 )";
+    }
 
-    if (separable_shader) {
+    if (separable_shader && !GLES) {
         out += "#extension GL_ARB_separate_shader_objects : enable\n";
     }
 
@@ -1244,6 +1266,7 @@ uniform sampler2D tex0;
 uniform sampler2D tex1;
 uniform sampler2D tex2;
 uniform samplerCube tex_cube;
+uniform samplerBuffer texture_buffer_lut_lf;
 uniform samplerBuffer texture_buffer_lut_rg;
 uniform samplerBuffer texture_buffer_lut_rgba;
 
@@ -1267,7 +1290,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
 }
 
 float LookupLightingLUT(int lut_index, int index, float delta) {
-    vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
+    vec2 entry = texelFetch(texture_buffer_lut_lf, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
     return entry.r + entry.g * delta;
 }
 
@@ -1519,7 +1542,7 @@ vec4 secondary_fragment_color = vec4(0.0);
         // Generate clamped fog factor from LUT for given fog index
         out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"
                "float fog_f = fog_index - fog_i;\n"
-               "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + "
+               "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_lf, int(fog_i) + "
                "fog_lut_offset).rg;\n"
                "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"
                "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
@@ -1537,8 +1560,8 @@ vec4 secondary_fragment_color = vec4(0.0);
     if (state.shadow_rendering) {
         out += R"(
 #if ALLOW_SHADOW
-uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF);
-uint s = uint(last_tex_env_out.g * 0xFF);
+uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
+uint s = uint(last_tex_env_out.g * float(0xFF));
 ivec2 image_coord = ivec2(gl_FragCoord.xy);
 
 uint old = imageLoad(shadow_buffer, image_coord).x;
@@ -1567,6 +1590,32 @@ do {
         out += "color = byteround(last_tex_env_out);\n";
     }
 
+    // GLES has no glLogicOp, so source-only logic ops are emulated in the shader when not blending
+    if (GLES && !state.alphablend_enable) {
+        switch (state.logic_op) {
+        case FramebufferRegs::LogicOp::Clear:
+            out += "color = vec4(0);\n";
+            break;
+        case FramebufferRegs::LogicOp::Set:
+            out += "color = vec4(1);\n";
+            break;
+        case FramebufferRegs::LogicOp::Copy:
+            // Take the color output as-is
+            break;
+        case FramebufferRegs::LogicOp::CopyInverted:
+            // GLSL has no '~' for float vectors; invert the normalized color arithmetically
+            out += "color = vec4(1.0) - color;\n";
+            break;
+        case FramebufferRegs::LogicOp::NoOp:
+            // We need to discard the color, but not necessarily the depth. This is not possible
+            // with fragment shader alone, so we emulate this behavior on GLES with glColorMask.
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<int>(state.logic_op));
+            UNIMPLEMENTED();
+        }
+    }
+
     out += '}';
 
     return {std::move(out)};
@@ -1574,7 +1623,7 @@ do {
 
 ShaderDecompiler::ProgramResult GenerateTrivialVertexShader(bool separable_shader) {
     std::string out;
-    if (separable_shader) {
+    if (separable_shader && !GLES) {
         out += "#extension GL_ARB_separate_shader_objects : enable\n";
     }
 
@@ -1617,8 +1666,8 @@ void main() {
 
 std::optional<ShaderDecompiler::ProgramResult> GenerateVertexShader(
     const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config, bool separable_shader) {
-    std::string out = "";
-    if (separable_shader) {
+    std::string out;
+    if (separable_shader && !GLES) {
         out += "#extension GL_ARB_separate_shader_objects : enable\n";
     }
 
@@ -1767,8 +1816,8 @@ void EmitPrim(Vertex vtx0, Vertex vtx1, Vertex vtx2) {
 
 ShaderDecompiler::ProgramResult GenerateFixedGeometryShader(const PicaFixedGSConfig& config,
                                                             bool separable_shader) {
-    std::string out = "";
-    if (separable_shader) {
+    std::string out;
+    if (separable_shader && !GLES) {
         out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3b11fa88c..eb0e4cc23 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -61,6 +61,8 @@ struct PicaFSConfigState {
     Pica::RasterizerRegs::DepthBuffering depthmap_enable;
     Pica::TexturingRegs::FogMode fog_mode;
     bool fog_flip;
+    bool alphablend_enable;
+    Pica::FramebufferRegs::LogicOp logic_op;
 
     struct {
         struct {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 1452c84d7..a5664b1ff 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -123,6 +123,7 @@ static void SetShaderSamplerBindings(GLuint shader) {
     SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
 
     // Set the texture samplers to correspond to different lookup table texture units
+    SetShaderSamplerBinding(shader, "texture_buffer_lut_lf", TextureUnits::TextureBufferLUT_LF);
     SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG);
     SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA);
 
@@ -176,7 +177,10 @@ public:
             OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
             program.Create(true, {shader.handle});
             SetShaderUniformBlockBindings(program.handle);
-            SetShaderSamplerBindings(program.handle);
+
+            if (type == GL_FRAGMENT_SHADER) {
+                SetShaderSamplerBindings(program.handle);
+            }
         }
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9ed5e8cc7..a51ad443b 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -14,7 +14,7 @@
 namespace OpenGL {
 
 GLuint LoadShader(const char* source, GLenum type) {
-    const std::string version = GLES ? R"(#version 310 es
+    const std::string version = GLES ? R"(#version 320 es
 
 #define CITRA_GLES
 
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 1871403f9..fef6139d3 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -12,11 +12,15 @@ namespace OpenGL {
 // High precision may or may not supported in GLES3. If it isn't, use medium precision instead.
 static constexpr char fragment_shader_precision_OES[] = R"(
 #ifdef GL_FRAGMENT_PRECISION_HIGH
-    precision highp float;
+precision highp int;
+precision highp float;
 precision highp samplerBuffer;
+precision highp uimage2D;
 #else
-    precision mediump float;
+precision mediump int;
+precision mediump float;
 precision mediump samplerBuffer;
+precision mediump uimage2D;
 #endif // GL_FRAGMENT_PRECISION_HIGH
 )";
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 95ab8525a..89944f80e 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -58,6 +58,7 @@ OpenGLState::OpenGLState() {
     texture_cube_unit.texture_cube = 0;
     texture_cube_unit.sampler = 0;
 
+    texture_buffer_lut_lf.texture_buffer = 0;
     texture_buffer_lut_rg.texture_buffer = 0;
     texture_buffer_lut_rgba.texture_buffer = 0;
 
@@ -169,10 +170,17 @@ void OpenGLState::Apply() const {
     if (blend.enabled != cur_state.blend.enabled) {
         if (blend.enabled) {
             glEnable(GL_BLEND);
-            glDisable(GL_COLOR_LOGIC_OP);
         } else {
             glDisable(GL_BLEND);
-            glEnable(GL_COLOR_LOGIC_OP);
+        }
+
+        // GLES does not support glLogicOp
+        if (!GLES) {
+            if (blend.enabled) {
+                glDisable(GL_COLOR_LOGIC_OP);
+            } else {
+                glEnable(GL_COLOR_LOGIC_OP);
+            }
         }
     }
 
@@ -196,13 +204,11 @@ void OpenGLState::Apply() const {
         glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
     }
 
-    // GLES3 does not support glLogicOp
+    // GLES does not support glLogicOp
     if (!GLES) {
         if (logic_op != cur_state.logic_op) {
             glLogicOp(logic_op);
         }
-    } else {
-        LOG_TRACE(Render_OpenGL, "glLogicOps are unimplemented...");
     }
 
     // Textures
@@ -224,6 +230,12 @@ void OpenGLState::Apply() const {
         glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler);
     }
 
+    // Texture buffer LUTs
+    if (texture_buffer_lut_lf.texture_buffer != cur_state.texture_buffer_lut_lf.texture_buffer) {
+        glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
+        glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_lf.texture_buffer);
+    }
+
     // Texture buffer LUTs
     if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) {
         glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
@@ -354,6 +366,8 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
     }
     if (texture_cube_unit.texture_cube == handle)
         texture_cube_unit.texture_cube = 0;
+    if (texture_buffer_lut_lf.texture_buffer == handle)
+        texture_buffer_lut_lf.texture_buffer = 0;
     if (texture_buffer_lut_rg.texture_buffer == handle)
         texture_buffer_lut_rg.texture_buffer = 0;
     if (texture_buffer_lut_rgba.texture_buffer == handle)
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 3fa585b04..e3b85a297 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -22,7 +22,8 @@ constexpr TextureUnit PicaTexture(int unit) {
     return TextureUnit{unit};
 }
 
-constexpr TextureUnit TextureCube{3};
+constexpr TextureUnit TextureCube{6};
+constexpr TextureUnit TextureBufferLUT_LF{3};
 constexpr TextureUnit TextureBufferLUT_RG{4};
 constexpr TextureUnit TextureBufferLUT_RGBA{5};
 
@@ -101,6 +102,10 @@ public:
         GLuint sampler;      // GL_SAMPLER_BINDING
     } texture_cube_unit;
 
+    struct {
+        GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
+    } texture_buffer_lut_lf;
+
     struct {
         GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
     } texture_buffer_lut_rg;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 113c9aa0a..0d048e8c5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -29,6 +29,7 @@
 #include "core/tracer/recorder.h"
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/gl_state.h"
 #include "video_core/renderer_opengl/gl_vars.h"
 #include "video_core/renderer_opengl/post_processing_opengl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
@@ -39,7 +40,12 @@ namespace OpenGL {
 // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
 // to wait on available presentation frames. There doesn't seem to be much of a downside to a larger
 // number but 9 swap textures at 60FPS presentation allows for 800% speed so thats probably fine
+#ifdef ANDROID
+// Reduce the size of swap_chain, since the UI only allows up to 200% speed.
+constexpr std::size_t SWAP_CHAIN_SIZE = 6;
+#else
 constexpr std::size_t SWAP_CHAIN_SIZE = 9;
+#endif
 
 class OGLTextureMailboxException : public std::runtime_error {
 public:
@@ -96,7 +102,7 @@ public:
         frame->color.Create();
         state.renderbuffer = frame->color.handle;
         state.Apply();
-        glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, width, height);
+        glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, width, height);
 
         // Recreate the FBO for the render target
         frame->render.Release();
@@ -1197,14 +1203,18 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
 
 /// Initialize the renderer
 VideoCore::ResultStatus RendererOpenGL::Init() {
+#ifndef ANDROID
     if (!gladLoadGL()) {
         return VideoCore::ResultStatus::ErrorBelowGL33;
     }
 
+    // Qualcomm has some spammy info messages that are marked as errors but not important
+    // https://developer.qualcomm.com/comment/11845
     if (GLAD_GL_KHR_debug) {
         glEnable(GL_DEBUG_OUTPUT);
         glDebugMessageCallback(DebugHandler, nullptr);
     }
+#endif
 
     const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
     const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
diff --git a/src/video_core/renderer_opengl/texture_downloader_es.cpp b/src/video_core/renderer_opengl/texture_downloader_es.cpp
new file mode 100644
index 000000000..11663512e
--- /dev/null
+++ b/src/video_core/renderer_opengl/texture_downloader_es.cpp
@@ -0,0 +1,254 @@
+// Copyright 2020 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <vector>
+
+#include <fmt/chrono.h>
+
+#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_vars.h"
+#include "video_core/renderer_opengl/texture_downloader_es.h"
+
+#include "shaders/depth_to_color.frag"
+#include "shaders/depth_to_color.vert"
+#include "shaders/ds_to_color.frag"
+
+namespace OpenGL {
+
+/**
+ * Self tests for the texture downloader: logs shader precision, then times and checks downloads
+ */
+void TextureDownloaderES::Test() {
+    auto cur_state = OpenGLState::GetCurState();
+    OpenGLState state;
+    // Log the range and precision the driver reports for each fragment shader numeric type
+    {
+        GLint range[2];
+        GLint precision;
+#define PRECISION_TEST(type)                                                                       \
+    glGetShaderPrecisionFormat(GL_FRAGMENT_SHADER, type, range, &precision);                       \
+    LOG_INFO(Render_OpenGL, #type " range: [{}, {}], precision: {}", range[0], range[1], precision);
+        PRECISION_TEST(GL_LOW_INT);
+        PRECISION_TEST(GL_MEDIUM_INT);
+        PRECISION_TEST(GL_HIGH_INT);
+        PRECISION_TEST(GL_LOW_FLOAT);
+        PRECISION_TEST(GL_MEDIUM_FLOAT);
+        PRECISION_TEST(GL_HIGH_FLOAT);
+#undef PRECISION_TEST
+    }
+    glActiveTexture(GL_TEXTURE0);
+    // Uploads generated texels, downloads them with GetTexImage, and logs timing and mismatches
+    const auto test = [this, &state](FormatTuple tuple, auto original_data, std::size_t tex_size,
+                                     auto data_generator) {
+        OGLTexture texture;
+        texture.Create();
+        state.texture_units[0].texture_2d = texture.handle;
+        state.Apply();
+
+        original_data.resize(tex_size * tex_size);
+        for (std::size_t idx = 0; idx < original_data.size(); ++idx)
+            original_data[idx] = data_generator(idx);
+        glTexStorage2D(GL_TEXTURE_2D, 1, tuple.internal_format, tex_size, tex_size);
+        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_size, tex_size, tuple.format, tuple.type,
+                        original_data.data());
+
+        decltype(original_data) new_data(original_data.size());
+        glFinish();
+        auto start = std::chrono::high_resolution_clock::now();
+        GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_size, tex_size,
+                    new_data.data());
+        glFinish();
+        auto time = std::chrono::high_resolution_clock::now() - start;
+        LOG_INFO(Render_OpenGL, "test took {}", std::chrono::duration<double, std::milli>(time));
+
+        int diff = 0;
+        for (std::size_t idx = 0; idx < original_data.size(); ++idx)
+            if (new_data[idx] - original_data[idx] != diff) {
+                diff = new_data[idx] - original_data[idx];
+                // every time the error between the real and expected value changes, log it
+                // some error is expected in D24 due to floating point precision
+                LOG_WARNING(Render_OpenGL, "difference changed at {:#X}: {:#X} -> {:#X}", idx,
+                            original_data[idx], new_data[idx]);
+            }
+    };
+    LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting");
+    test(depth_format_tuples[3], std::vector<u32>{}, 4096,
+         [](std::size_t idx) { return static_cast<u32>((idx << 8) | (idx & 0xFF)); });
+    LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting");
+    test(depth_format_tuples[2], std::vector<u32>{}, 4096,
+         [](std::size_t idx) { return static_cast<u32>(idx << 8); });
+    LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test starting");
+    test(depth_format_tuples[0], std::vector<u16>{}, 256,
+         [](std::size_t idx) { return static_cast<u16>(idx); });
+
+    cur_state.Apply();
+}
+
+TextureDownloaderES::TextureDownloaderES(bool enable_depth_stencil) {
+    vao.Create();
+    read_fbo_generic.Create();
+
+    depth32_fbo.Create();
+    r32ui_renderbuffer.Create();
+    depth16_fbo.Create();
+    r16_renderbuffer.Create();
+    // Builds a conversion program from the shared vertex shader and caches its "lod" uniform
+    const auto init_program = [](ConversionShader& converter, std::string_view frag) {
+        converter.program.Create(depth_to_color_vert.data(), frag.data());
+        converter.lod_location = glGetUniformLocation(converter.program.handle, "lod");
+    };
+
+    // xperia64: The depth stencil shader currently uses a GLES extension that is not supported
+    // across all devices. It is reportedly broken on Tegra devices and the Nexus 6P, so it is only
+    // built when the caller enables it.
+    if (enable_depth_stencil) {
+        init_program(d24s8_r32ui_conversion_shader, ds_to_color_frag);
+    }
+
+    init_program(d24_r32ui_conversion_shader, depth_to_color_frag);
+    init_program(d16_r16_conversion_shader, R"(
+out highp float color;
+
+uniform highp sampler2D depth;
+uniform int lod;
+
+void main(){
+    color = texelFetch(depth, ivec2(gl_FragCoord.xy), lod).x;
+}
+)");
+
+    sampler.Create();
+    glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    // Allocate the color renderbuffers behind both conversion FBOs, then restore the entry state
+    auto cur_state = OpenGLState::GetCurState();
+    auto state = cur_state;
+    // NOTE(review): the d24s8 program is used below even when enable_depth_stencil is false
+    state.draw.shader_program = d24s8_r32ui_conversion_shader.program.handle;
+    state.draw.draw_framebuffer = depth32_fbo.handle;
+    state.renderbuffer = r32ui_renderbuffer.handle;
+    state.Apply();
+    glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size);
+    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+                              r32ui_renderbuffer.handle);
+    glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1);
+
+    state.draw.draw_framebuffer = depth16_fbo.handle;
+    state.renderbuffer = r16_renderbuffer.handle;
+    state.Apply();
+    glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size);
+    glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+                              r16_renderbuffer.handle);
+
+    cur_state.Apply();
+}
+
+/**
+ * OpenGL ES cannot read depth/stencil formats back with glReadPixels.
+ * This gets around it by drawing the depth texture bound to texture unit 0 into a Red color
+ * renderbuffer that can then be downloaded.
+ * @param format In/out: rewritten to the color format to pass to glReadPixels
+ * @param type In/out: selects the conversion shader; GL_UNSIGNED_INT_24_8 becomes GL_UNSIGNED_INT
+ * @returns Handle of the framebuffer that holds the converted image
+ */
+GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
+                                                GLint height, GLint width) {
+    ASSERT(width <= max_size && height <= max_size);
+    const OpenGLState cur_state = OpenGLState::GetCurState();
+    OpenGLState state;
+    state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle};
+    state.draw.vertex_array = vao.handle;
+
+    const ConversionShader* converter = nullptr;
+    switch (type) {
+    case GL_UNSIGNED_SHORT:
+        state.draw.draw_framebuffer = depth16_fbo.handle;
+        converter = &d16_r16_conversion_shader;
+        format = GL_RED;
+        break;
+    case GL_UNSIGNED_INT:
+        state.draw.draw_framebuffer = depth32_fbo.handle;
+        converter = &d24_r32ui_conversion_shader;
+        format = GL_RED_INTEGER;
+        break;
+    case GL_UNSIGNED_INT_24_8:
+        state.draw.draw_framebuffer = depth32_fbo.handle;
+        converter = &d24s8_r32ui_conversion_shader;
+        format = GL_RED_INTEGER;
+        type = GL_UNSIGNED_INT;
+        break;
+    default:
+        UNREACHABLE_MSG("Destination type not recognized");
+    }
+    state.draw.shader_program = converter->program.handle;
+    state.viewport = {0, 0, width, height};
+    state.Apply();
+    if (converter->program.handle == d24s8_r32ui_conversion_shader.program.handle) {
+        // TODO BreadFish64: the ARM framebuffer reading extension is probably not the most optimal
+        // way to do this, search for another solution
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                               state.texture_units[0].texture_2d, level);
+    }
+
+    glUniform1i(converter->lod_location, level);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+    return state.draw.draw_framebuffer;
+}
+
+/**
+ * OpenGL ES does not support glGetTexImage. Obtain the pixels by attaching the texture to a
+ * framebuffer and reading it back. Depth formats currently return without writing to pixels.
+ * Originally from https://github.com/apitrace/apitrace/blob/master/retrace/glstate_images.cpp
+ * Depth texture download assumes that the texture's format tuple matches what is found in
+ * OpenGL::depth_format_tuples
+ */
+void TextureDownloaderES::GetTexImage(GLenum target, GLuint level, GLenum format, GLenum type,
+                                      GLint height, GLint width, void* pixels) {
+    OpenGLState state = OpenGLState::GetCurState();
+    GLuint texture = 0;
+    const GLuint old_read_buffer = state.draw.read_framebuffer;
+    switch (target) {
+    case GL_TEXTURE_2D:
+        texture = state.texture_units[0].texture_2d;
+        break;
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+        texture = state.texture_cube_unit.texture_cube;
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unexpected target {:x}", target);
+    }
+
+    switch (format) {
+    case GL_DEPTH_COMPONENT:
+    case GL_DEPTH_STENCIL:
+        // unfortunately, the accurate way is too slow for release
+        return;
+        state.draw.read_framebuffer = ConvertDepthToColor(level, format, type, height, width);
+        state.Apply();
+        break;
+    default:
+        state.draw.read_framebuffer = read_fbo_generic.handle;
+        state.Apply();
+        // Cube map faces must be attached through their own face target, not GL_TEXTURE_2D
+        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, target, texture, level);
+    }
+    GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER);
+    if (status != GL_FRAMEBUFFER_COMPLETE) {
+        LOG_DEBUG(Render_OpenGL, "Framebuffer is incomplete, status: {:X}", status);
+    }
+    glReadPixels(0, 0, width, height, format, type, pixels);
+
+    state.draw.read_framebuffer = old_read_buffer;
+    state.Apply();
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/texture_downloader_es.h b/src/video_core/renderer_opengl/texture_downloader_es.h
new file mode 100644
index 000000000..66c27dde1
--- /dev/null
+++ b/src/video_core/renderer_opengl/texture_downloader_es.h
@@ -0,0 +1,36 @@
+// Copyright 2020 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+class OpenGLState;
+
+class TextureDownloaderES {
+    static constexpr u16 max_size = 1024;
+    // max_size is the side length of the conversion renderbuffers; larger depth downloads assert
+    OGLVertexArray vao;
+    OGLFramebuffer read_fbo_generic;
+    OGLFramebuffer depth32_fbo, depth16_fbo;
+    OGLRenderbuffer r32ui_renderbuffer, r16_renderbuffer;
+    struct ConversionShader {
+        OGLProgram program;
+        GLint lod_location{-1};
+    } d24_r32ui_conversion_shader, d16_r16_conversion_shader, d24s8_r32ui_conversion_shader;
+    OGLSampler sampler;
+    // Test is a logging self-test; ConvertDepthToColor draws a depth texture into a color FBO
+    void Test();
+    GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height,
+                               GLint width);
+
+public:
+    explicit TextureDownloaderES(bool enable_depth_stencil);
+    // Reads back one level of the currently bound 2D or cube texture in place of glGetTexImage
+    void GetTexImage(GLenum target, GLuint level, GLenum format, GLenum type, GLint height,
+                     GLint width, void* pixels);
+};
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
index b70cc14f4..69fda08a9 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
+++ b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.cpp
@@ -34,30 +34,14 @@
 #include "video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h"
 
 #include "shaders/refine.frag"
-#include "shaders/refine.vert"
 #include "shaders/tex_coord.vert"
 #include "shaders/x_gradient.frag"
 #include "shaders/y_gradient.frag"
-#include "shaders/y_gradient.vert"
 
 namespace OpenGL {
 
 Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_factor) {
     const OpenGLState cur_state = OpenGLState::GetCurState();
-    const auto setup_temp_tex = [this](TempTex& texture, GLint internal_format, GLint format) {
-        texture.fbo.Create();
-        texture.tex.Create();
-        state.draw.draw_framebuffer = texture.fbo.handle;
-        state.Apply();
-        glActiveTexture(GL_TEXTURE0);
-        glBindTexture(GL_TEXTURE_RECTANGLE, texture.tex.handle);
-        glTexImage2D(GL_TEXTURE_RECTANGLE, 0, internal_format, 1024 * internal_scale_factor,
-                     1024 * internal_scale_factor, 0, format, GL_HALF_FLOAT, nullptr);
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE,
-                               texture.tex.handle, 0);
-    };
-    setup_temp_tex(LUMAD, GL_R16F, GL_RED);
-    setup_temp_tex(XY, GL_RG16F, GL_RG);
 
     vao.Create();
 
@@ -65,17 +49,17 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
         samplers[idx].Create();
         state.texture_units[idx].sampler = samplers[idx].handle;
         glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MIN_FILTER,
-                            idx == 0 ? GL_LINEAR : GL_NEAREST);
+                            idx != 2 ? GL_LINEAR : GL_NEAREST);
         glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_MAG_FILTER,
-                            idx == 0 ? GL_LINEAR : GL_NEAREST);
+                            idx != 2 ? GL_LINEAR : GL_NEAREST);
         glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
         glSamplerParameteri(samplers[idx].handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
     }
     state.draw.vertex_array = vao.handle;
 
     gradient_x_program.Create(tex_coord_vert.data(), x_gradient_frag.data());
-    gradient_y_program.Create(y_gradient_vert.data(), y_gradient_frag.data());
-    refine_program.Create(refine_vert.data(), refine_frag.data());
+    gradient_y_program.Create(tex_coord_vert.data(), y_gradient_frag.data());
+    refine_program.Create(tex_coord_vert.data(), refine_frag.data());
 
     state.draw.shader_program = gradient_y_program.handle;
     state.Apply();
@@ -84,8 +68,6 @@ Anime4kUltrafast::Anime4kUltrafast(u16 scale_factor) : TextureFilterBase(scale_f
     state.draw.shader_program = refine_program.handle;
     state.Apply();
     glUniform1i(glGetUniformLocation(refine_program.handle, "LUMAD"), 1);
-    glUniform1f(glGetUniformLocation(refine_program.handle, "final_scale"),
-                static_cast<GLfloat>(internal_scale_factor) / scale_factor);
 
     cur_state.Apply();
 }
@@ -95,20 +77,48 @@ void Anime4kUltrafast::Filter(GLuint src_tex, const Common::Rectangle<u32>& src_
                               GLuint read_fb_handle, GLuint draw_fb_handle) {
     const OpenGLState cur_state = OpenGLState::GetCurState();
 
+    // These will have handles from the previous texture that was filtered, reset them to avoid
+    // binding invalid textures.
+    state.texture_units[0].texture_2d = 0;
+    state.texture_units[1].texture_2d = 0;
+    state.texture_units[2].texture_2d = 0;
+
+    const auto setup_temp_tex = [this, &src_rect](GLint internal_format, GLint format) {
+        TempTex texture;
+        texture.fbo.Create();
+        texture.tex.Create();
+        state.texture_units[0].texture_2d = texture.tex.handle;
+        state.draw.draw_framebuffer = texture.fbo.handle;
+        state.Apply();
+        glActiveTexture(GL_TEXTURE0);
+        glBindTexture(GL_TEXTURE_2D, texture.tex.handle);
+        const auto width = src_rect.GetWidth() * internal_scale_factor;
+        const auto height = src_rect.GetHeight() * internal_scale_factor;
+        // GL_ARB_texture_storage is a compile-time macro; the GLAD_ flag reports driver support
+        if (GLAD_GL_ARB_texture_storage) {
+            glTexStorage2D(GL_TEXTURE_2D, 1, internal_format, width, height);
+        } else {
+            glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_HALF_FLOAT,
+                         nullptr);
+        }
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                               texture.tex.handle, 0);
+        return texture;
+    };
+    auto XY = setup_temp_tex(GL_RG16F, GL_RG);
+    auto LUMAD = setup_temp_tex(GL_R16F, GL_RED);
+
     state.viewport = {static_cast<GLint>(src_rect.left * internal_scale_factor),
                       static_cast<GLint>(src_rect.bottom * internal_scale_factor),
                       static_cast<GLsizei>(src_rect.GetWidth() * internal_scale_factor),
                       static_cast<GLsizei>(src_rect.GetHeight() * internal_scale_factor)};
     state.texture_units[0].texture_2d = src_tex;
+    state.texture_units[1].texture_2d = LUMAD.tex.handle;
+    state.texture_units[2].texture_2d = XY.tex.handle;
     state.draw.draw_framebuffer = XY.fbo.handle;
     state.draw.shader_program = gradient_x_program.handle;
     state.Apply();
 
-    glActiveTexture(GL_TEXTURE1);
-    glBindTexture(GL_TEXTURE_RECTANGLE, LUMAD.tex.handle);
-    glActiveTexture(GL_TEXTURE2);
-    glBindTexture(GL_TEXTURE_RECTANGLE, XY.tex.handle);
-
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 
     // gradient y pass
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
index 9e89da816..8175ed390 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
+++ b/src/video_core/renderer_opengl/texture_filters/anime4k/anime4k_ultrafast.h
@@ -30,8 +30,6 @@ private:
         OGLTexture tex;
         OGLFramebuffer fbo;
     };
-    TempTex LUMAD;
-    TempTex XY;
 
     std::array<OGLSampler, 3> samplers;
 
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag
index 4417b96f6..569f30078 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag
+++ b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.frag
@@ -1,14 +1,12 @@
 //? #version 330
+precision mediump float;
+
 in vec2 tex_coord;
-in vec2 input_max;
 
 out vec4 frag_color;
 
 uniform sampler2D HOOKED;
-uniform sampler2DRect LUMAD;
-uniform sampler2DRect LUMAG;
-
-uniform float final_scale;
+uniform sampler2D LUMAD;
 
 const float LINE_DETECT_THRESHOLD = 0.4;
 const float STRENGTH = 0.6;
@@ -21,12 +19,12 @@ struct RGBAL {
 };
 
 vec4 getAverage(vec4 cc, vec4 a, vec4 b, vec4 c) {
-    return cc * (1 - STRENGTH) + ((a + b + c) / 3) * STRENGTH;
+    return cc * (1.0 - STRENGTH) + ((a + b + c) / 3.0) * STRENGTH; // blend cc toward the mean of a, b, c, weighted by STRENGTH
 }
 
-#define GetRGBAL(offset)                                                                           \
-    RGBAL(textureOffset(HOOKED, tex_coord, offset),                                                \
-          texture(LUMAD, clamp((gl_FragCoord.xy + offset) * final_scale, vec2(0.0), input_max)).x)
+#define GetRGBAL(x_offset, y_offset)                                                               \
+    RGBAL(textureLodOffset(HOOKED, tex_coord, 0.0, ivec2(x_offset, y_offset)),                     \
+          textureLodOffset(LUMAD, tex_coord, 0.0, ivec2(x_offset, y_offset)).x)
 
 float min3v(float a, float b, float c) {
     return min(min(a, b), c);
@@ -37,23 +35,23 @@ float max3v(float a, float b, float c) {
 }
 
 vec4 Compute() {
-    RGBAL cc = GetRGBAL(ivec2(0));
+    RGBAL cc = GetRGBAL(0, 0);
 
     if (cc.l > LINE_DETECT_THRESHOLD) {
         return cc.c;
     }
 
-    RGBAL tl = GetRGBAL(ivec2(-1, -1));
-    RGBAL t = GetRGBAL(ivec2(0, -1));
-    RGBAL tr = GetRGBAL(ivec2(1, -1));
+    RGBAL tl = GetRGBAL(-1, -1);
+    RGBAL t = GetRGBAL(0, -1);
+    RGBAL tr = GetRGBAL(1, -1);
 
-    RGBAL l = GetRGBAL(ivec2(-1, 0));
+    RGBAL l = GetRGBAL(-1, 0);
 
-    RGBAL r = GetRGBAL(ivec2(1, 0));
+    RGBAL r = GetRGBAL(1, 0);
 
-    RGBAL bl = GetRGBAL(ivec2(-1, 1));
-    RGBAL b = GetRGBAL(ivec2(0, 1));
-    RGBAL br = GetRGBAL(ivec2(1, 1));
+    RGBAL bl = GetRGBAL(-1, 1);
+    RGBAL b = GetRGBAL(0, 1);
+    RGBAL br = GetRGBAL(1, 1);
 
     // Kernel 0 and 4
     float maxDark = max3v(br.l, b.l, bl.l);
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert b/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert
deleted file mode 100644
index 552a218fb..000000000
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/refine.vert
+++ /dev/null
@@ -1,14 +0,0 @@
-//? #version 330
-out vec2 tex_coord;
-out vec2 input_max;
-
-uniform sampler2D HOOKED;
-
-const vec2 vertices[4] =
-    vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
-
-void main() {
-    gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
-    tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
-    input_max = textureSize(HOOKED, 0) * 2.0 - 1.0;
-}
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag
index 49502fac7..8103cb77c 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag
+++ b/src/video_core/renderer_opengl/texture_filters/anime4k/x_gradient.frag
@@ -1,4 +1,6 @@
 //? #version 330
+precision mediump float;
+
 in vec2 tex_coord;
 
 out vec2 frag_color;
@@ -7,7 +9,7 @@ uniform sampler2D tex_input;
 
 const vec3 K = vec3(0.2627, 0.6780, 0.0593);
 // TODO: improve handling of alpha channel
-#define GetLum(xoffset) dot(K, textureOffset(tex_input, tex_coord, ivec2(xoffset, 0)).rgb)
+#define GetLum(xoffset) dot(K, textureLodOffset(tex_input, tex_coord, 0.0, ivec2(xoffset, 0)).rgb)
 
 void main() {
     float l = GetLum(-1);
diff --git a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag b/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag
index a0e820001..81e0d0f6e 100644
--- a/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag
+++ b/src/video_core/renderer_opengl/texture_filters/anime4k/y_gradient.frag
@@ -1,16 +1,18 @@
 //? #version 330
-in vec2 input_max;
+precision mediump float;
+
+in vec2 tex_coord;
 
 out float frag_color;
 
-uniform sampler2DRect tex_input;
+uniform sampler2D tex_input;
 
 void main() {
-    vec2 t = texture(tex_input, min(gl_FragCoord.xy + vec2(0.0, 1.0), input_max)).xy;
-    vec2 c = texture(tex_input, gl_FragCoord.xy).xy;
-    vec2 b = texture(tex_input, max(gl_FragCoord.xy - vec2(0.0, 1.0), vec2(0.0))).xy;
+    vec2 t = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, 1)).xy; // neighbour one texel up in y (offset +1), LOD 0
+    vec2 c = textureLod(tex_input, tex_coord, 0.0).xy; // texel at tex_coord itself, LOD 0
+    vec2 b = textureLodOffset(tex_input, tex_coord, 0.0, ivec2(0, -1)).xy; // neighbour one texel down in y (offset -1), LOD 0
 
-    vec2 grad = vec2(t.x + 2 * c.x + b.x, b.y - t.y);
+    vec2 grad = vec2(t.x + 2.0 * c.x + b.x, b.y - t.y); // x: 1-2-1 weighted sum of the .x samples; y: difference of the .y samples
 
-    frag_color = 1 - length(grad);
+    frag_color = 1.0 - length(grad); // output is one minus the magnitude of grad
 }
diff --git a/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag b/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag
index 2bdab3cf6..f384c7864 100644
--- a/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag
+++ b/src/video_core/renderer_opengl/texture_filters/bicubic/bicubic.frag
@@ -1,4 +1,6 @@
 //? #version 330
+precision mediump float;
+
 in vec2 tex_coord;
 
 out vec4 frag_color;
@@ -18,7 +20,7 @@ vec4 cubic(float v) {
 
 vec4 textureBicubic(sampler2D sampler, vec2 texCoords) {
 
-    vec2 texSize = textureSize(sampler, 0);
+    vec2 texSize = vec2(textureSize(sampler, 0));
     vec2 invTexSize = 1.0 / texSize;
 
     texCoords = texCoords * texSize - 0.5;
diff --git a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag
index 4868d18f7..84f1b3503 100644
--- a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag
+++ b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.frag
@@ -1,4 +1,6 @@
 //? #version 330
+precision mediump float;
+
 in vec2 tex_coord;
 in vec2 source_size;
 in vec2 output_size;
@@ -6,7 +8,7 @@ in vec2 output_size;
 out vec4 frag_color;
 
 uniform sampler2D tex;
-uniform float scale;
+uniform lowp float scale;
 
 const int BLEND_NONE = 0;
 const int BLEND_NORMAL = 1;
@@ -42,12 +44,12 @@ float GetLeftRatio(vec2 center, vec2 origin, vec2 direction) {
     return smoothstep(-sqrt(2.0) / 2.0, sqrt(2.0) / 2.0, v);
 }
 
-vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
-vec2 coord = tex_coord - pos / source_size;
-
 #define P(x, y) textureOffset(tex, coord, ivec2(x, y))
 
 void main() {
+    vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5);
+    vec2 coord = tex_coord - pos / source_size;
+
     //---------------------------------------
     // Input Pixel Mapping:  -|x|x|x|-
     //                       x|A|B|C|x
@@ -142,15 +144,15 @@ void main() {
                               (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) &&
                                IsPixEqual(F, C) && !IsPixEqual(E, I))));
         vec2 origin = vec2(0.0, 1.0 / sqrt(2.0));
-        ivec2 direction = ivec2(1, -1);
+        vec2 direction = vec2(1.0, -1.0);
         if (doLineBlend) {
             bool haveShallowLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && E != G && D != G;
             bool haveSteepLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && E != C && B != C;
             origin = haveShallowLine ? vec2(0.0, 0.25) : vec2(0.0, 0.5);
-            direction.x += haveShallowLine ? 1 : 0;
-            direction.y -= haveSteepLine ? 1 : 0;
+            direction.x += haveShallowLine ? 1.0 : 0.0;
+            direction.y -= haveSteepLine ? 1.0 : 0.0;
         }
         vec4 blendPix = mix(H, F, step(ColorDist(E, F), ColorDist(E, H)));
         res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@@ -169,15 +171,15 @@ void main() {
                               (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) &&
                                IsPixEqual(H, I) && !IsPixEqual(E, G))));
         vec2 origin = vec2(-1.0 / sqrt(2.0), 0.0);
-        ivec2 direction = ivec2(1, 1);
+        vec2 direction = vec2(1.0, 1.0);
         if (doLineBlend) {
             bool haveShallowLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && E != A && B != A;
             bool haveSteepLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && E != I && F != I;
             origin = haveShallowLine ? vec2(-0.25, 0.0) : vec2(-0.5, 0.0);
-            direction.y += haveShallowLine ? 1 : 0;
-            direction.x += haveSteepLine ? 1 : 0;
+            direction.y += haveShallowLine ? 1.0 : 0.0;
+            direction.x += haveSteepLine ? 1.0 : 0.0;
         }
         origin = origin;
         direction = direction;
@@ -198,15 +200,15 @@ void main() {
                               (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) &&
                                IsPixEqual(B, A) && !IsPixEqual(E, C))));
         vec2 origin = vec2(1.0 / sqrt(2.0), 0.0);
-        ivec2 direction = ivec2(-1, -1);
+        vec2 direction = vec2(-1.0, -1.0);
         if (doLineBlend) {
             bool haveShallowLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && E != I && H != I;
             bool haveSteepLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && E != A && D != A;
             origin = haveShallowLine ? vec2(0.25, 0.0) : vec2(0.5, 0.0);
-            direction.y -= haveShallowLine ? 1 : 0;
-            direction.x -= haveSteepLine ? 1 : 0;
+            direction.y -= haveShallowLine ? 1.0 : 0.0;
+            direction.x -= haveSteepLine ? 1.0 : 0.0;
         }
         vec4 blendPix = mix(F, B, step(ColorDist(E, B), ColorDist(E, F)));
         res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
@@ -225,15 +227,15 @@ void main() {
                               (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) &&
                                IsPixEqual(D, G) && !IsPixEqual(E, A))));
         vec2 origin = vec2(0.0, -1.0 / sqrt(2.0));
-        ivec2 direction = ivec2(-1, 1);
+        vec2 direction = vec2(-1.0, 1.0);
         if (doLineBlend) {
             bool haveShallowLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && E != C && F != C;
             bool haveSteepLine =
                 (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && E != G && H != G;
             origin = haveShallowLine ? vec2(0.0, -0.25) : vec2(0.0, -0.5);
-            direction.x -= haveShallowLine ? 1 : 0;
-            direction.y += haveSteepLine ? 1 : 0;
+            direction.x -= haveShallowLine ? 1.0 : 0.0;
+            direction.y += haveSteepLine ? 1.0 : 0.0;
         }
         vec4 blendPix = mix(D, B, step(ColorDist(E, B), ColorDist(E, D)));
         res = mix(res, blendPix, GetLeftRatio(pos, origin, direction));
diff --git a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert
index adf45d564..63905075f 100644
--- a/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert
+++ b/src/video_core/renderer_opengl/texture_filters/xbrz/xbrz_freescale.vert
@@ -4,7 +4,7 @@ out vec2 source_size;
 out vec2 output_size;
 
 uniform sampler2D tex;
-uniform float scale;
+uniform lowp float scale;
 
 const vec2 vertices[4] =
     vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
@@ -12,6 +12,6 @@ const vec2 vertices[4] =
 void main() {
     gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
     tex_coord = (vertices[gl_VertexID] + 1.0) / 2.0;
-    source_size = textureSize(tex, 0);
+    source_size = vec2(textureSize(tex, 0)); // size of tex at level 0, explicitly converted to vec2
     output_size = source_size * scale;
 }