diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f5ae57039..09ecc5bad 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -27,6 +27,8 @@ add_library(video_core STATIC
     renderer_base.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
+    renderer_opengl/gl_primitive_assembler.cpp
+    renderer_opengl/gl_primitive_assembler.h
     renderer_opengl/gl_rasterizer.cpp
     renderer_opengl/gl_rasterizer.h
     renderer_opengl/gl_rasterizer_cache.cpp
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 9f5581045..4290da33f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -744,6 +744,12 @@ public:
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
                                                      end_addr_low);
                     }
+
+                    /// GPU address of the first index to read, i.e. past `first` indices.
+                    GPUVAddr IndexStart() const {
+                        const auto index_size = static_cast<size_t>(FormatSizeInBytes());
+                        return StartAddress() + static_cast<size_t>(first) * index_size;
+                    }
                 } index_array;
 
                 INSERT_PADDING_WORDS(0x7);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 578aca789..c142095c5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
     }
 
     AlignBuffer(alignment);
-    GLintptr uploaded_offset = buffer_offset;
+    const GLintptr uploaded_offset = buffer_offset;
 
     Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
 
@@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
                                           std::size_t alignment) {
     AlignBuffer(alignment);
     std::memcpy(buffer_ptr, raw_pointer, size);
-    GLintptr uploaded_offset = buffer_offset;
+    const GLintptr uploaded_offset = buffer_offset;
 
     buffer_ptr += size;
     buffer_offset += size;
     return uploaded_offset;
 }
 
+std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
+    // Align first so the returned pointer and offset both refer to the aligned position
+    AlignBuffer(alignment);
+    const auto reservation = std::make_tuple(buffer_ptr, buffer_offset);
+    // Advance the write cursor past the reserved region; the caller fills it in
+    buffer_ptr += size;
+    buffer_offset += size;
+    return reservation;
+}
+
 void OGLBufferCache::Map(std::size_t max_size) {
     bool invalidate;
     std::tie(buffer_ptr, buffer_offset_base, invalidate) =
@@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
         InvalidateAll();
     }
 }
+
 void OGLBufferCache::Unmap() {
     stream_buffer.Unmap(buffer_offset - buffer_offset_base);
 }
@@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const {
 
 void OGLBufferCache::AlignBuffer(std::size_t alignment) {
     // Align the offset, not the mapped pointer
-    GLintptr offset_aligned =
+    const GLintptr offset_aligned =
         static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
     buffer_ptr += offset_aligned - buffer_offset;
     buffer_offset = offset_aligned;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 6c18461f4..965976334 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -6,6 +6,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <tuple>
 
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
@@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer
 public:
     explicit OGLBufferCache(std::size_t size);
 
+    /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
+    /// allocated.
     GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                           bool cache = true);
 
+    /// Uploads data from host memory. Returns host's buffer offset where it's been allocated.
     GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
 
+    /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
+    std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
+
     void Map(std::size_t max_size);
     void Unmap();
 
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
new file mode 100644
index 000000000..ee1d9601b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -0,0 +1,75 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_primitive_assembler.h"
+
+namespace OpenGL {
+
+/// Number of indices emitted per quad: two triangles of three indices each.
+constexpr u32 TRIANGLES_PER_QUAD = 6;
+/// Quad corners in the order they are emitted to form the two triangles.
+constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
+
+PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
+
+PrimitiveAssembler::~PrimitiveAssembler() = default;
+
+/// Bytes reserved for the triangle indices of `count` quad vertices (sized as GLuint indices).
+std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
+    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
+    // Widen before multiplying so the product is not computed in 32 bits
+    return static_cast<std::size_t>(count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint);
+}
+
+/// Generates u32 triangle indices for `count` sequential quad vertices starting at `first`.
+/// Returns the buffer cache offset of the generated indices.
+GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
+    const std::size_t size{CalculateQuadSize(count)};
+    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);
+
+    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
+        for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
+            const u32 index = first + primitive * 4 + QUAD_MAP[i];
+            std::memcpy(dst_pointer, &index, sizeof(index));
+            dst_pointer += sizeof(index);
+        }
+    }
+
+    return index_offset;
+}
+
+/// Reorders `count` guest quad indices of `index_size` bytes each, read from `gpu_addr`, into
+/// triangle order. Returns the buffer cache offset of the reordered indices.
+GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
+                                             u32 count) {
+    const std::size_t map_size{CalculateQuadSize(count)};
+    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
+
+    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
+    // Fail loudly instead of dereferencing an empty optional on an unmapped address
+    ASSERT_MSG(cpu_addr, "Index buffer GPU address is not mapped");
+    const u8* source{Memory::GetPointer(*cpu_addr)};
+
+    for (u32 primitive = 0; primitive < count / 4; ++primitive) {
+        for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
+            const u32 index = primitive * 4 + QUAD_MAP[i];
+            const u8* src_offset = source + (index * index_size);
+
+            std::memcpy(dst_pointer, src_offset, index_size);
+            dst_pointer += index_size;
+        }
+    }
+
+    return index_offset;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
new file mode 100644
index 000000000..a8cb88eb5
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h
@@ -0,0 +1,39 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/memory_manager.h"
+
+namespace OpenGL {
+
+class OGLBufferCache;
+
+/// Builds triangle index buffers for quad draws inside memory reserved from an OGLBufferCache.
+class PrimitiveAssembler {
+public:
+    explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
+    ~PrimitiveAssembler();
+
+    /// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
+    std::size_t CalculateQuadSize(u32 count) const;
+
+    /// Generates u32 triangle indices for `count` sequential quad vertices starting at `first`.
+    /// `count` must be a multiple of 4. Returns the buffer offset of the generated indices.
+    GLintptr MakeQuadArray(u32 first, u32 count);
+
+    /// Reorders `count` quad indices of `index_size` bytes each, read from `gpu_addr`, into
+    /// triangle order. `count` must be a multiple of 4. Returns the buffer offset of the result.
+    GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
+
+private:
+    OGLBufferCache& buffer_cache;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 587d9dffb..60dcdc184 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12
 MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
+MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
+
+/// Host draw call parameters, filled in by RasterizerOpenGL::SetupDraw.
+struct DrawParameters {
+    GLenum primitive_mode;
+    GLsizei count;
+    GLint current_instance;
+    bool use_indexed;
+    // Only read by non-indexed draws
+    GLint vertex_first;
+    // Only read by indexed draws
+    GLenum index_format;
+    GLint base_vertex;
+    GLintptr index_buffer_offset;
+    /// Issues the GL draw call; the base-instance variants are used when current_instance > 0.
+    void DispatchDraw() const {
+        if (!use_indexed) {
+            if (current_instance > 0) {
+                glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
+                                                  current_instance);
+            } else {
+                glDrawArrays(primitive_mode, vertex_first, count);
+            }
+            return;
+        }
+        const auto indices = reinterpret_cast<const void*>(index_buffer_offset);
+        if (current_instance > 0) {
+            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
+                                                          indices, 1, base_vertex,
+                                                          current_instance);
+        } else {
+            glDrawElementsBaseVertex(primitive_mode, count, index_format, indices, base_vertex);
+        }
+    }
+};
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
@@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() {
     }
 }
 
+/// Builds the draw parameters, placing index data in the buffer cache (quads are triangulated).
+DrawParameters RasterizerOpenGL::SetupDraw() {
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
+
+    DrawParameters params{};
+    params.current_instance = gpu.state.current_instance;
+
+    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
+        // Quads are drawn as indexed triangles: every 4 vertices expand to 6 indices
+        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
+        params.use_indexed = true;
+        params.primitive_mode = GL_TRIANGLES;
+        if (is_indexed) {
+            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+            params.count = (regs.index_array.count / 4) * 6;
+            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
+                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
+                regs.index_array.count);
+            params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+        } else {
+            // MakeQuadArray always generates u32 indexes; it takes the quad vertex count
+            params.index_format = GL_UNSIGNED_INT;
+            params.count = (regs.vertex_buffer.count / 4) * 6;
+            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
+                regs.vertex_buffer.first, regs.vertex_buffer.count);
+        }
+        return params;
+    }
+
+    params.use_indexed = is_indexed;
+    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
+    if (is_indexed) {
+        MICROPROFILE_SCOPE(OpenGL_Index);
+        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
+        params.count = regs.index_array.count;
+        params.index_buffer_offset =
+            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
+        params.base_vertex = static_cast<GLint>(regs.vb_element_base);
+    } else {
+        params.count = regs.vertex_buffer.count;
+        params.vertex_first = regs.vertex_buffer.first;
+    }
+    return params;
+}
+
 void RasterizerOpenGL::SetupShaders() {
     MICROPROFILE_SCOPE(OpenGL_Shader);
     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
     return size;
 }
 
+/// Size in bytes of the guest index buffer: index count times the size of one index.
+std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
+    const auto& index_array = Core::System::GetInstance().GPU().Maxwell3D().regs.index_array;
+    const auto bytes_per_index = static_cast<std::size_t>(index_array.FormatSizeInBytes());
+    return static_cast<std::size_t>(index_array.count) * bytes_per_index;
+}
+
 bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
     DrawArrays();
@@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Draw the vertex batch
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-    const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
-                                static_cast<u64>(regs.index_array.FormatSizeInBytes())};
 
     state.draw.vertex_buffer = buffer_cache.GetHandle();
     state.Apply();
 
     std::size_t buffer_size = CalculateVertexArraysSize();
 
-    if (is_indexed) {
-        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size;
+    // Add space for index buffer (keeping in mind non-core primitives)
+    switch (regs.draw.topology) {
+    case Maxwell::PrimitiveTopology::Quads:
+        buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
+                      primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
+        break;
+    default:
+        if (is_indexed) {
+            buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
+        }
+        break;
     }
 
     // Uniform space for the 5 shader stages
@@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() {
     buffer_cache.Map(buffer_size);
 
     SetupVertexArrays();
-
-    // If indexed mode, copy the index buffer
-    GLintptr index_buffer_offset = 0;
-    if (is_indexed) {
-        MICROPROFILE_SCOPE(OpenGL_Index);
-
-        // Adjust the index buffer offset so it points to the first desired index.
-        auto index_start = regs.index_array.StartAddress();
-        index_start += static_cast<size_t>(regs.index_array.first) *
-                       static_cast<size_t>(regs.index_array.FormatSizeInBytes());
-
-        index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
-    }
-
+    DrawParameters params = SetupDraw();
     SetupShaders();
 
     buffer_cache.Unmap();
@@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
-    const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
-    if (is_indexed) {
-        const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
-
-        if (gpu.state.current_instance > 0) {
-            glDrawElementsInstancedBaseVertexBaseInstance(
-                primitive_mode, regs.index_array.count,
-                MaxwellToGL::IndexFormat(regs.index_array.format),
-                reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
-                gpu.state.current_instance);
-        } else {
-            glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
-                                     MaxwellToGL::IndexFormat(regs.index_array.format),
-                                     reinterpret_cast<const void*>(index_buffer_offset),
-                                     base_vertex);
-        }
-    } else {
-        if (gpu.state.current_instance > 0) {
-            glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
-                                              regs.vertex_buffer.count, 1,
-                                              gpu.state.current_instance);
-        } else {
-            glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
-        }
-    }
+    // Execute draw call
+    params.DispatchDraw();
 
     // Disable scissor test
     state.scissor.enabled = false;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4c8ecbd1c..bf954bb5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_primitive_assembler.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -38,6 +39,7 @@ class EmuWindow;
 namespace OpenGL {
 
 struct ScreenInfo;
+struct DrawParameters;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
@@ -192,12 +194,17 @@ private:
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
     OGLFramebuffer framebuffer;
+    PrimitiveAssembler primitive_assembler{buffer_cache};
     GLint uniform_buffer_alignment;
 
     std::size_t CalculateVertexArraysSize() const;
 
+    std::size_t CalculateIndexBufferSize() const;
+
     void SetupVertexArrays();
 
+    DrawParameters SetupDraw();
+
     void SetupShaders();
 
     enum class AccelDraw { Disabled, Arrays, Indexed };