mirror of
				https://git.suyu.dev/suyu/suyu
				synced 2025-10-30 15:39:02 -05:00 
			
		
		
		
	Propagate depth and block_depth to modules using decoders
This commit is contained in:
		| @@ -62,14 +62,16 @@ void Fermi2D::HandleSurfaceCopy() { | ||||
|         u8* dst_buffer = Memory::GetPointer(dest_cpu); | ||||
|         if (!regs.src.linear && regs.dst.linear) { | ||||
|             // If the input is tiled and the output is linear, deswizzle the input and copy it over. | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, | ||||
|                                       dst_bytes_per_pixel, src_buffer, dst_buffer, true, | ||||
|                                       regs.src.BlockHeight()); | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, | ||||
|                                       src_bytes_per_pixel, dst_bytes_per_pixel, src_buffer, | ||||
|                                       dst_buffer, true, regs.src.BlockHeight(), | ||||
|                                       regs.src.BlockDepth()); | ||||
|         } else { | ||||
|             // If the input is linear and the output is tiled, swizzle the input and copy it over. | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel, | ||||
|                                       dst_bytes_per_pixel, dst_buffer, src_buffer, false, | ||||
|                                       regs.dst.BlockHeight()); | ||||
|             Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, | ||||
|                                       src_bytes_per_pixel, dst_bytes_per_pixel, dst_buffer, | ||||
|                                       src_buffer, false, regs.dst.BlockHeight(), | ||||
|                                       regs.dst.BlockDepth()); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -68,12 +68,14 @@ void MaxwellDMA::HandleCopy() { | ||||
|  | ||||
|     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | ||||
|         // If the input is tiled and the output is linear, deswizzle the input and copy it over. | ||||
|         Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer, | ||||
|                                   dst_buffer, true, regs.src_params.BlockHeight()); | ||||
|         Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, | ||||
|                                   regs.src_params.size_z, 1, 1, src_buffer, dst_buffer, true, | ||||
|                                   regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); | ||||
|     } else { | ||||
|         // If the input is linear and the output is tiled, swizzle the input and copy it over. | ||||
|         Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer, | ||||
|                                   src_buffer, false, regs.dst_params.BlockHeight()); | ||||
|         Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, | ||||
|                                   regs.dst_params.size_z, 1, 1, dst_buffer, src_buffer, false, | ||||
|                                   regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -43,6 +43,10 @@ public: | ||||
|             u32 BlockHeight() const { | ||||
|                 return 1 << block_height; | ||||
|             } | ||||
|  | ||||
|             u32 BlockDepth() const { | ||||
|                 return 1 << block_depth; | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); | ||||
|   | ||||
| @@ -323,8 +323,8 @@ static bool IsFormatBCn(PixelFormat format) { | ||||
| } | ||||
|  | ||||
| template <bool morton_to_gl, PixelFormat format> | ||||
| void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::size_t gl_buffer_size, | ||||
|                 VAddr addr) { | ||||
| void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, u8* gl_buffer, | ||||
|                 std::size_t gl_buffer_size, VAddr addr) { | ||||
|     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | ||||
|     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||||
|  | ||||
| @@ -333,7 +333,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si | ||||
|         // pixel values. | ||||
|         const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | ||||
|         const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( | ||||
|             addr, tile_size, bytes_per_pixel, stride, height, block_height); | ||||
|             addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth); | ||||
|         const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; | ||||
|         memcpy(gl_buffer, data.data(), size_to_copy); | ||||
|     } else { | ||||
| @@ -345,7 +345,7 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, std::si | ||||
|     } | ||||
| } | ||||
|  | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), | ||||
|                             SurfaceParams::MaxPixelFormat> | ||||
|     morton_to_gl_fns = { | ||||
|         // clang-format off | ||||
| @@ -403,7 +403,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), | ||||
|         // clang-format on | ||||
| }; | ||||
|  | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u8*, std::size_t, VAddr), | ||||
| static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, VAddr), | ||||
|                             SurfaceParams::MaxPixelFormat> | ||||
|     gl_to_morton_fns = { | ||||
|         // clang-format off | ||||
| @@ -827,25 +827,27 @@ void CachedSurface::LoadGLBuffer() { | ||||
|  | ||||
|     if (params.is_tiled) { | ||||
|         gl_buffer.resize(total_size); | ||||
|         u32 depth = params.depth; | ||||
|         u32 block_depth = params.block_depth; | ||||
|  | ||||
|         ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | ||||
|                    params.block_width, static_cast<u32>(params.target)); | ||||
|         ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}", | ||||
|                    params.block_depth, static_cast<u32>(params.target)); | ||||
|  | ||||
|         // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do | ||||
|         // this for 3D textures, etc. | ||||
|         switch (params.target) { | ||||
|         case SurfaceParams::SurfaceTarget::Texture2D: | ||||
|             // Pass impl. to the fallback code below | ||||
|             // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. | ||||
|             depth = 1U; | ||||
|             block_depth = 1U; | ||||
|             break; | ||||
|         case SurfaceParams::SurfaceTarget::Texture2DArray: | ||||
|         case SurfaceParams::SurfaceTarget::TextureCubemap: | ||||
|             depth = 1U; | ||||
|             block_depth = 1U; | ||||
|             for (std::size_t index = 0; index < params.depth; ++index) { | ||||
|                 const std::size_t offset{index * copy_size}; | ||||
|                 morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( | ||||
|                     params.width, params.block_height, params.height, gl_buffer.data() + offset, | ||||
|                     copy_size, params.addr + offset); | ||||
|                     params.width, params.block_height, params.height, 1U, 1U, | ||||
|                     gl_buffer.data() + offset, copy_size, params.addr + offset); | ||||
|             } | ||||
|             break; | ||||
|         default: | ||||
| @@ -854,9 +856,11 @@ void CachedSurface::LoadGLBuffer() { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         const std::size_t size = copy_size * depth; | ||||
|  | ||||
|         morton_to_gl_fns[static_cast<std::size_t>(params.pixel_format)]( | ||||
|             params.width, params.block_height, params.height, gl_buffer.data(), copy_size, | ||||
|             params.addr); | ||||
|             params.width, params.block_height, params.height, block_depth, depth, gl_buffer.data(), | ||||
|             size, params.addr); | ||||
|     } else { | ||||
|         const u8* const texture_src_data_end{texture_src_data + total_size}; | ||||
|         gl_buffer.assign(texture_src_data, texture_src_data_end); | ||||
|   | ||||
| @@ -51,18 +51,18 @@ void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool un | ||||
|                            const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||||
|                            const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||||
|     std::array<u8*, 2> data_ptrs; | ||||
|     u32 z_adress = tile_offset; | ||||
|     u32 z_address = tile_offset; | ||||
|     const u32 gob_size_x = 64; | ||||
|     const u32 gob_size_y = 8; | ||||
|     const u32 gob_size_z = 1; | ||||
|     const u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | ||||
|     for (u32 z = z_start; z < z_end; z++) { | ||||
|         u32 y_adress = z_adress; | ||||
|         u32 y_address = z_address; | ||||
|         u32 pixel_base = layer_z * z + y_start * stride_x; | ||||
|         for (u32 y = y_start; y < y_end; y++) { | ||||
|             const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||||
|             for (u32 x = x_start; x < x_end; x++) { | ||||
|                 const u32 swizzle_offset{y_adress + table[x * bytes_per_pixel % gob_size_x]}; | ||||
|                 const u32 swizzle_offset{y_address + table[x * bytes_per_pixel % gob_size_x]}; | ||||
|                 const u32 pixel_index{x * out_bytes_per_pixel + pixel_base}; | ||||
|                 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||||
|                 data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | ||||
| @@ -70,9 +70,9 @@ void Precise3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool un | ||||
|             } | ||||
|             pixel_base += stride_x; | ||||
|             if ((y + 1) % gob_size_y == 0) | ||||
|                 y_adress += gob_size; | ||||
|                 y_address += gob_size; | ||||
|         } | ||||
|         z_adress += xy_block_size; | ||||
|         z_address += xy_block_size; | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -136,7 +136,7 @@ void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswi | ||||
|                         const u32 xy_block_size, const u32 layer_z, const u32 stride_x, | ||||
|                         const u32 bytes_per_pixel, const u32 out_bytes_per_pixel) { | ||||
|     std::array<u8*, 2> data_ptrs; | ||||
|     u32 z_adress = tile_offset; | ||||
|     u32 z_address = tile_offset; | ||||
|     const u32 x_startb = x_start * bytes_per_pixel; | ||||
|     const u32 x_endb = x_end * bytes_per_pixel; | ||||
|     const u32 copy_size = 16; | ||||
| @@ -145,12 +145,12 @@ void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswi | ||||
|     const u32 gob_size_z = 1; | ||||
|     const u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | ||||
|     for (u32 z = z_start; z < z_end; z++) { | ||||
|         u32 y_adress = z_adress; | ||||
|         u32 y_address = z_address; | ||||
|         u32 pixel_base = layer_z * z + y_start * stride_x; | ||||
|         for (u32 y = y_start; y < y_end; y++) { | ||||
|             const auto& table = fast_swizzle_table[y % gob_size_y]; | ||||
|             for (u32 xb = x_startb; xb < x_endb; xb += copy_size) { | ||||
|                 const u32 swizzle_offset{y_adress + table[(xb / copy_size) % 4]}; | ||||
|                 const u32 swizzle_offset{y_address + table[(xb / copy_size) % 4]}; | ||||
|                 const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | ||||
|                 const u32 pixel_index{out_x + pixel_base}; | ||||
|                 data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | ||||
| @@ -159,9 +159,9 @@ void Fast3DProcessBlock(u8* swizzled_data, u8* unswizzled_data, const bool unswi | ||||
|             } | ||||
|             pixel_base += stride_x; | ||||
|             if ((y + 1) % gob_size_y == 0) | ||||
|                 y_adress += gob_size; | ||||
|                 y_address += gob_size; | ||||
|         } | ||||
|         z_adress += xy_block_size; | ||||
|         z_address += xy_block_size; | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -214,14 +214,15 @@ void Fast3DSwizzledData(u8* swizzled_data, u8* unswizzled_data, const bool unswi | ||||
|     } | ||||
| } | ||||
|  | ||||
| void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, | ||||
|                       u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height) { | ||||
| void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | ||||
|                       u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | ||||
|                       bool unswizzle, u32 block_height, u32 block_depth) { | ||||
|     if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % 16 == 0) { | ||||
|         Fast3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, 1U, | ||||
|                            bytes_per_pixel, out_bytes_per_pixel, block_height, 1U); | ||||
|         Fast3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||||
|                            bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); | ||||
|     } else { | ||||
|         Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, 1U, | ||||
|                               bytes_per_pixel, out_bytes_per_pixel, block_height, 1U); | ||||
|         Precise3DSwizzledData(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | ||||
|                               bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -269,10 +270,11 @@ u32 BytesPerPixel(TextureFormat format) { | ||||
| } | ||||
|  | ||||
| std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, | ||||
|                                  u32 height, u32 block_height) { | ||||
|                                  u32 height, u32 depth, u32 block_height, u32 block_depth) { | ||||
|     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); | ||||
|     CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, | ||||
|                      Memory::GetPointer(address), unswizzled_data.data(), true, block_height); | ||||
|     CopySwizzledData(width / tile_size, height / tile_size, depth, bytes_per_pixel, bytes_per_pixel, | ||||
|                      Memory::GetPointer(address), unswizzled_data.data(), true, block_height, | ||||
|                      block_depth); | ||||
|     return unswizzled_data; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -14,17 +14,14 @@ namespace Tegra::Texture { | ||||
|  * Unswizzles a swizzled texture without changing its format. | ||||
|  */ | ||||
| std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, | ||||
|                                  u32 height, u32 block_height = TICEntry::DefaultBlockHeight); | ||||
|  | ||||
| /** | ||||
|  * Unswizzles a swizzled depth texture without changing its format. | ||||
|  */ | ||||
| std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, | ||||
|                                       u32 block_height = TICEntry::DefaultBlockHeight); | ||||
|                                  u32 height, u32 depth, | ||||
|                                  u32 block_height = TICEntry::DefaultBlockHeight, | ||||
|                                  u32 block_depth = TICEntry::DefaultBlockHeight); | ||||
|  | ||||
| /// Copies texture data from a buffer and performs swizzling/unswizzling as necessary. | ||||
| void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_per_pixel, | ||||
|                       u8* swizzled_data, u8* unswizzled_data, bool unswizzle, u32 block_height); | ||||
| void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | ||||
|                       u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | ||||
|                       bool unswizzle, u32 block_height, u32 block_depth); | ||||
|  | ||||
| /** | ||||
|  * Decodes an unswizzled texture into a A8R8G8B8 texture. | ||||
|   | ||||
| @@ -141,6 +141,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | ||||
|  | ||||
| struct TICEntry { | ||||
|     static constexpr u32 DefaultBlockHeight = 16; | ||||
|     static constexpr u32 DefaultBlockDepth = 1; | ||||
|  | ||||
|     union { | ||||
|         u32 raw; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 FernandoS27
					FernandoS27