Correct Kepler Memory on Linear Pushes.
This commit is contained in:
Fernando Sahmkow
committed by FernandoS27
parent 1f4dfb3998
commit 8a099ac99f
			| @@ -10,6 +10,8 @@ | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/textures/convert.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
|  | ||||
| namespace Tegra::Engines { | ||||
|  | ||||
| @@ -27,30 +29,40 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | ||||
|  | ||||
|     switch (method_call.method) { | ||||
|     case KEPLERMEMORY_REG_INDEX(exec): { | ||||
|         state.write_offset = 0; | ||||
|         ProcessExec(); | ||||
|         break; | ||||
|     } | ||||
|     case KEPLERMEMORY_REG_INDEX(data): { | ||||
|         ProcessData(method_call.argument); | ||||
|         ProcessData(method_call.argument, method_call.IsLastCall()); | ||||
|         break; | ||||
|     } | ||||
|     } | ||||
| } | ||||
|  | ||||
| void KeplerMemory::ProcessData(u32 data) { | ||||
|     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | ||||
|     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | ||||
| void KeplerMemory::ProcessExec() { | ||||
|     state.write_offset = 0; | ||||
|     state.copy_size = regs.line_length_in * regs.line_count; | ||||
|     state.inner_buffer.resize(state.copy_size); | ||||
| } | ||||
|  | ||||
|     // We have to invalidate the destination region to evict any outdated surfaces from the cache. | ||||
|     // We do this before actually writing the new data because the destination address might | ||||
|     // contain a dirty surface that will have to be written back to memory. | ||||
|     const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||||
|     rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||||
|     memory_manager.Write<u32>(address, data); | ||||
| void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||||
|     const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||||
|     std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size); | ||||
|     state.write_offset += sub_copy_size; | ||||
|     if (is_last_call) { | ||||
|         UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented"); | ||||
|         if (regs.exec.linear != 0) { | ||||
|             const GPUVAddr address{regs.dest.Address()}; | ||||
|             const auto host_ptr = memory_manager.GetPointer(address); | ||||
|             // We have to invalidate the destination region to evict any outdated surfaces from the | ||||
|             // cache. We do this before actually writing the new data because the destination | ||||
|             // address might contain a dirty surface that will have to be written back to memory. | ||||
|  | ||||
|     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
|  | ||||
|     state.write_offset++; | ||||
|             rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size); | ||||
|             std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size); | ||||
|             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Tegra::Engines | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
|  | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <vector> | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| @@ -51,7 +52,11 @@ public: | ||||
|                     u32 address_high; | ||||
|                     u32 address_low; | ||||
|                     u32 pitch; | ||||
|                     u32 block_dimensions; | ||||
|                     union { | ||||
|                         BitField<0, 4, u32> block_width; | ||||
|                         BitField<4, 4, u32> block_height; | ||||
|                         BitField<8, 4, u32> block_depth; | ||||
|                     }; | ||||
|                     u32 width; | ||||
|                     u32 height; | ||||
|                     u32 depth; | ||||
| @@ -63,6 +68,18 @@ public: | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||||
|                                                      address_low); | ||||
|                     } | ||||
|  | ||||
|                     u32 BlockWidth() const { | ||||
|                         return 1U << block_width.Value(); | ||||
|                     } | ||||
|  | ||||
|                     u32 BlockHeight() const { | ||||
|                         return 1U << block_height.Value(); | ||||
|                     } | ||||
|  | ||||
|                     u32 BlockDepth() const { | ||||
|                         return 1U << block_depth.Value(); | ||||
|                     } | ||||
|                 } dest; | ||||
|  | ||||
|                 struct { | ||||
| @@ -81,6 +98,8 @@ public: | ||||
|  | ||||
|     struct { | ||||
|         u32 write_offset = 0; | ||||
|         u32 copy_size = 0; | ||||
|         std::vector<u8> inner_buffer; | ||||
|     } state{}; | ||||
|  | ||||
| private: | ||||
| @@ -88,7 +107,8 @@ private: | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     MemoryManager& memory_manager; | ||||
|  | ||||
|     void ProcessData(u32 data); | ||||
|     void ProcessExec(); | ||||
|     void ProcessData(u32 data, bool is_last_call); | ||||
| }; | ||||
|  | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user