From 303cd311621b25fbb8d55e0ed2cc4c3248de44ad Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 30 Dec 2023 04:37:25 +0100 Subject: [PATCH] SMMU: Add Android compatibility --- src/core/device_memory_manager.inc | 7 +-- .../nvdrv/devices/nvhost_nvdec_common.cpp | 6 +- src/core/memory.cpp | 62 ++++++++----------- src/core/memory.h | 4 +- .../host1x/gpu_device_memory_manager.h | 2 +- src/video_core/host1x/host1x.cpp | 2 + src/video_core/host1x/host1x.h | 1 + src/video_core/memory_manager.h | 2 +- src/video_core/query_cache/query_cache.h | 6 +- 9 files changed, 42 insertions(+), 50 deletions(-) diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 138eb5017..4f883cece 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -217,9 +217,6 @@ DeviceMemoryManager::DeviceMemoryManager(const DeviceMemory& device_memo cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { impl = std::make_unique>(); cached_pages = std::make_unique(); - for (size_t i = 0; i < 1ULL << (33 - 12); i++) { - compressed_device_addr[i] = 0; - } } template @@ -517,7 +514,7 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size u64 cache_begin = 0; u64 uncache_bytes = 0; u64 cache_bytes = 0; - const auto* MarkRegionCaching = &DeviceMemoryManager::DeviceMethods::MarkRegionCaching; + const auto MarkRegionCaching = &DeviceMemoryManager::DeviceMethods::MarkRegionCaching; std::atomic_thread_fence(std::memory_order_acquire); const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); @@ -577,4 +574,4 @@ void DeviceMemoryManager::UpdatePagesCachedCount(DAddr addr, size_t size } } -} // namespace Core \ No newline at end of file +} // namespace Core diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 0b6aa9993..a50577c75 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/k_process.h" #include "core/hle/service/nvdrv/core/container.h" #include "core/hle/service/nvdrv/core/nvmap.h" #include "core/hle/service/nvdrv/core/syncpoint_manager.h" @@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span data, De ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), - cmdlist.size() * sizeof(u32)); + cmdlist.size() * sizeof(u32)); gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); } // Some games expect command_buffers to be written back @@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { return NvResult::Success; } -NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span entries, DeviceFD fd) { +NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span entries, + DeviceFD fd) { const size_t num_entries = std::min(params.num_entries, static_cast(entries.size())); for (size_t i = 0; i < num_entries; i++) { DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f126840cb..1c218566f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA // from outside classes. This also allows modification to the internals of the memory // subsystem without needing to rebuild all files that make use of the memory interface. struct Memory::Impl { - explicit Impl(Core::System& system_) - : system{system_} {} + explicit Impl(Core::System& system_) : system{system_} {} void SetCurrentPageTable(Kernel::KProcess& process) { current_page_table = &process.GetPageTable().GetImpl(); @@ -640,18 +639,6 @@ struct Memory::Impl { LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); - // During boot, current_page_table might not be set yet, in which case we need not flush - /*if (system.IsPoweredOn()) { - auto& gpu = system.GPU(); - for (u64 i = 0; i < size; i++) { - const auto page = base + i; - if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { - - gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); - } - } - }*/ - const auto end = base + size; ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); @@ -823,8 +810,7 @@ struct Memory::Impl { } const size_t core = system.GetCurrentHostThreadID(); auto& current_area = rasterizer_read_areas[core]; - gpu_device_memory->ApplyOpOnPointer( - p, scratch_buffers[core], [&](DAddr address) { + gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { const DAddr end_address = address + size; if (current_area.start_address <= address && end_address <= current_area.end_address) [[likely]] { @@ -852,8 +838,7 @@ struct Memory::Impl { sys_core_guard.unlock(); } }); - gpu_device_memory->ApplyOpOnPointer( - p, scratch_buffers[core], [&](DAddr address) { + gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { auto& current_area = rasterizer_write_areas[core]; PAddr subaddress = address >> YUZU_PAGEBITS; bool do_collection = current_area.last_address == subaddress; @@ -872,12 +857,25 @@ struct Memory::Impl { PAddr last_address; }; - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().InvalidateRegion(GetInteger(dest_addr), size); - } - - void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().FlushRegion(GetInteger(dest_addr), size); + void InvalidateGPUMemory(u8* p, size_t size) { + constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; + const size_t core = std::min(system.GetCurrentHostThreadID(), + sys_core); // any other calls threads go to syscore. + if (!gpu_device_memory) [[unlikely]] { + gpu_device_memory = &system.Host1x().MemoryManager(); + } + // Guard on sys_core; + if (core == sys_core) [[unlikely]] { + sys_core_guard.lock(); + } + SCOPE_EXIT({ + if (core == sys_core) [[unlikely]] { + sys_core_guard.unlock(); + } + }); + auto& gpu = system.GPU(); + gpu_device_memory->ApplyOpOnPointer( + p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); }); } Core::System& system; @@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) impl->MarkRegionDebug(GetInteger(vaddr), size, debug); } -void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->InvalidateRegion(dest_addr, size); -} - -void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->FlushRegion(dest_addr, size); -} - bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { [[maybe_unused]] bool mapped = true; [[maybe_unused]] bool rasterizer = false; @@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { GetInteger(vaddr)); mapped = false; }, - [&] { - impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); - rasterizer = true; - }); + [&] { rasterizer = true; }); + if (rasterizer) { + impl->InvalidateGPUMemory(ptr, size); + } #ifdef __linux__ if (!rasterizer && mapped) { diff --git a/src/core/memory.h b/src/core/memory.h index 47ca6a35a..9d29cfd3f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -486,10 +486,10 @@ public: void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void SetGPUDirtyManagers(std::span managers); - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); + bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); + bool InvalidateSeparateHeap(void* fault_address); - void FlushRegion(Common::ProcessAddress dest_addr, size_t size); private: Core::System& system; diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index 6c7858848..9ccd84b9a 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h @@ -17,7 +17,7 @@ struct MaxwellDeviceTraits { static constexpr bool supports_pinning = false; static constexpr size_t device_virtual_bits = 34; using DeviceInterface = typename VideoCore::RasterizerInterface; - using DeviceMethods = typename MaxwellDeviceMethods; + using DeviceMethods = MaxwellDeviceMethods; }; using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager; diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index b7f9a08cf..c4c7a5883 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_) memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, allocator{std::make_unique>(1 << 12)} {} +Host1x::~Host1x() = default; + } // namespace Host1x } // namespace Tegra diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 13c37e6b4..d72d97b7b 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -21,6 +21,7 @@ namespace Host1x { class Host1x { public: explicit Host1x(Core::System& system); + ~Host1x(); SyncpointManager& GetSyncpointManager() { return syncpoint_manager; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 6b2cd7efb..00d64dcce 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -68,7 +68,7 @@ public: if (!address) { return {}; } - return memory.GetPointer(*address); + return memory.GetPointer(*address); } template diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 508afb10a..b5e90cf8c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -256,8 +256,8 @@ void QueryCacheBase::CounterReport(GPUVAddr addr, QueryType counter_type return std::make_pair(cur_addr >> Core::Memory::YUZU_PAGEBITS, static_cast(cur_addr & Core::Memory::YUZU_PAGEMASK)); }; - u8* pointer = impl->device_memory.GetPointer(cpu_addr); - u8* pointer_timestamp = impl->device_memory.GetPointer(cpu_addr + 8); + u8* pointer = impl->device_memory.template GetPointer(cpu_addr); + u8* pointer_timestamp = impl->device_memory.template GetPointer(cpu_addr + 8); bool is_synced = !Settings::IsGPULevelHigh() && is_fence; std::function operation([this, is_synced, streamer, query_base = query, query_location, pointer, pointer_timestamp] { @@ -561,7 +561,7 @@ bool QueryCacheBase::SemiFlushQueryDirty(QueryCacheBase::QueryLo } if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && False(query_base->flags & QueryFlagBits::IsGuestSynced)) { - auto* ptr = impl->device_memory.GetPointer(query_base->guest_address); + auto* ptr = impl->device_memory.template GetPointer(query_base->guest_address); if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); return false;