GPU SMMU: Expand to 34 bits

This commit is contained in:
Fernando Sahmkow 2023-12-29 09:50:04 +01:00 committed by Liam
parent bad705f245
commit 96fd1348ae
12 changed files with 86 additions and 29 deletions

View File

@@ -80,6 +80,15 @@ void NvMap::UnmapHandle(Handle& handle_description) {
handle_description.unmap_queue_entry.reset(); handle_description.unmap_queue_entry.reset();
} }
// Free and unmap the handle from Host1x GMMU
if (handle_description.pin_virt_address) {
host1x.GMMU().Unmap(static_cast<GPUVAddr>(handle_description.pin_virt_address),
handle_description.aligned_size);
host1x.Allocator().Free(handle_description.pin_virt_address,
static_cast<u32>(handle_description.aligned_size));
handle_description.pin_virt_address = 0;
}
// Free and unmap the handle from the SMMU // Free and unmap the handle from the SMMU
auto& smmu = host1x.MemoryManager(); auto& smmu = host1x.MemoryManager();
smmu.Unmap(handle_description.d_address, handle_description.aligned_size); smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
@@ -141,6 +150,17 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
} }
std::scoped_lock lock(handle_description->mutex); std::scoped_lock lock(handle_description->mutex);
const auto map_low_area = [&] {
if (handle_description->pin_virt_address == 0) {
auto& gmmu_allocator = host1x.Allocator();
auto& gmmu = host1x.GMMU();
u32 address =
gmmu_allocator.Allocate(static_cast<u32>(handle_description->aligned_size));
gmmu.Map(static_cast<GPUVAddr>(address), handle_description->d_address,
handle_description->aligned_size);
handle_description->pin_virt_address = address;
}
};
if (!handle_description->pins) { if (!handle_description->pins) {
// If we're in the unmap queue we can just remove ourselves and return since we're already // If we're in the unmap queue we can just remove ourselves and return since we're already
// mapped // mapped
@@ -152,6 +172,12 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
unmap_queue.erase(*handle_description->unmap_queue_entry); unmap_queue.erase(*handle_description->unmap_queue_entry);
handle_description->unmap_queue_entry.reset(); handle_description->unmap_queue_entry.reset();
if (low_area_pin) {
map_low_area();
handle_description->pins++;
return static_cast<DAddr>(handle_description->pin_virt_address);
}
handle_description->pins++; handle_description->pins++;
return handle_description->d_address; return handle_description->d_address;
} }
@@ -162,10 +188,7 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
DAddr address{}; DAddr address{};
auto& smmu = host1x.MemoryManager(); auto& smmu = host1x.MemoryManager();
auto* session = core.GetSession(session_id); auto* session = core.GetSession(session_id);
while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {
auto allocate = std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
//: std::bind(&Tegra::MaxwellDeviceMemoryManager::Allocate, &smmu, _1);
while ((address = allocate(static_cast<size_t>(handle_description->aligned_size))) == 0) {
// Free handles until the allocation succeeds // Free handles until the allocation succeeds
std::scoped_lock queueLock(unmap_queue_lock); std::scoped_lock queueLock(unmap_queue_lock);
if (auto freeHandleDesc{unmap_queue.front()}) { if (auto freeHandleDesc{unmap_queue.front()}) {
@@ -185,7 +208,14 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
session->smmu_id); session->smmu_id);
} }
if (low_area_pin) {
map_low_area();
}
handle_description->pins++; handle_description->pins++;
if (low_area_pin) {
return static_cast<DAddr>(handle_description->pin_virt_address);
}
return handle_description->d_address; return handle_description->d_address;
} }

View File

@@ -95,7 +95,6 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); offset += SliceVectors(data, fence_thresholds, params.fence_count, offset);
auto& gpu = system.GPU(); auto& gpu = system.GPU();
//auto& device_memory = system.Host1x().MemoryManager();
auto* session = core.GetSession(sessions[fd]); auto* session = core.GetSession(sessions[fd]);
if (gpu.UseNvdec()) { if (gpu.UseNvdec()) {

View File

@@ -88,6 +88,7 @@ struct GPU::Impl {
renderer = std::move(renderer_); renderer = std::move(renderer_);
rasterizer = renderer->ReadRasterizer(); rasterizer = renderer->ReadRasterizer();
host1x.MemoryManager().BindInterface(rasterizer); host1x.MemoryManager().BindInterface(rasterizer);
host1x.GMMU().BindRasterizer(rasterizer);
} }
/// Flush all current written commands into the host GPU for execution. /// Flush all current written commands into the host GPU for execution.

View File

@@ -32,13 +32,12 @@ H264::~H264() = default;
std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state,
size_t* out_configuration_size, bool is_first_frame) { size_t* out_configuration_size, bool is_first_frame) {
H264DecoderContext context; H264DecoderContext context;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &context, host1x.GMMU().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext));
sizeof(H264DecoderContext));
const s64 frame_number = context.h264_parameter_set.frame_number.Value(); const s64 frame_number = context.h264_parameter_set.frame_number.Value();
if (!is_first_frame && frame_number != 0) { if (!is_first_frame && frame_number != 0) {
frame.resize_destructive(context.stream_len); frame.resize_destructive(context.stream_len);
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size());
*out_configuration_size = 0; *out_configuration_size = 0;
return frame; return frame;
} }
@@ -159,8 +158,8 @@ std::span<const u8> H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters
std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); std::memcpy(frame.data(), encoded_header.data(), encoded_header.size());
*out_configuration_size = encoded_header.size(); *out_configuration_size = encoded_header.size();
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, host1x.GMMU().ReadBlock(state.frame_bitstream_offset, frame.data() + encoded_header.size(),
frame.data() + encoded_header.size(), context.stream_len); context.stream_len);
return frame; return frame;
} }

View File

@@ -14,7 +14,7 @@ VP8::~VP8() = default;
std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) {
VP8PictureInfo info; VP8PictureInfo info;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); host1x.GMMU().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo));
const bool is_key_frame = info.key_frame == 1u; const bool is_key_frame = info.key_frame == 1u;
const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size); const auto bitstream_size = static_cast<size_t>(info.vld_buffer_size);
@@ -45,7 +45,7 @@ std::span<const u8> VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters&
frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f)); frame[9] = static_cast<u8>(((info.frame_height >> 8) & 0x3f));
} }
const u64 bitstream_offset = state.frame_bitstream_offset; const u64 bitstream_offset = state.frame_bitstream_offset;
host1x.MemoryManager().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size); host1x.GMMU().ReadBlock(bitstream_offset, frame.data() + header_size, bitstream_size);
return frame; return frame;
} }

View File

@@ -358,7 +358,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_
Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) {
PictureInfo picture_info; PictureInfo picture_info;
host1x.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); host1x.GMMU().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo));
Vp9PictureInfo vp9_info = picture_info.Convert(); Vp9PictureInfo vp9_info = picture_info.Convert();
InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy); InsertEntropy(state.vp9_entropy_probs_offset, vp9_info.entropy);
@@ -373,7 +373,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters&
void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) {
EntropyProbs entropy; EntropyProbs entropy;
host1x.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); host1x.GMMU().ReadBlock(offset, &entropy, sizeof(EntropyProbs));
entropy.Convert(dst); entropy.Convert(dst);
} }
@@ -383,7 +383,7 @@ Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters
// gpu.SyncGuestHost(); epic, why? // gpu.SyncGuestHost(); epic, why?
current_frame.info = GetVp9PictureInfo(state); current_frame.info = GetVp9PictureInfo(state);
current_frame.bit_stream.resize(current_frame.info.bitstream_size); current_frame.bit_stream.resize(current_frame.info.bitstream_size);
host1x.MemoryManager().ReadBlock(state.frame_bitstream_offset, host1x.GMMU().ReadBlock(state.frame_bitstream_offset,
current_frame.bit_stream.data(), current_frame.bit_stream.data(),
current_frame.info.bitstream_size); current_frame.info.bitstream_size);
} }

View File

@@ -15,7 +15,7 @@ struct MaxwellDeviceMethods;
struct MaxwellDeviceTraits { struct MaxwellDeviceTraits {
static constexpr bool supports_pinning = false; static constexpr bool supports_pinning = false;
static constexpr size_t device_virtual_bits = 32; static constexpr size_t device_virtual_bits = 34;
using DeviceInterface = typename VideoCore::RasterizerInterface; using DeviceInterface = typename VideoCore::RasterizerInterface;
using DeviceMethods = typename MaxwellDeviceMethods; using DeviceMethods = typename MaxwellDeviceMethods;
}; };

View File

@@ -9,7 +9,9 @@ namespace Tegra {
namespace Host1x { namespace Host1x {
Host1x::Host1x(Core::System& system_) Host1x::Host1x(Core::System& system_)
: system{system_}, syncpoint_manager{}, memory_manager(system.DeviceMemory()) {} : system{system_}, syncpoint_manager{},
memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
} // namespace Host1x } // namespace Host1x

View File

@@ -5,8 +5,10 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/address_space.h"
#include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/host1x/syncpoint_manager.h" #include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h"
namespace Core { namespace Core {
class System; class System;
@@ -36,10 +38,28 @@ public:
return memory_manager; return memory_manager;
} }
Tegra::MemoryManager& GMMU() {
return gmmu_manager;
}
const Tegra::MemoryManager& GMMU() const {
return gmmu_manager;
}
Common::FlatAllocator<u32, 0, 32>& Allocator() {
return *allocator;
}
const Common::FlatAllocator<u32, 0, 32>& Allocator() const {
return *allocator;
}
private: private:
Core::System& system; Core::System& system;
SyncpointManager syncpoint_manager; SyncpointManager syncpoint_manager;
Tegra::MaxwellDeviceMemoryManager memory_manager; Tegra::MaxwellDeviceMemoryManager memory_manager;
Tegra::MemoryManager gmmu_manager;
std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
}; };
} // namespace Host1x } // namespace Host1x

View File

@@ -81,7 +81,7 @@ void Vic::Execute() {
LOG_ERROR(Service_NVDRV, "VIC Luma address not set."); LOG_ERROR(Service_NVDRV, "VIC Luma address not set.");
return; return;
} }
const VicConfig config{host1x.MemoryManager().Read<u64>(config_struct_address + 0x20)}; const VicConfig config{host1x.GMMU().Read<u64>(config_struct_address + 0x20)};
auto frame = nvdec_processor->GetFrame(); auto frame = nvdec_processor->GetFrame();
if (!frame) { if (!frame) {
return; return;
@@ -162,11 +162,11 @@ void Vic::WriteRGBFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height, Texture::SwizzleSubrect(luma_buffer, frame_buff, 4, width, height, 1, 0, 0, width, height,
block_height, 0, width * 4); block_height, 0, width * 4);
host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), size); host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(), size);
} else { } else {
// send pitch linear frame // send pitch linear frame
const size_t linear_size = width * height * 4; const size_t linear_size = width * height * 4;
host1x.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, host1x.GMMU().WriteBlock(output_surface_luma_address, converted_frame_buf_addr,
linear_size); linear_size);
} }
} }
@@ -193,7 +193,7 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
const std::size_t dst = y * aligned_width; const std::size_t dst = y * aligned_width;
std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width); std::memcpy(luma_buffer.data() + dst, luma_src + src, frame_width);
} }
host1x.MemoryManager().WriteBlock(output_surface_luma_address, luma_buffer.data(), host1x.GMMU().WriteBlock(output_surface_luma_address, luma_buffer.data(),
luma_buffer.size()); luma_buffer.size());
// Chroma // Chroma
@@ -233,7 +233,7 @@ void Vic::WriteYUVFrame(std::unique_ptr<FFmpeg::Frame> frame, const VicConfig& c
ASSERT(false); ASSERT(false);
break; break;
} }
host1x.MemoryManager().WriteBlock(output_surface_chroma_address, chroma_buffer.data(), host1x.GMMU().WriteBlock(output_surface_chroma_address, chroma_buffer.data(),
chroma_buffer.size()); chroma_buffer.size());
} }

View File

@@ -16,17 +16,16 @@
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace Tegra { namespace Tegra {
using Tegra::Memory::GuestMemoryFlags; using Tegra::Memory::GuestMemoryFlags;
std::atomic<size_t> MemoryManager::unique_identifier_generator{}; std::atomic<size_t> MemoryManager::unique_identifier_generator{};
MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_, MemoryManager::MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_,
u64 page_bits_) u64 address_space_bits_, u64 big_page_bits_, u64 page_bits_)
: system{system_}, memory{system.Host1x().MemoryManager()}, : system{system_}, memory{memory_}, address_space_bits{address_space_bits_},
address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, entries{}, big_entries{},
entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, page_table{address_space_bits, address_space_bits + page_bits - 38,
page_bits != big_page_bits ? page_bits : 0}, page_bits != big_page_bits ? page_bits : 0},
kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
1, std::memory_order_acq_rel)}, 1, std::memory_order_acq_rel)},
@@ -49,6 +48,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
entries.resize(page_table_size / 32, 0); entries.resize(page_table_size / 32, 0);
} }
MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
u64 page_bits_)
: MemoryManager(system_, system_.Host1x().MemoryManager(), address_space_bits_, big_page_bits_,
page_bits_) {}
MemoryManager::~MemoryManager() = default; MemoryManager::~MemoryManager() = default;
template <bool is_big_page> template <bool is_big_page>

View File

@@ -38,6 +38,8 @@ class MemoryManager final {
public: public:
explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40, explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
u64 big_page_bits_ = 16, u64 page_bits_ = 12); u64 big_page_bits_ = 16, u64 page_bits_ = 12);
explicit MemoryManager(Core::System& system_, MaxwellDeviceMemoryManager& memory_, u64 address_space_bits_ = 40,
u64 big_page_bits_ = 16, u64 page_bits_ = 12);
~MemoryManager(); ~MemoryManager();
size_t GetID() const { size_t GetID() const {