Merge pull request #349 from Subv/texturing

GPU: Support non-tiled textures and configurable block height.
This commit is contained in:
bunnei 2018-04-18 14:46:10 -04:00 committed by GitHub
commit d3f9ea90e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 97 additions and 53 deletions

View File

@ -218,8 +218,9 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry; Texture::TICEntry tic_entry;
Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
"TIC versions other than BlockLinear are unimplemented"); tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
"TIC versions other than BlockLinear or Pitch are unimplemented");
ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) || ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
(tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap), (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),

View File

@ -523,7 +523,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
src_params.width = std::min(framebuffer.width, pixel_stride); src_params.width = std::min(framebuffer.width, pixel_stride);
src_params.height = framebuffer.height; src_params.height = framebuffer.height;
src_params.stride = pixel_stride; src_params.stride = pixel_stride;
src_params.is_tiled = false; src_params.is_tiled = true;
src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
src_params.pixel_format = src_params.pixel_format =
SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
src_params.UpdateParams(); src_params.UpdateParams();

View File

@ -102,39 +102,36 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
} }
template <bool morton_to_gl, PixelFormat format> template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the if (morton_to_gl) {
// configuration for this and perform more generic un/swizzle auto data = Tegra::Texture::UnswizzleTexture(
base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
block_height);
std::memcpy(gl_buffer, data.data(), data.size());
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
// the configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
Memory::GetPointer(base), gl_buffer, morton_to_gl); Memory::GetPointer(base), gl_buffer, morton_to_gl);
} }
template <>
void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
VAddr start, VAddr end) {
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
// configuration for this and perform more generic un/swizzle
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
auto data =
Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
std::memcpy(gl_buffer, data.data(), data.size());
} }
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = { static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns =
{
MortonCopy<true, PixelFormat::RGBA8>, MortonCopy<true, PixelFormat::RGBA8>,
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT1>,
}; };
static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = { static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns =
{
MortonCopy<false, PixelFormat::RGBA8>, MortonCopy<false, PixelFormat::RGBA8>,
MortonCopy<false, PixelFormat::DXT1>, // TODO(Subv): Swizzling the DXT1 format is not yet supported
nullptr,
}; };
// Allocate an uninitialized texture of appropriate size and format for the surface // Allocate an uninitialized texture of appropriate size and format for the surface
@ -311,15 +308,16 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su
bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
return std::tie(other_surface.addr, other_surface.width, other_surface.height, return std::tie(other_surface.addr, other_surface.width, other_surface.height,
other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == other_surface.stride, other_surface.block_height, other_surface.pixel_format,
std::tie(addr, width, height, stride, pixel_format, is_tiled) && other_surface.is_tiled) ==
std::tie(addr, width, height, stride, block_height, pixel_format, is_tiled) &&
pixel_format != PixelFormat::Invalid; pixel_format != PixelFormat::Invalid;
} }
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
return sub_surface.addr >= addr && sub_surface.end <= end && return sub_surface.addr >= addr && sub_surface.end <= end &&
sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
sub_surface.is_tiled == is_tiled && sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
(sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
(sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
GetSubRect(sub_surface).left + sub_surface.width <= stride; GetSubRect(sub_surface).left + sub_surface.width <= stride;
@ -328,7 +326,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end && addr <= expanded_surface.end && expanded_surface.addr <= end &&
is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
stride == expanded_surface.stride &&
(std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
BytesInPixels(stride * (is_tiled ? 8 : 1)) == BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
0; 0;
@ -339,6 +338,9 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
end < texcopy_params.end) { end < texcopy_params.end) {
return false; return false;
} }
if (texcopy_params.block_height != block_height)
return false;
if (texcopy_params.width != texcopy_params.stride) { if (texcopy_params.width != texcopy_params.stride) {
const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@ -481,18 +483,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
const u64 start_offset = load_start - addr; const u64 start_offset = load_start - addr;
if (!is_tiled) { if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
const u32 bytes_per_pixel{GetFormatBpp() >> 3}; const u32 bytes_per_pixel{GetFormatBpp() >> 3};
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
// the configuration for this and perform more generic un/swizzle bytes_per_pixel * width * height);
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
texture_src_data + start_offset, &gl_buffer[start_offset],
true);
} else { } else {
morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, morton_to_gl_fns[static_cast<size_t>(pixel_format)](
load_start, load_end); stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
} }
} }
@ -533,11 +530,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
if (backup_bytes) if (backup_bytes)
std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) { } else if (!is_tiled) {
ASSERT(type == SurfaceType::Color);
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
} else { } else {
gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, gl_to_morton_fns[static_cast<size_t>(pixel_format)](
flush_start, flush_end); stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
} }
} }
@ -1041,9 +1037,18 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.height = config.tic.Height(); params.height = config.tic.Height();
params.is_tiled = config.tic.IsTiled(); params.is_tiled = config.tic.IsTiled();
params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
if (config.tic.IsTiled()) {
params.block_height = config.tic.BlockHeight();
} else {
// Use the texture-provided stride value if the texture isn't tiled.
params.stride = params.PixelsInBytes(config.tic.Pitch());
}
params.UpdateParams(); params.UpdateParams();
if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) { if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
params.stride != params.width) {
Surface src_surface; Surface src_surface;
MathUtil::Rectangle<u32> rect; MathUtil::Rectangle<u32> rect;
std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
@ -1094,6 +1099,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
color_params.res_scale = resolution_scale_factor; color_params.res_scale = resolution_scale_factor;
color_params.width = config.width; color_params.width = config.width;
color_params.height = config.height; color_params.height = config.height;
// TODO(Subv): Can framebuffers use a different block height?
color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
SurfaceParams depth_params = color_params; SurfaceParams depth_params = color_params;
color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());

View File

@ -115,6 +115,18 @@ struct SurfaceParams {
} }
} }
/**
 * Translates a surface PixelFormat into the corresponding Tegra texture format.
 *
 * Only RGBA8 and DXT1 are mapped; any other value reaches UNREACHABLE().
 *
 * @param format Pixel format of the surface.
 * @return A8R8G8B8 for RGBA8, DXT1 for DXT1.
 */
static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
    // TODO(Subv): Properly implement this
    if (format == PixelFormat::RGBA8) {
        return Tegra::Texture::TextureFormat::A8R8G8B8;
    }
    if (format == PixelFormat::DXT1) {
        return Tegra::Texture::TextureFormat::DXT1;
    }
    // No mapping exists for the remaining pixel formats.
    UNREACHABLE();
}
static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) {
SurfaceType a_type = GetFormatType(pixel_format_a); SurfaceType a_type = GetFormatType(pixel_format_a);
SurfaceType b_type = GetFormatType(pixel_format_b); SurfaceType b_type = GetFormatType(pixel_format_b);
@ -213,6 +225,7 @@ struct SurfaceParams {
u32 width = 0; u32 width = 0;
u32 height = 0; u32 height = 0;
u32 stride = 0; u32 stride = 0;
u32 block_height = 0;
u16 res_scale = 1; u16 res_scale = 1;
bool is_tiled = false; bool is_tiled = false;

View File

@ -56,23 +56,22 @@ u32 BytesPerPixel(TextureFormat format) {
} }
} }
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) { std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height) {
u8* data = Memory::GetPointer(address); u8* data = Memory::GetPointer(address);
u32 bytes_per_pixel = BytesPerPixel(format); u32 bytes_per_pixel = BytesPerPixel(format);
static constexpr u32 DefaultBlockHeight = 16;
std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
switch (format) { switch (format) {
case TextureFormat::DXT1: case TextureFormat::DXT1:
// In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values. // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight); unswizzled_data.data(), true, block_height);
break; break;
case TextureFormat::A8R8G8B8: case TextureFormat::A8R8G8B8:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, DefaultBlockHeight); unswizzled_data.data(), true, block_height);
break; break;
default: default:
UNIMPLEMENTED_MSG("Format not implemented"); UNIMPLEMENTED_MSG("Format not implemented");

View File

@ -14,7 +14,8 @@ namespace Texture {
/** /**
* Unswizzles a swizzled texture without changing its format. * Unswizzles a swizzled texture without changing its format.
*/ */
std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height); std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
u32 block_height = TICEntry::DefaultBlockHeight);
/** /**
* Decodes an unswizzled texture into a A8R8G8B8 texture. * Decodes an unswizzled texture into a A8R8G8B8 texture.

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include "common/assert.h"
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_funcs.h" #include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
@ -57,6 +58,8 @@ union TextureHandle {
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
struct TICEntry { struct TICEntry {
static constexpr u32 DefaultBlockHeight = 16;
union { union {
u32 raw; u32 raw;
BitField<0, 7, TextureFormat> format; BitField<0, 7, TextureFormat> format;
@ -70,7 +73,12 @@ struct TICEntry {
BitField<0, 16, u32> address_high; BitField<0, 16, u32> address_high;
BitField<21, 3, TICHeaderVersion> header_version; BitField<21, 3, TICHeaderVersion> header_version;
}; };
INSERT_PADDING_BYTES(4); union {
BitField<3, 3, u32> block_height;
// High 16 bits of the pitch value
BitField<0, 16, u32> pitch_high;
};
union { union {
BitField<0, 16, u32> width_minus_1; BitField<0, 16, u32> width_minus_1;
BitField<23, 4, TextureType> texture_type; BitField<23, 4, TextureType> texture_type;
@ -82,6 +90,13 @@ struct TICEntry {
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
} }
/**
 * Returns the pitch of a pitch-linear texture.
 *
 * Asserts that the header version is Pitch or PitchColorKey, since the pitch bits are only
 * read for those layouts.
 */
u32 Pitch() const {
    ASSERT(header_version == TICHeaderVersion::Pitch ||
           header_version == TICHeaderVersion::PitchColorKey);
    // The pitch is a 21-bit, 32B-aligned value of which only the high 16 bits are stored,
    // so shift the stored field left by 5 to rebuild it.
    const u32 stored_pitch_bits = pitch_high;
    return stored_pitch_bits << 5;
}
u32 Width() const { u32 Width() const {
return width_minus_1 + 1; return width_minus_1 + 1;
} }
@ -90,6 +105,13 @@ struct TICEntry {
return height_minus_1 + 1; return height_minus_1 + 1;
} }
/**
 * Returns the block height of a block-linear texture.
 *
 * Asserts that the header version is BlockLinear or BlockLinearColorKey, since the block
 * height bits are only read for those layouts.
 */
u32 BlockHeight() const {
    ASSERT(header_version == TICHeaderVersion::BlockLinear ||
           header_version == TICHeaderVersion::BlockLinearColorKey);
    // The field holds log2 of the block height, so expand it back to the actual value.
    const u32 log2_block_height = block_height;
    return 1 << log2_block_height;
}
bool IsTiled() const { bool IsTiled() const {
return header_version == TICHeaderVersion::BlockLinear || return header_version == TICHeaderVersion::BlockLinear ||
header_version == TICHeaderVersion::BlockLinearColorKey; header_version == TICHeaderVersion::BlockLinearColorKey;