GPU/HW: Interpolate native texture coordinates when upscaling

This commit is contained in:
Stenzek 2024-06-15 23:54:56 +10:00
parent 2a7de25505
commit ef152c47a6
No known key found for this signature in database
4 changed files with 105 additions and 86 deletions

View file

@ -849,9 +849,8 @@ bool GPU_HW::CompilePipelines()
const u32 active_texture_modes = const u32 active_texture_modes =
m_allow_sprite_mode ? NUM_TEXTURE_MODES : m_allow_sprite_mode ? NUM_TEXTURE_MODES :
(NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast<u32>(BatchTextureMode::SpriteStart))); (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast<u32>(BatchTextureMode::SpriteStart)));
const u32 active_vertex_shaders = m_allow_sprite_mode ? 3 : 2;
const u32 total_pipelines = const u32 total_pipelines =
active_vertex_shaders + // vertex shaders (m_allow_sprite_mode ? 5 : 3) + // vertex shaders
(active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders (active_texture_modes * 5 * 9 * 2 * 2 * 2) + // fragment shaders
((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) + // batch pipelines
((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) + // wireframe
@ -867,29 +866,34 @@ bool GPU_HW::CompilePipelines()
ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines); ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);
// vertex shaders - [non-textured/textured/sprite] // vertex shaders - [textured/palette/sprite]
// fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing] // fragment shaders - [render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); }; static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
DimensionalArray<std::unique_ptr<GPUShader>, 3> batch_vertex_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2> batch_vertex_shaders{};
DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{}; DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5> batch_fragment_shaders{};
ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() { ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
batch_vertex_shaders.enumerate(destroy_shader); batch_vertex_shaders.enumerate(destroy_shader);
batch_fragment_shaders.enumerate(destroy_shader); batch_fragment_shaders.enumerate(destroy_shader);
}); });
for (u8 textured = 0; textured < active_vertex_shaders; textured++) for (u8 textured = 0; textured < 2; textured++)
{ {
const bool sprite = (textured > 1); for (u8 palette = 0; palette < (textured ? 2 : 1); palette++)
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const std::string vs = shadergen.GenerateBatchVertexShader(textured != 0, uv_limits,
!sprite && force_round_texcoords, m_pgxp_depth_buffer);
if (!(batch_vertex_shaders[textured] =
g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs)))
{ {
return false; for (u8 sprite = 0; sprite < (textured ? 2 : 1); sprite++)
} {
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const std::string vs = shadergen.GenerateBatchVertexShader(
textured != 0, palette != 0, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer);
if (!(batch_vertex_shaders[textured][palette][sprite] =
g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs)))
{
return false;
}
progress.Increment(); progress.Increment();
}
}
} }
for (u8 render_mode = 0; render_mode < 5; render_mode++) for (u8 render_mode = 0; render_mode < 5; render_mode++)
@ -1010,6 +1014,11 @@ bool GPU_HW::CompilePipelines()
for (u8 check_mask = 0; check_mask < 2; check_mask++) for (u8 check_mask = 0; check_mask < 2; check_mask++)
{ {
const bool textured = (static_cast<BatchTextureMode>(texture_mode) != BatchTextureMode::Disabled); const bool textured = (static_cast<BatchTextureMode>(texture_mode) != BatchTextureMode::Disabled);
const bool palette =
(static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::Palette4Bit ||
static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::Palette8Bit ||
static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::SpritePalette4Bit ||
static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::SpritePalette8Bit);
const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart); const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering); const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
const bool use_shader_blending = const bool use_shader_blending =
@ -1026,7 +1035,8 @@ bool GPU_HW::CompilePipelines()
NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) : NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) :
std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES); std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES);
plconfig.vertex_shader = batch_vertex_shaders[BoolToUInt8(textured) + BoolToUInt8(sprite)].get(); plconfig.vertex_shader =
batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get();
plconfig.fragment_shader = plconfig.fragment_shader =
batch_fragment_shaders[render_mode] batch_fragment_shaders[render_mode]
[use_shader_blending ? transparency_mode : [use_shader_blending ? transparency_mode :
@ -1132,7 +1142,7 @@ bool GPU_HW::CompilePipelines()
GPUPipeline::BlendState::GetNoBlendingState(); GPUPipeline::BlendState::GetNoBlendingState();
plconfig.blend.write_mask = 0x7; plconfig.blend.write_mask = 0x7;
plconfig.depth = GPUPipeline::DepthState::GetNoTestsState(); plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
plconfig.vertex_shader = batch_vertex_shaders[0].get(); plconfig.vertex_shader = batch_vertex_shaders[0][0][0].get();
plconfig.geometry_shader = gs.get(); plconfig.geometry_shader = gs.get();
plconfig.fragment_shader = fs.get(); plconfig.fragment_shader = fs.get();

View file

@ -57,12 +57,13 @@ void GPU_HW_ShaderGen::WriteBatchUniformBuffer(std::stringstream& ss)
false); false);
} }
std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool uv_limits, bool force_round_texcoords, std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits,
bool pgxp_depth) bool force_round_texcoords, bool pgxp_depth)
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", palette);
DefineMacro(ss, "UV_LIMITS", uv_limits); DefineMacro(ss, "UV_LIMITS", uv_limits);
DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords);
DefineMacro(ss, "PGXP_DEPTH", pgxp_depth); DefineMacro(ss, "PGXP_DEPTH", pgxp_depth);
@ -76,14 +77,14 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool uv_l
{ {
DeclareVertexEntryPoint( DeclareVertexEntryPoint(
ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1, ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage", "float4 a_uv_limits"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, false, "", UsingMSAA(), {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}},
UsingPerSampleShading(), m_disable_color_perspective); false, "", UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective);
} }
else else
{ {
DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1, DeclareVertexEntryPoint(ss, {"float4 a_pos", "float4 a_col0", "uint a_texcoord", "uint a_texpage"}, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}}, false, "", UsingMSAA(), UsingPerSampleShading(), {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}}, false, "",
m_disable_color_perspective); UsingMSAA(), UsingPerSampleShading(), m_disable_color_perspective);
} }
} }
else else
@ -126,22 +127,32 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured, bool uv_l
v_col0 = a_col0; v_col0 = a_col0;
#if TEXTURED #if TEXTURED
v_tex0 = float2(float((a_texcoord & 0xFFFFu) * RESOLUTION_SCALE), v_tex0 = float2(uint2(a_texcoord & 0xFFFFu, a_texcoord >> 16));
float((a_texcoord >> 16) * RESOLUTION_SCALE)); #if !PALETTE
v_tex0 *= float(RESOLUTION_SCALE);
#endif
// base_x,base_y,palette_x,palette_y // base_x,base_y,palette_x,palette_y
// Palette X is scaled in fragment shader, since it can wrap. v_texpage.x = (a_texpage & 15u) * 64u;
v_texpage.x = (a_texpage & 15u) * 64u * RESOLUTION_SCALE; v_texpage.y = ((a_texpage >> 4) & 1u) * 256u;
v_texpage.y = ((a_texpage >> 4) & 1u) * 256u * RESOLUTION_SCALE; #if PALETTE
v_texpage.z = ((a_texpage >> 16) & 63u) * 16u; v_texpage.z = ((a_texpage >> 16) & 63u) * 16u;
v_texpage.w = ((a_texpage >> 22) & 511u) * RESOLUTION_SCALE; v_texpage.w = ((a_texpage >> 22) & 511u);
#endif
#if UV_LIMITS #if UV_LIMITS
v_uv_limits = a_uv_limits * float4(255.0, 255.0, 255.0, 255.0); v_uv_limits = a_uv_limits * 255.0;
#if FORCE_ROUND_TEXCOORDS
#if FORCE_ROUND_TEXCOORDS && PALETTE
// Add 0.5 to the upper bounds when upscaling, to work around interpolation differences. // Add 0.5 to the upper bounds when upscaling, to work around interpolation differences.
// Limited to force-round-texcoord hack, to avoid breaking other games. // Limited to force-round-texcoord hack, to avoid breaking other games.
v_uv_limits.zw += 0.5; v_uv_limits.zw += 0.5;
#elif !PALETTE
// Treat coordinates as being in upscaled space, and extend the UV range to all "upscaled"
// pixels. This means 1-pixel-high polygon-based framebuffer effects won't be downsampled.
// (e.g. Mega Man Legends 2 haze effect)
v_uv_limits *= float(RESOLUTION_SCALE);
v_uv_limits.zw += float(RESOLUTION_SCALE - 1u);
#endif #endif
#endif #endif
#endif #endif
@ -158,7 +169,7 @@ void GPU_HW_ShaderGen::WriteBatchTextureFilter(std::stringstream& ss, GPUTexture
{ {
DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::BilinearBinAlpha); DefineMacro(ss, "BINALPHA", texture_filter == GPUTextureFilter::BilinearBinAlpha);
ss << R"( ss << R"(
void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits,
out float4 texcol, out float ialpha) out float4 texcol, out float ialpha)
{ {
// Compute the coordinates of the four texels we will be interpolating between. // Compute the coordinates of the four texels we will be interpolating between.
@ -246,7 +257,7 @@ float4 resampler(float4 x)
return res; return res;
} }
void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits,
out float4 texcol, out float ialpha) out float4 texcol, out float ialpha)
{ {
float4 weights[4]; float4 weights[4];
@ -392,7 +403,7 @@ float get_left_ratio(float2 center, float2 origin, float2 direction, float2 scal
#define P(coord, xoffs, yoffs) SampleFromVRAM(texpage, clamp(coords + float2((xoffs), (yoffs)), uv_limits.xy, uv_limits.zw)) #define P(coord, xoffs, yoffs) SampleFromVRAM(texpage, clamp(coords + float2((xoffs), (yoffs)), uv_limits.xy, uv_limits.zw))
void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits,
out float4 texcol, out float ialpha) out float4 texcol, out float ialpha)
{ {
//--------------------------------------- //---------------------------------------
@ -647,6 +658,8 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend); DebugAssert(transparency == GPUTransparencyMode::Disabled || render_mode == GPU_HW::BatchRenderMode::ShaderBlend);
const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled); const bool textured = (texture_mode != GPU_HW::BatchTextureMode::Disabled);
const bool palette =
(texture_mode == GPU_HW::BatchTextureMode::Palette4Bit || texture_mode == GPU_HW::BatchTextureMode::Palette8Bit);
const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend && const bool shader_blending = (render_mode == GPU_HW::BatchRenderMode::ShaderBlend &&
(transparency != GPUTransparencyMode::Disabled || check_mask)); (transparency != GPUTransparencyMode::Disabled || check_mask));
const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend && const bool use_dual_source = (!shader_blending && m_supports_dual_source_blend &&
@ -663,9 +676,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "SHADER_BLENDING", shader_blending); DefineMacro(ss, "SHADER_BLENDING", shader_blending);
DefineMacro(ss, "CHECK_MASK_BIT", check_mask); DefineMacro(ss, "CHECK_MASK_BIT", check_mask);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", DefineMacro(ss, "PALETTE", palette);
texture_mode == GPU_HW::BatchTextureMode::Palette4Bit ||
texture_mode == GPU_HW::BatchTextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette4Bit); DefineMacro(ss, "PALETTE_4_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette8Bit); DefineMacro(ss, "PALETTE_8_BIT", texture_mode == GPU_HW::BatchTextureMode::Palette8Bit);
DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING", dithering);
@ -679,6 +690,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source); DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth); DefineMacro(ss, "WRITE_MASK_AS_DEPTH", m_write_mask_as_depth);
DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords); DefineMacro(ss, "FORCE_ROUND_TEXCOORDS", force_round_texcoords);
DefineMacro(ss, "UPSCALED", m_resolution_scale > 1);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
WriteBatchUniformBuffer(ss); WriteBatchUniformBuffer(ss);
@ -719,6 +731,12 @@ uint3 ApplyDithering(uint2 coord, uint3 icol)
#if TEXTURED #if TEXTURED
CONSTANT float4 TRANSPARENT_PIXEL_COLOR = float4(0.0, 0.0, 0.0, 0.0); CONSTANT float4 TRANSPARENT_PIXEL_COLOR = float4(0.0, 0.0, 0.0, 0.0);
#if PALETTE
#define TEXPAGE_VALUE uint4
#else
#define TEXPAGE_VALUE uint2
#endif
uint2 ApplyTextureWindow(uint2 coords) uint2 ApplyTextureWindow(uint2 coords)
{ {
uint x = (uint(coords.x) & u_texture_window_and.x) | u_texture_window_or.x; uint x = (uint(coords.x) & u_texture_window_and.x) | u_texture_window_or.x;
@ -726,13 +744,6 @@ uint2 ApplyTextureWindow(uint2 coords)
return uint2(x, y); return uint2(x, y);
} }
// Applies the texture window to a coordinate expressed in upscaled space.
// The coordinate is split into a native-resolution part (integer division by
// RESOLUTION_SCALE) and the remainder within that native texel. Only the native
// part is passed through ApplyTextureWindow(); the result is then multiplied back
// by RESOLUTION_SCALE and the remainder is re-added.
uint2 ApplyUpscaledTextureWindow(uint2 coords)
{
// Native-resolution coordinate that the upscaled coordinate falls in.
uint2 native_coords = coords / uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
// Offset of the upscaled coordinate inside that native texel.
uint2 coords_offset = coords % uint2(RESOLUTION_SCALE, RESOLUTION_SCALE);
// Window the native coordinate, then return to upscaled space with the same offset.
return (ApplyTextureWindow(native_coords) * uint2(RESOLUTION_SCALE, RESOLUTION_SCALE)) + coords_offset;
}
uint2 FloatToIntegerCoords(float2 coords) uint2 FloatToIntegerCoords(float2 coords)
{ {
// With the vertex offset applied at 1x resolution scale, we want to round the texture coordinates. // With the vertex offset applied at 1x resolution scale, we want to round the texture coordinates.
@ -740,42 +751,56 @@ uint2 FloatToIntegerCoords(float2 coords)
return uint2((RESOLUTION_SCALE == 1u || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords)); return uint2((RESOLUTION_SCALE == 1u || FORCE_ROUND_TEXCOORDS != 0) ? roundEven(coords) : floor(coords));
} }
float4 SampleFromVRAM(uint4 texpage, float2 coords) float4 SampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords)
{ {
#if PALETTE #if PALETTE
uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords)); uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
uint2 index_coord = icoord;
uint2 vicoord;
#if PALETTE_4_BIT #if PALETTE_4_BIT
index_coord.x /= 4u; // 4bit will never wrap, since it's in the last texpage row.
vicoord = uint2(texpage.x + (icoord.x / 4u), texpage.y + icoord.y);
#elif PALETTE_8_BIT #elif PALETTE_8_BIT
index_coord.x /= 2u; // 8bit can wrap in the X direction.
vicoord = uint2((texpage.x + (icoord.x / 2u)) & 0x3FFu, texpage.y + icoord.y);
#endif #endif
// fixup coords
uint2 vicoord = texpage.xy + (index_coord * uint2(RESOLUTION_SCALE, RESOLUTION_SCALE));
// load colour/palette // load colour/palette
float4 texel = LOAD_TEXTURE(samp0, int2(vicoord), 0); float4 texel = LOAD_TEXTURE(samp0, int2(vicoord * RESOLUTION_SCALE), 0);
uint vram_value = RGBA8ToRGBA5551(texel); uint vram_value = RGBA8ToRGBA5551(texel);
// apply palette // apply palette
#if PALETTE_4_BIT #if PALETTE_4_BIT
uint subpixel = icoord.x & 3u; uint subpixel = icoord.x & 3u;
uint palette_index = (vram_value >> (subpixel * 4u)) & 0x0Fu; uint palette_index = (vram_value >> (subpixel * 4u)) & 0x0Fu;
uint2 palette_icoord = uint2((texpage.z + palette_index) * RESOLUTION_SCALE, texpage.w); uint2 palette_icoord = uint2((texpage.z + palette_index), texpage.w);
#elif PALETTE_8_BIT #elif PALETTE_8_BIT
// can only wrap in X direction for 8-bit, 4-bit will fit in texpage size. // can only wrap in X direction for 8-bit, 4-bit will fit in texpage size.
uint subpixel = icoord.x & 1u; uint subpixel = icoord.x & 1u;
uint palette_index = (vram_value >> (subpixel * 8u)) & 0xFFu; uint palette_index = (vram_value >> (subpixel * 8u)) & 0xFFu;
uint2 palette_icoord = uint2(((texpage.z + palette_index) & 0x3FFu) * RESOLUTION_SCALE, texpage.w); uint2 palette_icoord = uint2(((texpage.z + palette_index) & 0x3FFu), texpage.w);
#endif #endif
return LOAD_TEXTURE(samp0, int2(palette_icoord), 0); return LOAD_TEXTURE(samp0, int2(palette_icoord * RESOLUTION_SCALE), 0);
#else #else
// Direct texturing. Render-to-texture effects. Use upscaled coordinates. // Direct texturing - usually render-to-texture effects.
uint2 icoord = ApplyUpscaledTextureWindow(FloatToIntegerCoords(coords)); uint2 vicoord;
uint2 direct_icoord = texpage.xy + icoord; #if !UPSCALED
return LOAD_TEXTURE(samp0, int2(direct_icoord), 0); uint2 icoord = ApplyTextureWindow(FloatToIntegerCoords(coords));
vicoord = (texpage.xy + icoord) & uint2(1023, 511);
#else
// Coordinates are already upscaled, we need to downscale them to apply the texture
// window, then re-upscale/offset. We can't round here, because it could result in
// going outside of the texture window.
float2 ncoords = coords / float(RESOLUTION_SCALE);
float2 nfpart = frac(ncoords);
uint2 nicoord = ApplyTextureWindow(uint2(floor(ncoords)));
uint2 nvicoord = (texpage.xy + nicoord) & uint2(1023, 511);
coords = (float2(nvicoord) + nfpart) * float(RESOLUTION_SCALE);
vicoord = uint2(floor(coords));
#endif
return LOAD_TEXTURE(samp0, int2(vicoord), 0);
#endif #endif
} }
@ -808,15 +833,16 @@ float3 ApplyDebanding(float2 frag_coord)
if (uv_limits) if (uv_limits)
{ {
DeclareFragmentEntryPoint(ss, 1, 1, DeclareFragmentEntryPoint(ss, 1, 1,
{{"nointerpolation", "uint4 v_texpage"}, {"nointerpolation", "float4 v_uv_limits"}}, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"},
{"nointerpolation", "float4 v_uv_limits"}},
true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(), true, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending); UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
} }
else else
{ {
DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", "uint4 v_texpage"}}, true, use_dual_source ? 2 : 1, DeclareFragmentEntryPoint(ss, 1, 1, {{"nointerpolation", palette ? "uint4 v_texpage" : "uint2 v_texpage"}}, true,
use_dual_source, m_write_mask_as_depth, UsingMSAA(), UsingPerSampleShading(), false, use_dual_source ? 2 : 1, use_dual_source, m_write_mask_as_depth, UsingMSAA(),
m_disable_color_perspective, shader_blending); UsingPerSampleShading(), false, m_disable_color_perspective, shader_blending);
} }
} }
else else
@ -841,34 +867,16 @@ float3 ApplyDebanding(float2 frag_coord)
#endif #endif
#if TEXTURED #if TEXTURED
// We can't currently use upscaled coordinate for palettes because of how they're packed.
// Not that it would be any benefit anyway, render-to-texture effects don't use palettes.
float2 coords = v_tex0;
#if PALETTE
coords /= float2(RESOLUTION_SCALE, RESOLUTION_SCALE);
#endif
#if UV_LIMITS
float4 uv_limits = v_uv_limits;
#if !PALETTE
// Extend the UV range to all "upscaled" pixels. This means 1-pixel-high polygon-based
// framebuffer effects won't be downsampled. (e.g. Mega Man Legends 2 haze effect)
uv_limits *= float(RESOLUTION_SCALE);
uv_limits.zw += float(RESOLUTION_SCALE - 1u);
#endif
#endif
float4 texcol; float4 texcol;
#if TEXTURE_FILTERING #if TEXTURE_FILTERING
FilteredSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha); FilteredSampleFromVRAM(v_texpage, v_tex0, v_uv_limits, texcol, ialpha);
if (ialpha < 0.5) if (ialpha < 0.5)
discard; discard;
#else #else
#if UV_LIMITS #if UV_LIMITS
texcol = SampleFromVRAM(v_texpage, clamp(coords, uv_limits.xy, uv_limits.zw)); texcol = SampleFromVRAM(v_texpage, clamp(v_tex0, v_uv_limits.xy, v_uv_limits.zw));
#else #else
texcol = SampleFromVRAM(v_texpage, coords); texcol = SampleFromVRAM(v_texpage, v_tex0);
#endif #endif
if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR)) if (VECTOR_EQ(texcol, TRANSPARENT_PIXEL_COLOR))
discard; discard;

View file

@ -13,7 +13,8 @@ public:
bool supports_dual_source_blend, bool supports_framebuffer_fetch, bool debanding); bool supports_dual_source_blend, bool supports_framebuffer_fetch, bool debanding);
~GPU_HW_ShaderGen(); ~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured, bool uv_limits, bool force_round_texcoords, bool pgxp_depth); std::string GenerateBatchVertexShader(bool textured, bool palette, bool uv_limits, bool force_round_texcoords,
bool pgxp_depth);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency, std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode render_mode, GPUTransparencyMode transparency,
GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering, GPU_HW::BatchTextureMode texture_mode, GPUTextureFilter texture_filtering,
bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing, bool uv_limits, bool force_round_texcoords, bool dithering, bool interlacing,

View file

@ -4,4 +4,4 @@
#pragma once #pragma once
#include "common/types.h" #include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 16; static constexpr u32 SHADER_CACHE_VERSION = 17;