diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp
index bb0567864..fba1d8d11 100644
--- a/src/core/gpu.cpp
+++ b/src/core/gpu.cpp
@@ -440,10 +440,6 @@ void GPU::UpdateCRTCDisplayParameters()
     cs.display_vram_left = std::min<u16>(
       m_crtc_state.regs.X + ((horizontal_display_start_tick - cs.horizontal_display_start) / cs.dot_clock_divider),
       VRAM_WIDTH - 1);
-
-    // for 24-bit scanout we must stay aligned
-    if (m_GPUSTAT.display_area_color_depth_24 && ((cs.display_vram_left - cs.regs.X) & 1u))
-      cs.display_vram_left--;
   }
 
   if (cs.horizontal_display_end <= horizontal_display_end_tick)
diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp
index 10250b646..da9a9c2ef 100644
--- a/src/core/gpu_hw_d3d11.cpp
+++ b/src/core/gpu_hw_d3d11.cpp
@@ -552,43 +552,24 @@ void GPU_HW_D3D11::UpdateDisplay()
     }
     else
     {
-      const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
+      m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
+      m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
 
+      const u32 reinterpret_field_offset =
+        (m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
+      const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
+      const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
+      const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
       ID3D11PixelShader* display_pixel_shader =
         m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
 
-      // Because of how the reinterpret shader works, we need to use the downscaled version.
-      if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
-      {
-        const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
-        const u32 scaled_copy_width = copy_width * m_resolution_scale;
-        BlitTexture(m_vram_encoding_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height,
-                    m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width,
-                    scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false);
+      SetViewportAndScissor(reinterpret_start_x, m_crtc_state.display_vram_top, reinterpret_width,
+                            scaled_display_height);
+      DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
 
-        m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
-        m_context->PSSetShaderResources(0, 1, m_vram_encoding_texture.GetD3DSRVArray());
-
-        const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset};
-        SetViewportAndScissor(0, field_offset, display_width, display_height);
-        DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
-
-        m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
-                                          m_display_texture.GetHeight(), 0, 0, display_width, display_height);
-      }
-      else
-      {
-        m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr);
-        m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
-
-        const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset};
-        SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height);
-        DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
-
-        m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
-                                          m_display_texture.GetHeight(), 0, 0, scaled_display_width,
-                                          scaled_display_height);
-      }
+      m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
+                                        m_display_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
+                                        scaled_display_width, scaled_display_height);
 
       RestoreGraphicsAPIState();
     }
diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp
index 534c16a08..159e88087 100644
--- a/src/core/gpu_hw_opengl.cpp
+++ b/src/core/gpu_hw_opengl.cpp
@@ -507,63 +507,32 @@ void GPU_HW_OpenGL::UpdateDisplay()
     }
     else
     {
-      const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
-      const u32 scaled_flipped_vram_offset_y =
-        m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
-      const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
-
       glDisable(GL_BLEND);
       glDisable(GL_SCISSOR_TEST);
 
-      const GL::Program& prog =
-        m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)];
-      prog.Bind();
+      m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind();
+      m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
+      m_vram_texture.Bind();
 
-      // Because of how the reinterpret shader works, we need to use the downscaled version.
-      if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
-      {
-        const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
-        const u32 scaled_copy_width = copy_width * m_resolution_scale;
-        m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
-        glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
-                          scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
-                          vram_offset_x + copy_width, flipped_vram_offset_y + display_height, GL_COLOR_BUFFER_BIT,
-                          GL_NEAREST);
+      const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
+      const u32 scaled_flipped_vram_offset_y =
+        m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
 
-        m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_encoding_texture.Bind();
+      const u32 reinterpret_field_offset =
+        (m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
+      const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
+      const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
+      const u32 uniforms[4] = {reinterpret_field_offset, reinterpret_start_x};
+      UploadUniformBlock(uniforms, sizeof(uniforms));
+      m_batch_ubo_dirty = true;
 
-        glViewport(0, field_offset, display_width, display_height);
+      glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
+      glDrawArrays(GL_TRIANGLES, 0, 3);
 
-        const u32 uniforms[4] = {vram_offset_x, flipped_vram_offset_y, field_offset};
-        UploadUniformBlock(uniforms, sizeof(uniforms));
-        m_batch_ubo_dirty = true;
-
-        glDrawArrays(GL_TRIANGLES, 0, 3);
-
-        m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
-                                          m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
-                                          display_height, display_width, -static_cast<s32>(display_height));
-      }
-      else
-      {
-        m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_texture.Bind();
-
-        glViewport(0, field_offset, scaled_display_width, scaled_display_height);
-
-        const u32 uniforms[4] = {scaled_vram_offset_x, scaled_flipped_vram_offset_y, field_offset};
-        UploadUniformBlock(uniforms, sizeof(uniforms));
-        m_batch_ubo_dirty = true;
-
-        glDrawArrays(GL_TRIANGLES, 0, 3);
-
-        m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
-                                          m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
-                                          scaled_display_height, scaled_display_width,
-                                          -static_cast<s32>(scaled_display_height));
-      }
+      m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
+                                        m_display_texture.GetWidth(), m_display_texture.GetHeight(),
+                                        scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
+                                        scaled_display_width, -static_cast<s32>(scaled_display_height));
 
       // restore state
       m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
diff --git a/src/core/gpu_hw_opengl_es.cpp b/src/core/gpu_hw_opengl_es.cpp
index 900f2ab88..c1a7c6ded 100644
--- a/src/core/gpu_hw_opengl_es.cpp
+++ b/src/core/gpu_hw_opengl_es.cpp
@@ -371,75 +371,46 @@ void GPU_HW_OpenGL_ES::UpdateDisplay()
     }
     else
     {
-      const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
-      const u32 scaled_flipped_vram_offset_y =
-        m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
-      const u32 field_offset = BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field);
-
       glDisable(GL_BLEND);
       glDisable(GL_SCISSOR_TEST);
 
-      const GL::Program& prog =
+      GL::Program& prog =
         m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)];
       prog.Bind();
+      m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
+      m_vram_texture.Bind();
 
-      // Because of how the reinterpret shader works, we need to use the downscaled version.
-      if (m_GPUSTAT.display_area_color_depth_24 && m_resolution_scale > 1)
-      {
-        const u32 copy_width = std::min<u32>((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x);
-        const u32 scaled_copy_width = copy_width * m_resolution_scale;
-        m_vram_encoding_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_texture.BindFramebuffer(GL_READ_FRAMEBUFFER);
-        glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width,
-                          scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y,
-                          vram_offset_x + copy_width, flipped_vram_offset_y + display_height, GL_COLOR_BUFFER_BIT,
-                          GL_NEAREST);
+      const u32 flipped_vram_offset_y = VRAM_HEIGHT - vram_offset_y - display_height;
+      const u32 scaled_flipped_vram_offset_y =
+        m_vram_texture.GetHeight() - scaled_vram_offset_y - scaled_display_height;
 
-        m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_encoding_texture.Bind();
+      const u32 reinterpret_field_offset =
+        (m_crtc_state.regs.Y + BoolToUInt8(interlaced && m_GPUSTAT.interlaced_field)) & 1u;
+      const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
+      const u32 reinterpret_width = scaled_display_width + (m_crtc_state.display_vram_left - m_crtc_state.regs.X);
 
-        glViewport(0, field_offset, display_width, display_height);
+      prog.Uniform2i(0, reinterpret_field_offset, reinterpret_start_x);
+      m_batch_ubo_dirty = true;
 
-        prog.Uniform3i(0, static_cast<s32>(vram_offset_x), static_cast<s32>(flipped_vram_offset_y),
-                       static_cast<s32>(field_offset));
-        m_batch_ubo_dirty = true;
+      glViewport(reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_width, scaled_display_height);
+      glDrawArrays(GL_TRIANGLES, 0, 3);
 
-        glDrawArrays(GL_TRIANGLES, 0, 3);
-
-        m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
-                                          m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
-                                          display_height, display_width, -static_cast<s32>(display_height));
-      }
-      else
-      {
-        m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-        m_vram_texture.Bind();
-
-        glViewport(0, field_offset, scaled_display_width, scaled_display_height);
-
-        prog.Uniform3i(0, static_cast<s32>(scaled_vram_offset_x), static_cast<s32>(scaled_flipped_vram_offset_y),
-                       static_cast<s32>(field_offset));
-        m_batch_ubo_dirty = true;
-
-        glDrawArrays(GL_TRIANGLES, 0, 3);
-
-        m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
-                                          m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
-                                          scaled_display_height, scaled_display_width,
-                                          -static_cast<s32>(scaled_display_height));
-      }
-
-      // restore state
-      m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
-      glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
-      glEnable(GL_SCISSOR_TEST);
+      m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
+                                        m_display_texture.GetWidth(), m_display_texture.GetHeight(),
+                                        scaled_vram_offset_x, m_vram_texture.GetHeight() - scaled_vram_offset_y,
+                                        scaled_display_width, -static_cast<s32>(scaled_display_height));
     }
 
-    m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
-                                         m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
-                                         m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
-                                         m_crtc_state.display_aspect_ratio);
+    // restore state
+    m_vram_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
+    glViewport(0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
+    glEnable(GL_SCISSOR_TEST);
   }
+
+  m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
+                                       m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
+                                       m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
+                                       m_crtc_state.display_aspect_ratio);
 }
 
 void GPU_HW_OpenGL_ES::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp
index 3bf86d29c..7189ba8fd 100644
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@@ -750,7 +750,7 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
   DefineMacro(ss, "INTERLACED", interlaced);
 
   WriteCommonFunctions(ss);
-  DeclareUniformBuffer(ss, {"int3 u_base_coords"});
+  DeclareUniformBuffer(ss, {"int u_field_offset", "int u_vram_start_x"});
   DeclareTexture(ss, "samp0", 0);
 
   DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false);
@@ -759,55 +759,28 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
   int2 icoords = int2(v_pos.xy);
 
   #if INTERLACED
-    if (((icoords.y - u_base_coords.z) & 1) != 0)
+    if (((icoords.y / RESOLUTION_SCALE) & 1) != u_field_offset)
       discard;
   #endif
 
   #if DEPTH_24BIT
-    // compute offset in dwords from the start of the 24-bit values
-    int2 base = int2(u_base_coords.x, u_base_coords.y + icoords.y);
-    int xoff = int(icoords.x);
-    int dword_index = (xoff / 2) + (xoff / 4);
+    // relative to start of scanout
+    int relative_x = (icoords.x - u_vram_start_x) / RESOLUTION_SCALE;
+    icoords.x = u_vram_start_x + ((relative_x * 3) / 2) * RESOLUTION_SCALE;
 
-    // sample two adjacent dwords, or four 16-bit values as the 24-bit value will lie somewhere between these
-    uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + dword_index * 2 + 0, base.y), 0));
-    uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + dword_index * 2 + 1, base.y), 0));
-    uint s2 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + (dword_index + 1) * 2 + 0, base.y), 0));
-    uint s3 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, int2(base.x + (dword_index + 1) * 2 + 1, base.y), 0));
-
-    // select the bit for this pixel depending on its offset in the 4-pixel block
-    uint r, g, b;
-    int block_offset = xoff & 3;
-    if (block_offset == 0)
-    {
-      r = s0 & 0xFFu;
-      g = s0 >> 8;
-      b = s1 & 0xFFu;
-    }
-    else if (block_offset == 1)
-    {
-      r = s1 >> 8;
-      g = s2 & 0xFFu;
-      b = s2 >> 8;
-    }
-    else if (block_offset == 2)
-    {
-      r = s1 & 0xFFu;
-      g = s1 >> 8;
-      b = s2 & 0xFFu;
-    }
-    else
-    {
-      r = s2 >> 8;
-      g = s3 & 0xFFu;
-      b = s3 >> 8;
-    }
-
-    // and normalize
-    o_col0 = float4(float(r) / 255.0, float(g) / 255.0, float(b) / 255.0, 1.0);
+    // load adjacent 16-bit texels
+    uint s0 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords, 0));
+    uint s1 = RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, icoords + int2(RESOLUTION_SCALE, 0), 0));
+    
+    // select which part of the combined 16-bit texels we are currently shading
+    uint s1s0 = ((s1 << 16) | s0) >> ((relative_x & 1) * 8);
+    
+    // extract components and normalize
+    o_col0 = float4(float(s1s0 & 0xFFu) / 255.0, float((s1s0 >> 8u) & 0xFFu) / 255.0,
+                    float((s1s0 >> 16u) & 0xFFu) / 255.0, 1.0);
   #else
     // load and return
-    o_col0 = LOAD_TEXTURE(samp0, u_base_coords.xy + icoords, 0);
+    o_col0 = LOAD_TEXTURE(samp0, icoords, 0);
   #endif
 }
 )";