diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 6b55e9c71..4add266bb 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -28,6 +28,8 @@ void GPU_HW::Reset() { GPU::Reset(); + m_vram_shadow.fill(0); + m_batch = {}; m_batch_ubo_data = {}; m_batch_ubo_dirty = true; @@ -181,6 +183,22 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) *bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); } +Common::Rectangle GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) +{ + Common::Rectangle out_rc = Common::Rectangle::FromExtents(x, y, width, height); + if (out_rc.right > VRAM_WIDTH) + { + out_rc.left = 0; + out_rc.right = VRAM_WIDTH; + } + if (out_rc.bottom > VRAM_HEIGHT) + { + out_rc.top = 0; + out_rc.bottom = VRAM_HEIGHT; + } + return out_rc; +} + GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) { if (rc.primitive == Primitive::Line) @@ -191,6 +209,22 @@ GPU_HW::BatchPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc) return BatchPrimitive::Triangles; } +void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) +{ + u8* out_ptr = static_cast(buffer); + + for (u32 row = 0; row < height; row++) + { + const u32 row_offset = ((y + row) % VRAM_HEIGHT) * VRAM_WIDTH; + for (u32 col = 0; col < width; col++) + { + const u32 col_offset = row_offset + ((x + col) % VRAM_WIDTH); + std::memcpy(out_ptr, &m_vram_shadow[col_offset], sizeof(u16)); + out_ptr += sizeof(u16); + } + } +} + void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) { m_vram_dirty_rect.Include(Common::Rectangle::FromExtents(x, y, width, height)); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 0e9e40e68..9a2c22a78 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -1,4 +1,5 @@ #pragma once +#include "common/heap_array.h" #include "gpu.h" #include #include @@ -126,6 +127,7 @@ protected: bool IsFlushed() const { return m_batch_current_vertex_ptr == 
m_batch_start_vertex_ptr; } + void ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; @@ -139,6 +141,11 @@ protected: return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); } + /// Computes the area affected by a VRAM transfer, including wrap-around of X and Y. + Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height); + + HeapArray m_vram_shadow; + BatchVertex* m_batch_start_vertex_ptr = nullptr; BatchVertex* m_batch_end_vertex_ptr = nullptr; BatchVertex* m_batch_current_vertex_ptr = nullptr; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 7c328b321..73cb2d5fb 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -159,7 +159,9 @@ bool GPU_HW_D3D11::CreateFramebuffer() if (!m_vram_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) || !m_vram_read_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, false) || - !m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true)) + !m_display_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true) || + !m_vram_encoding_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, true, true) || + !m_vram_readback_texture.Create(m_device.Get(), VRAM_WIDTH, VRAM_HEIGHT, texture_format, false)) { return false; } @@ -177,12 +179,6 @@ bool GPU_HW_D3D11::CreateFramebuffer() old_vram_texture.GetWidth(), old_vram_texture.GetHeight(), linear_filter); } - if (m_resolution_scale > 1 && - !m_vram_downsample_texture.Create(m_device.Get(), texture_width, texture_height, texture_format, true, true)) - { - return false; - } - 
m_context->OMSetRenderTargets(1, m_vram_texture.GetD3DRTVArray(), nullptr); SetFullVRAMDirtyRectangle(); return true; @@ -199,8 +195,9 @@ void GPU_HW_D3D11::DestroyFramebuffer() { m_vram_read_texture.Destroy(); m_vram_texture.Destroy(); - m_vram_downsample_texture.Destroy(); + m_vram_encoding_texture.Destroy(); m_display_texture.Destroy(); + m_vram_readback_texture.Destroy(); } bool GPU_HW_D3D11::CreateVertexBuffer() @@ -370,6 +367,11 @@ bool GPU_HW_D3D11::CompileShaders() if (!m_fill_pixel_shader) return false; + m_vram_read_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader( + m_device.Get(), shadergen.GenerateVRAMReadFragmentShader(), debug); + if (!m_vram_read_pixel_shader) + return false; + m_vram_write_pixel_shader = D3D11::ShaderCompiler::CompileAndCreatePixelShader( m_device.Get(), shadergen.GenerateVRAMWriteFragmentShader(), debug); if (!m_vram_write_pixel_shader) @@ -548,17 +550,16 @@ void GPU_HW_D3D11::UpdateDisplay() { const u32 copy_width = std::min((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x); const u32 scaled_copy_width = copy_width * m_resolution_scale; - BlitTexture(m_vram_downsample_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height, + BlitTexture(m_vram_encoding_texture.GetD3DRTV(), vram_offset_x, vram_offset_y, copy_width, display_height, m_vram_texture.GetD3DSRV(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_copy_width, scaled_display_height, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), false); m_context->OMSetRenderTargets(1, m_display_texture.GetD3DRTVArray(), nullptr); - m_context->PSSetShaderResources(0, 1, m_vram_downsample_texture.GetD3DSRVArray()); + m_context->PSSetShaderResources(0, 1, m_vram_encoding_texture.GetD3DSRVArray()); const u32 uniforms[4] = {vram_offset_x, vram_offset_y, field_offset}; SetViewportAndScissor(0, field_offset, display_width, display_height); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); - UploadUniformBlock(uniforms, 
sizeof(uniforms)); m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, display_width, display_height, m_display_texture.GetWidth(), m_display_texture.GetHeight(), @@ -572,7 +573,6 @@ void GPU_HW_D3D11::UpdateDisplay() const u32 uniforms[4] = {scaled_vram_offset_x, scaled_vram_offset_y, field_offset}; SetViewportAndScissor(0, field_offset, scaled_display_width, scaled_display_height); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); - UploadUniformBlock(uniforms, sizeof(uniforms)); m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), 0, 0, scaled_display_width, scaled_display_height, m_display_texture.GetWidth(), @@ -586,7 +586,37 @@ void GPU_HW_D3D11::UpdateDisplay() void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) { - Log_WarningPrintf("VRAM readback not implemented"); + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = copy_rect.GetWidth() / 2; + const u32 encoded_height = copy_rect.GetHeight(); + + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.GetWidth(), copy_rect.GetHeight()}; + m_context->OMSetRenderTargets(1, m_vram_encoding_texture.GetD3DRTVArray(), nullptr); + m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); + SetViewportAndScissor(0, 0, encoded_width, encoded_height); + DrawUtilityShader(m_vram_read_pixel_shader.Get(), uniforms, sizeof(uniforms)); + + // Stage the readback. + m_vram_readback_texture.CopyFromTexture(m_context.Get(), m_vram_encoding_texture.GetD3DTexture(), 0, 0, 0, 0, 0, + encoded_width, encoded_height); + // And copy it into our shadow buffer. 
+ if (m_vram_readback_texture.Map(m_context.Get(), false)) + { + m_vram_readback_texture.ReadPixels(0, 0, encoded_width * 2, encoded_height, VRAM_WIDTH, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + m_vram_readback_texture.Unmap(m_context.Get()); + } + else + { + Log_ErrorPrintf("Failed to map VRAM readback texture"); + } + + RestoreGraphicsAPIState(); + + // Feed the shadow buffer back to the output. + GPU_HW::ReadVRAM(x, y, width, height, buffer); } void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index e471c1d01..80a006640 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -69,7 +69,7 @@ private: // downsample texture - used for readbacks at >1xIR. D3D11::Texture m_vram_texture; D3D11::Texture m_vram_read_texture; - D3D11::Texture m_vram_downsample_texture; + D3D11::Texture m_vram_encoding_texture; D3D11::Texture m_display_texture; D3D11::StreamBuffer m_vertex_stream_buffer; @@ -77,6 +77,9 @@ private: D3D11::StreamBuffer m_uniform_stream_buffer; D3D11::StreamBuffer m_texture_stream_buffer; + + D3D11::StagingTexture m_vram_readback_texture; + ComPtr m_texture_stream_buffer_srv_r16ui; ComPtr m_cull_none_rasterizer_state; @@ -98,6 +101,7 @@ private: ComPtr m_screen_quad_vertex_shader; ComPtr m_copy_pixel_shader; ComPtr m_fill_pixel_shader; + ComPtr m_vram_read_pixel_shader; ComPtr m_vram_write_pixel_shader; std::array, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced] }; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 98eca2f54..cb1b3de66 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -179,11 +179,8 @@ void GPU_HW_OpenGL::CreateFramebuffer() m_vram_read_texture = std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); - if (m_resolution_scale > 1) - { - m_vram_downsample_texture = - std::make_unique(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, 
GL_UNSIGNED_BYTE, nullptr, false, true); - } + m_vram_encoding_texture = + std::make_unique(VRAM_WIDTH, VRAM_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); m_display_texture = std::make_unique(texture_width, texture_height, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false, true); @@ -205,7 +202,7 @@ void GPU_HW_OpenGL::DestroyFramebuffer() { m_vram_read_texture.reset(); m_vram_texture.reset(); - m_vram_downsample_texture.reset(); + m_vram_encoding_texture.reset(); m_display_texture.reset(); } @@ -325,6 +322,23 @@ bool GPU_HW_OpenGL::CompilePrograms() } } + if (!m_vram_read_program.Compile(shadergen.GenerateScreenQuadVertexShader(), + shadergen.GenerateVRAMReadFragmentShader())) + { + return false; + } + + if (!m_is_gles) + m_vram_read_program.BindFragData(0, "o_col0"); + + if (!m_vram_read_program.Link()) + return false; + + m_vram_read_program.BindUniformBlock("UBOBlock", 1); + + m_vram_read_program.Bind(); + m_vram_read_program.Uniform1i("samp0", 0); + if (m_supports_texture_buffer) { if (!m_vram_write_program.Compile(shadergen.GenerateScreenQuadVertexShader(), @@ -462,7 +476,7 @@ void GPU_HW_OpenGL::UpdateDisplay() { const u32 copy_width = std::min((display_width * 3) / 2, VRAM_WIDTH - vram_offset_x); const u32 scaled_copy_width = copy_width * m_resolution_scale; - m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); glBlitFramebuffer(scaled_vram_offset_x, scaled_flipped_vram_offset_y, scaled_vram_offset_x + scaled_copy_width, scaled_flipped_vram_offset_y + scaled_display_height, vram_offset_x, flipped_vram_offset_y, @@ -470,7 +484,7 @@ void GPU_HW_OpenGL::UpdateDisplay() GL_NEAREST); m_display_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_vram_downsample_texture->Bind(); + m_vram_encoding_texture->Bind(); glViewport(0, field_offset, display_width, display_height); @@ -514,60 +528,35 @@ void GPU_HW_OpenGL::UpdateDisplay() 
void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height, void* buffer) { - // we need to convert RGBA8 -> RGBA5551 - std::vector temp_buffer(width * height); - const u32 flipped_y = VRAM_HEIGHT - y - height; + // Get bounds with wrap-around handled. + const Common::Rectangle copy_rect = GetVRAMTransferBounds(x, y, width, height); + const u32 encoded_width = copy_rect.GetWidth() / 2; + const u32 encoded_height = copy_rect.GetHeight(); - // downscaling to 1xIR. - if (m_resolution_scale > 1) - { - const u32 texture_height = m_vram_texture->GetHeight(); - const u32 scaled_x = x * m_resolution_scale; - const u32 scaled_y = y * m_resolution_scale; - const u32 scaled_width = width * m_resolution_scale; - const u32 scaled_height = height * m_resolution_scale; - const u32 scaled_flipped_y = texture_height - scaled_y - scaled_height; + // Encode the 24-bit texture as 16-bit. + const u32 uniforms[4] = {copy_rect.left, VRAM_HEIGHT - copy_rect.top - copy_rect.GetHeight(), copy_rect.GetWidth(), + copy_rect.GetHeight()}; + m_vram_encoding_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); + m_vram_texture->Bind(); + m_vram_read_program.Bind(); + UploadUniformBlock(uniforms, sizeof(uniforms)); + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + glViewport(0, 0, encoded_width, encoded_height); + glDrawArrays(GL_TRIANGLES, 0, 3); - m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); - m_vram_downsample_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); - glDisable(GL_SCISSOR_TEST); - glBlitFramebuffer(scaled_x, scaled_flipped_y, scaled_x + scaled_width, scaled_flipped_y + scaled_height, 0, 0, - width, height, GL_COLOR_BUFFER_BIT, GL_LINEAR); - glEnable(GL_SCISSOR_TEST); - m_vram_texture->BindFramebuffer(GL_DRAW_FRAMEBUFFER); - m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); - glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, temp_buffer.data()); - } - else - { - m_vram_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); - glReadPixels(x, flipped_y, 
width, height, GL_RGBA, GL_UNSIGNED_BYTE, temp_buffer.data()); - } + // Readback encoded texture. + m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + glPixelStorei(GL_PACK_ALIGNMENT, 2); + glPixelStorei(GL_PACK_ROW_LENGTH, VRAM_WIDTH / 2); + glReadPixels(0, 0, encoded_width, encoded_height, GL_RGBA, GL_UNSIGNED_BYTE, + &m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]); + glPixelStorei(GL_PACK_ALIGNMENT, 4); + glPixelStorei(GL_PACK_ROW_LENGTH, 0); + RestoreGraphicsAPIState(); - // reverse copy because of lower-left origin - const u32 source_stride = width * sizeof(u32); - const u8* source_ptr = reinterpret_cast(temp_buffer.data()) + (source_stride * (height - 1)); - const u32 dst_stride = width * sizeof(u16); - u8* dst_ptr = static_cast(buffer); - for (u32 row = 0; row < height; row++) - { - const u8* source_row_ptr = source_ptr; - u8* dst_row_ptr = dst_ptr; - - for (u32 col = 0; col < width; col++) - { - u32 src_col; - std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); - source_row_ptr += sizeof(src_col); - - const u16 dst_col = RGBA8888ToRGBA5551(src_col); - std::memcpy(dst_row_ptr, &dst_col, sizeof(dst_col)); - dst_row_ptr += sizeof(dst_col); - } - - source_ptr -= source_stride; - dst_ptr += dst_stride; - } + // Feed the shadow buffer back to the output. 
+ GPU_HW::ReadVRAM(x, y, width, height, buffer); } void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) @@ -655,7 +644,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* // have to write to the 1x texture first if (m_resolution_scale > 1) - m_vram_downsample_texture->Bind(); + m_vram_encoding_texture->Bind(); else m_vram_texture->Bind(); @@ -676,7 +665,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* const u32 scaled_y = y * m_resolution_scale; const u32 scaled_flipped_y = m_vram_texture->GetHeight() - scaled_y - scaled_height; glDisable(GL_SCISSOR_TEST); - m_vram_downsample_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); + m_vram_encoding_texture->BindFramebuffer(GL_READ_FRAMEBUFFER); glBlitFramebuffer(x, flipped_y, x + width, flipped_y + height, scaled_x, scaled_flipped_y, scaled_x + scaled_width, scaled_flipped_y + scaled_height, GL_COLOR_BUFFER_BIT, GL_NEAREST); glEnable(GL_SCISSOR_TEST); diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 1d19861ac..fe4405ee0 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -62,7 +62,7 @@ private: // downsample texture - used for readbacks at >1xIR. 
std::unique_ptr m_vram_texture; std::unique_ptr m_vram_read_texture; - std::unique_ptr m_vram_downsample_texture; + std::unique_ptr m_vram_encoding_texture; std::unique_ptr m_display_texture; std::unique_ptr m_vertex_stream_buffer; @@ -76,6 +76,7 @@ private: std::array, 9>, 4> m_render_programs; // [render_mode][texture_mode][dithering] std::array, 2> m_display_programs; // [depth_24][interlaced] + GL::Program m_vram_read_program; GL::Program m_vram_write_program; u32 m_uniform_buffer_alignment = 1; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 3f430ebc0..f123a082e 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -63,6 +63,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define CONSTANT const\n"; ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n"; + ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) texelFetch(name, index)\n"; } else @@ -71,6 +72,7 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss) ss << "#define CONSTANT static const\n"; ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n"; ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n"; + ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n"; ss << "#define LOAD_TEXTURE_BUFFER(name, index) name.Load(index)\n"; } @@ -705,6 +707,59 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo return ss.str(); } +std::string GPU_HW_ShaderGen::GenerateVRAMReadFragmentShader() +{ + std::stringstream ss; + WriteHeader(ss); + WriteCommonFunctions(ss); + DeclareUniformBuffer(ss, {"int2 u_base_coords", "int2 u_size"}); + + DeclareTexture(ss, "samp0", 0); + + ss << R"( +uint SampleVRAM(int2 coords) 
+{ + if (RESOLUTION_SCALE == 1) + return RGBA8ToRGBA5551(LOAD_TEXTURE(samp0, coords, 0)); + + // Box filter for downsampling. + float4 value = float4(0.0, 0.0, 0.0, 0.0); + int2 base_coords = coords * int2(RESOLUTION_SCALE, RESOLUTION_SCALE); + for (int offset_x = 0; offset_x < RESOLUTION_SCALE; offset_x++) + { + for (int offset_y = 0; offset_y < RESOLUTION_SCALE; offset_y++) + value += LOAD_TEXTURE(samp0, base_coords + int2(offset_x, offset_y), 0); + } + value /= float(RESOLUTION_SCALE * RESOLUTION_SCALE); + return RGBA8ToRGBA5551(value); +} +)"; + + DeclareFragmentEntryPoint(ss, 0, 1, {}, true, false); + ss << R"( +{ + int2 sample_coords = int2(int(v_pos.x) * 2, int(v_pos.y)); + + #if API_OPENGL || API_OPENGL_ES || 1 + // Lower-left origin flip for OpenGL. + // We want to write the image out upside-down so we can read it top-to-bottom. + sample_coords.y = u_size.y - sample_coords.y - 1; + #endif + + sample_coords += u_base_coords; + + // We're encoding as 32-bit, so the output width is halved and we pack two 16-bit pixels in one 32-bit pixel. + uint left = SampleVRAM(sample_coords); + uint right = SampleVRAM(int2(sample_coords.x + 1, sample_coords.y)); + + o_col0 = float4(float(left & 0xFFu), float((left >> 8) & 0xFFu), + float(right & 0xFFu), float((right >> 8) & 0xFFu)) + / float4(255.0, 255.0, 255.0, 255.0); +})"; + + return ss.str(); +} + std::string GPU_HW_ShaderGen::GenerateVRAMWriteFragmentShader() { std::stringstream ss; diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index ae851f1f8..769a987ea 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -18,6 +18,7 @@ public: std::string GenerateFillFragmentShader(); std::string GenerateCopyFragmentShader(); std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); + std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMWriteFragmentShader(); HostDisplay::RenderAPI m_render_api;