From ae43cc838b9bd31d8a0c0212579023d96d59afe8 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sat, 14 Sep 2019 02:07:31 +1000 Subject: [PATCH] GPU: Partially implemented texture support --- src/common/gl_program.cpp | 88 ++++++++++++- src/common/gl_program.h | 16 ++- src/common/gl_texture.cpp | 5 + src/common/gl_texture.h | 1 + src/pse-sdl/sdl_interface.cpp | 16 +-- src/pse/dma.cpp | 6 +- src/pse/gpu.cpp | 149 ++++++++++++++++++++-- src/pse/gpu.h | 59 ++++++++- src/pse/gpu_hw.cpp | 229 +++++++++++++++++++++++++++++----- src/pse/gpu_hw.h | 22 +++- src/pse/gpu_hw_opengl.cpp | 193 +++++++++++++++++++--------- src/pse/gpu_hw_opengl.h | 15 ++- src/pse/system.cpp | 5 +- src/pse/system.h | 4 + 14 files changed, 681 insertions(+), 127 deletions(-) diff --git a/src/common/gl_program.cpp b/src/common/gl_program.cpp index 913b4d514..ef27dec29 100644 --- a/src/common/gl_program.cpp +++ b/src/common/gl_program.cpp @@ -121,7 +121,7 @@ bool Program::Link() return true; } -void Program::Bind() +void Program::Bind() const { glUseProgram(m_program_id); } @@ -152,15 +152,95 @@ u32 Program::RegisterUniform(const char* name) return id; } -void Program::Uniform1ui(u32 index, u32 value) +void Program::Uniform1ui(u32 index, u32 x) const { Assert(index < m_uniform_locations.size()); const int location = m_uniform_locations[index]; if (location >= 0) - glUniform1ui(location, value); + glUniform1ui(location, x); } -void Program::Uniform4f(u32 index, float x, float y, float z, float w) +void Program::Uniform2ui(u32 index, u32 x, u32 y) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform2ui(location, x, y); +} + +void Program::Uniform3ui(u32 index, u32 x, u32 y, u32 z) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform3ui(location, x, y, z); +} + +void Program::Uniform4ui(u32 index, u32 x, u32 y, u32 z, u32 w) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform4ui(location, x, y, z, w); +} + +void Program::Uniform1i(u32 index, s32 x) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform1i(location, x); +} + +void Program::Uniform2i(u32 index, s32 x, s32 y) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform2i(location, x, y); +} + +void Program::Uniform3i(u32 index, s32 x, s32 y, s32 z) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform3i(location, x, y, z); +} + +void Program::Uniform4i(u32 index, s32 x, s32 y, s32 z, s32 w) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform4i(location, x, y, z, w); +} + +void Program::Uniform1f(u32 index, float x) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform1f(location, x); +} + +void Program::Uniform2f(u32 index, float x, float y) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform2f(location, x, y); +} + +void Program::Uniform3f(u32 index, float x, float y, float z) const +{ + Assert(index < m_uniform_locations.size()); + const int location = m_uniform_locations[index]; + if (location >= 0) + glUniform3f(location, x, y, z); +} + +void Program::Uniform4f(u32 index, float x, float y, float z, float w) const { Assert(index < m_uniform_locations.size()); const int location = m_uniform_locations[index]; diff --git a/src/common/gl_program.h b/src/common/gl_program.h index bd97de244..1a92fc063 100644 --- a/src/common/gl_program.h +++ b/src/common/gl_program.h @@ -23,13 +23,23 @@ public: bool Link(); - void Bind(); + void Bind() const; void Destroy(); u32 RegisterUniform(const char* name); - void Uniform1ui(u32 index, u32 value); - void Uniform4f(u32 index, float x, float y, float z, float w); + void Uniform1ui(u32 index, u32 x) const; + void Uniform2ui(u32 index, u32 x, u32 y) const; + void Uniform3ui(u32 index, u32 x, u32 y, u32 z) const; + void Uniform4ui(u32 index, u32 x, u32 y, u32 z, u32 w) const; + void Uniform1i(u32 index, s32 x) const; + void Uniform2i(u32 index, s32 x, s32 y) const; + void Uniform3i(u32 index, s32 x, s32 y, s32 z) const; + void Uniform4i(u32 index, s32 x, s32 y, s32 z, s32 w) const; + void Uniform1f(u32 index, float x) const; + void Uniform2f(u32 index, float x, float y) const; + void Uniform3f(u32 index, float x, float y, float z) const; + void Uniform4f(u32 index, float x, float y, float z, float w) const; private: GLuint m_program_id = 0; diff --git a/src/common/gl_texture.cpp b/src/common/gl_texture.cpp index 7640b26c7..b08072015 100644 --- a/src/common/gl_texture.cpp +++ b/src/common/gl_texture.cpp @@ -26,4 +26,9 @@ void Texture::Bind() glBindTexture(GL_TEXTURE_2D, m_id); } +void Texture::Unbind() +{ + glBindTexture(GL_TEXTURE_2D, 0); +} + } // namespace GL \ No newline at end of file diff --git a/src/common/gl_texture.h b/src/common/gl_texture.h index 4e4e56d17..4e9bc34b3 100644 --- a/src/common/gl_texture.h +++ b/src/common/gl_texture.h @@ -14,6 +14,7 @@ public: u32 GetHeight() const { return m_height; } void Bind(); + static void Unbind(); private: GLuint m_id; diff --git a/src/pse-sdl/sdl_interface.cpp b/src/pse-sdl/sdl_interface.cpp index d8554034b..c2a404e6c 100644 --- a/src/pse-sdl/sdl_interface.cpp +++ b/src/pse-sdl/sdl_interface.cpp @@ -77,6 +77,7 @@ bool SDLInterface::CreateGLContext() glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); } + SDL_GL_SetSwapInterval(0); return true; } @@ -132,7 +133,7 @@ void main() m_display_program.RegisterUniform("samp0"); m_display_program.Bind(); - m_display_program.Uniform1ui(0, 0); + m_display_program.Uniform1i(0, 0); glGenVertexArrays(1, &m_display_vao); return true; @@ -410,8 +411,6 @@ void SDLInterface::SetDisplayTexture(GL::Texture* texture, u32 offset_x, u32 off m_display_texture_width = width; m_display_texture_height = height; m_display_texture_changed = true; - - Render(); } void SDLInterface::RenderOSDMessages() @@ -476,8 +475,6 @@ void SDLInterface::DoSaveState(u32 index) void SDLInterface::Run() { - Timer last_render_time; - while (m_running) { for (;;) @@ -489,12 +486,7 @@ void SDLInterface::Run() break; } - while (!m_display_texture_changed || last_render_time.GetTimeSeconds() < (1.0f / 60.0f)) - { - m_system->RunFrame(); - } - - // Render(); - last_render_time.Reset(); + m_system->RunFrame(); + Render(); } } diff --git a/src/pse/dma.cpp b/src/pse/dma.cpp index 9e0f5260f..b8d1ebfe4 100644 --- a/src/pse/dma.cpp +++ b/src/pse/dma.cpp @@ -116,8 +116,10 @@ void DMA::WriteRegister(u32 offset, u32 value) void DMA::SetRequest(Channel channel, bool request) { ChannelState& cs = m_state[static_cast(channel)]; - cs.request = request; + if (cs.request == request) + return; + cs.request = request; if (CanRunChannel(channel)) RunDMA(channel); } @@ -199,7 +201,7 @@ void DMA::RunDMA(Channel channel) const u32 word_count = header >> 24; const u32 next_address = header & UINT32_C(0xFFFFFF); - Log_DebugPrintf(" .. linked list entry at 0x%08X size=%u(%u words) next=0x%08X", current_address, + Log_TracePrintf(" .. linked list entry at 0x%08X size=%u(%u words) next=0x%08X", current_address, word_count * UINT32_C(4), word_count, next_address); current_address += sizeof(header); diff --git a/src/pse/gpu.cpp b/src/pse/gpu.cpp index 179d7fe16..23d28eee7 100644 --- a/src/pse/gpu.cpp +++ b/src/pse/gpu.cpp @@ -2,6 +2,7 @@ #include "YBaseLib/Log.h" #include "bus.h" #include "dma.h" +#include "system.h" Log_SetChannel(GPU); GPU::GPU() = default; @@ -24,14 +25,40 @@ void GPU::Reset() void GPU::SoftReset() { m_GPUSTAT.bits = 0x14802000; - UpdateDMARequest(); + UpdateGPUSTAT(); } -void GPU::UpdateDMARequest() +void GPU::UpdateGPUSTAT() { - const bool request = m_GPUSTAT.dma_direction != DMADirection::Off; - m_GPUSTAT.dma_data_request = request; - m_dma->SetRequest(DMA::Channel::GPU, request); + m_GPUSTAT.ready_to_send_vram = !m_GPUREAD_buffer.empty(); + m_GPUSTAT.ready_to_recieve_cmd = m_GPUREAD_buffer.empty(); + m_GPUSTAT.ready_to_recieve_dma = m_GPUREAD_buffer.empty(); + + bool dma_request; + switch (m_GPUSTAT.dma_direction) + { + case DMADirection::Off: + dma_request = false; + break; + + case DMADirection::FIFO: + dma_request = true; // FIFO not full/full + break; + + case DMADirection::CPUtoGP0: + dma_request = m_GPUSTAT.ready_to_recieve_dma; + break; + + case DMADirection::GPUREADtoCPU: + dma_request = m_GPUSTAT.ready_to_send_vram; + break; + + default: + dma_request = false; + break; + } + m_GPUSTAT.dma_data_request = dma_request; + m_dma->SetRequest(DMA::Channel::GPU, dma_request); } u32 GPU::ReadRegister(u32 offset) @@ -96,8 +123,16 @@ void GPU::DMAWrite(u32 value) u32 GPU::ReadGPUREAD() { - Log_ErrorPrintf("GPUREAD not implemented"); - return UINT32_C(0xFFFFFFFF); + if (m_GPUREAD_buffer.empty()) + { + Log_ErrorPrintf("GPUREAD read while buffer is empty"); + return UINT32_C(0xFFFFFFFF); + } + + const u32 value = m_GPUREAD_buffer.front(); + m_GPUREAD_buffer.pop_front(); + UpdateGPUSTAT(); + return value; } void GPU::WriteGP0(u32 value) @@ -107,6 +142,7 @@ void GPU::WriteGP0(u32 value) const u8 command = Truncate8(m_GP0_command[0] >> 24); const u32 param = m_GP0_command[0] & UINT32_C(0x00FFFFFF); + UpdateGPUSTAT(); if (command >= 0x20 && command <= 0x7F) { @@ -128,6 +164,13 @@ void GPU::WriteGP0(u32 value) } break; + case 0xC0: // Copy Rectnagle VRAM->CPU + { + if (!HandleCopyRectangleVRAMToCPUCommand()) + return; + } + break; + case 0xE1: // Set draw mode { // 0..10 bits match GPUSTAT @@ -136,6 +179,7 @@ void GPU::WriteGP0(u32 value) m_GPUSTAT.texture_disable = (param & (UINT32_C(1) << 11)) != 0; m_texture_config.x_flip = (param & (UINT32_C(1) << 12)) != 0; m_texture_config.y_flip = (param & (UINT32_C(1) << 13)) != 0; + m_texture_config.SetColorMode(m_GPUSTAT.texture_color_mode); Log_DebugPrintf("Set draw mode %08X", param); } break; @@ -195,6 +239,7 @@ void GPU::WriteGP0(u32 value) } m_GP0_command.clear(); + UpdateGPUSTAT(); } void GPU::WriteGP1(u32 value) @@ -207,7 +252,15 @@ void GPU::WriteGP1(u32 value) { m_GPUSTAT.dma_direction = static_cast(param); Log_DebugPrintf("DMA direction <- 0x%02X", static_cast(m_GPUSTAT.dma_direction.GetValue())); - UpdateDMARequest(); + UpdateGPUSTAT(); + } + break; + + case 0x05: // Set display start address + { + // TODO: Remove this later.. + FlushRender(); + UpdateDisplay(); } break; @@ -319,12 +372,90 @@ bool GPU::HandleCopyRectangleCPUToVRAMCommand() return true; } + FlushRender(); UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &m_GP0_command[3]); return true; } +bool GPU::HandleCopyRectangleVRAMToCPUCommand() +{ + if (m_GP0_command.size() < 3) + return false; + + const u32 width = m_GP0_command[2] & UINT32_C(0xFFFF); + const u32 height = m_GP0_command[2] >> 16; + const u32 num_pixels = width * height; + const u32 num_words = ((num_pixels + 1) / 2); + const u32 src_x = m_GP0_command[1] & UINT32_C(0xFFFF); + const u32 src_y = m_GP0_command[1] >> 16; + + Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", src_x, src_y, width, height); + + if ((src_x + width) > VRAM_WIDTH || (src_x + height) > VRAM_HEIGHT) + { + Panic("Out of bounds VRAM copy"); + return true; + } + + // TODO: Implement. + for (u32 i = 0; i < num_words; i++) + m_GPUREAD_buffer.push_back(0); + + // Is this correct? + return true; +} + +void GPU::UpdateDisplay() +{ + m_texture_config.page_changed = true; + m_system->IncrementFrameNumber(); +} + void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) {} void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) {} -void GPU::FlushRender() {} \ No newline at end of file +void GPU::FlushRender() {} + +void GPU::TextureConfig::SetColorMode(TextureColorMode new_color_mode) +{ + if (new_color_mode == TextureColorMode::Reserved_Direct16Bit) + new_color_mode = TextureColorMode::Direct16Bit; + + if (color_mode == new_color_mode) + return; + + color_mode = new_color_mode; +} + +void GPU::TextureConfig::SetFromPolygonTexcoord(u32 texcoord0, u32 texcoord1) +{ + SetFromPaletteAttribute(Truncate16(texcoord0 >> 16)); + SetFromPageAttribute(Truncate16(texcoord1 >> 16)); +} + +void GPU::TextureConfig::SetFromRectangleTexcoord(u32 texcoord) +{ + SetFromPaletteAttribute(Truncate16(texcoord >> 16)); +} + +void GPU::TextureConfig::SetFromPageAttribute(u16 value) +{ + value &= PAGE_ATTRIBUTE_MASK; + if (page_attribute == value) + return; + + base_x = static_cast(ZeroExtend32(value & UINT16_C(0x1FF)) * UINT32_C(64)); + base_y = static_cast(ZeroExtend32((value >> 11) & UINT16_C(1)) * UINT32_C(512)); + page_changed = true; +} + +void GPU::TextureConfig::SetFromPaletteAttribute(u16 value) +{ + value &= PALETTE_ATTRIBUTE_MASK; + if (palette_attribute == value) + return; + + palette_x = static_cast(ZeroExtend32(value & UINT16_C(0x3F)) * UINT32_C(16)); + palette_y = static_cast(ZeroExtend32((value >> 6) & UINT16_C(0x1FF))); +} diff --git a/src/pse/gpu.h b/src/pse/gpu.h index 385d4994b..4fb9836c6 100644 --- a/src/pse/gpu.h +++ b/src/pse/gpu.h @@ -2,6 +2,8 @@ #include "common/bitfield.h" #include "types.h" #include +#include +#include class System; class Bus; @@ -30,6 +32,8 @@ protected: static constexpr u32 VRAM_WIDTH = 1024; static constexpr u32 VRAM_HEIGHT = 512; static constexpr u32 VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16); + static constexpr u32 TEXTURE_PAGE_WIDTH = 256; + static constexpr u32 TEXTURE_PAGE_HEIGHT = 256; static constexpr s32 S11ToS32(u32 value) { @@ -63,17 +67,26 @@ protected: R16x16 = 3 }; + enum class TextureColorMode : u8 + { + Palette4Bit = 0, + Palette8Bit = 1, + Direct16Bit = 2, + Reserved_Direct16Bit = 3 + }; + union RenderCommand { u32 bits; BitField color_for_first_vertex; - BitField texture_enable; // not valid for lines + BitField texture_blending_raw; // not valid for lines BitField transparency_enable; + BitField texture_enable; BitField rectangle_size; // only for rectangles BitField quad_polygon; // only for polygons BitField polyline; // only for lines - BitField shading_enable; // 0 - flat, 1 = gouroud + BitField shading_enable; // 0 - flat, 1 = gouroud BitField primitive; }; @@ -90,7 +103,10 @@ protected: }; void SoftReset(); - void UpdateDMARequest(); + + // Updates dynamic bits in GPUSTAT (ready to send VRAM/ready to receive DMA) + void UpdateGPUSTAT(); + u32 ReadGPUREAD(); void WriteGP0(u32 value); void WriteGP1(u32 value); @@ -98,8 +114,10 @@ protected: // Rendering commands, returns false if not enough data is provided bool HandleRenderCommand(); bool HandleCopyRectangleCPUToVRAMCommand(); + bool HandleCopyRectangleVRAMToCPUCommand(); // Rendering in the backend + virtual void UpdateDisplay(); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices); virtual void FlushRender(); @@ -114,7 +132,7 @@ protected: BitField texture_page_x_base; BitField texture_page_y_base; BitField semi_transparency; - BitField texture_page_colors; + BitField texture_color_mode; BitField dither_enable; BitField draw_to_display_area; BitField draw_set_mask_bit; @@ -140,6 +158,33 @@ protected: struct TextureConfig { + static constexpr u16 PAGE_ATTRIBUTE_MASK = UINT16_C(0b0000100111111111); + static constexpr u16 PALETTE_ATTRIBUTE_MASK = UINT16_C(0b0111111111111111); + + // decoded values + s32 base_x; + s32 base_y; + s32 palette_x; + s32 palette_y; + + // original values + u16 page_attribute; // from register in rectangle modes/vertex in polygon modes + u16 palette_attribute; // from vertex + TextureColorMode color_mode; // from register/vertex in polygon modes + + bool page_changed = false; + + bool IsPageChanged() const { return page_changed; } + void ClearPageChangedFlag() { page_changed = false; } + + void SetColorMode(TextureColorMode new_color_mode); + + void SetFromPolygonTexcoord(u32 texcoord0, u32 texcoord1); + void SetFromRectangleTexcoord(u32 texcoord); + + void SetFromPageAttribute(u16 value); + void SetFromPaletteAttribute(u16 value); + u8 window_mask_x; // in 8 pixel steps u8 window_mask_y; // in 8 pixel steps u8 window_offset_x; // in 8 pixel steps @@ -160,5 +205,11 @@ protected: s32 y; } m_drawing_offset = {}; + struct TexturePageConfig + { + + } m_texture_page_config = {}; + std::vector m_GP0_command; + std::deque m_GPUREAD_buffer; }; diff --git a/src/pse/gpu_hw.cpp b/src/pse/gpu_hw.cpp index 132aeba21..eaf602d37 100644 --- a/src/pse/gpu_hw.cpp +++ b/src/pse/gpu_hw.cpp @@ -17,8 +17,8 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) const bool textured = rc.texture_enable; // if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip - if (rc.quad_polygon && !m_vertex_staging.empty()) - m_vertex_staging.push_back(m_vertex_staging.back()); + if (rc.quad_polygon && !m_batch_vertices.empty()) + m_batch_vertices.push_back(m_batch_vertices.back()); u32 buffer_pos = 1; for (u32 i = 0; i < num_vertices; i++) @@ -30,12 +30,20 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices) hw_vert.x = vp.x(); hw_vert.y = vp.y(); + // excluding lower-right coordinates + if ((i & UINT32_C(1)) != 0) + hw_vert.x--; + if ((i & UINT32_C(2)) != 0) + hw_vert.y--; + if (textured) - hw_vert.texcoord = (m_GP0_command[buffer_pos++] & UINT32_C(0x0000FFFF)); + hw_vert.texcoord = Truncate16(m_GP0_command[buffer_pos++]); else hw_vert.texcoord = 0; - m_vertex_staging.push_back(hw_vert); + hw_vert.padding = 0; + + m_batch_vertices.push_back(hw_vert); } } break; @@ -62,35 +70,73 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) *bottom = m_drawing_area.bottom_right_y; } +static void DefineMacro(std::stringstream& ss, const char* name, bool enabled) +{ + if (enabled) + ss << "#define " << name << " 1\n"; + else + ss << "/* #define " << name << " 0 */\n"; +} + +void GPU_HW::GenerateShaderHeader(std::stringstream& ss) +{ + ss << "#version 330 core\n\n"; + ss << "const vec2 vram_size = vec2(float(" << VRAM_WIDTH << "), float(" << VRAM_HEIGHT << "));\n"; + ss << "const vec2 rcp_vram_size = vec2(1.0, 1.0) / vram_size;\n"; + ss << R"( + +float fixYCoord(float y) +{ + return 1.0 - rcp_vram_size.y - y; +} + +uint RGBA8ToRGBA5551(vec4 v) +{ + uint r = uint(v.r * 255.0) >> 3; + uint g = uint(v.g * 255.0) >> 3; + uint b = uint(v.b * 255.0) >> 3; + uint a = (v.a != 0.0) ? 1u : 0u; + return (r) | (g << 5) | (b << 10) | (a << 15); +} + +vec4 RGBA5551ToRGBA8(uint v) +{ + uint r = (v & 0x1Fu); + uint g = ((v >> 5) & 0x1Fu); + uint b = ((v >> 10) & 0x1Fu); + uint a = ((v >> 15) & 0x01u); + + return vec4(float(r) * 255.0, float(g) * 255.0, float(b) * 255.0, float(a) * 255.0); +} +)"; +} + std::string GPU_HW::GenerateVertexShader(bool textured) { std::stringstream ss; - ss << "#version 330 core\n"; - if (textured) - ss << "#define TEXTURED 1\n"; - else - ss << "/* #define TEXTURED 0 */\n"; + GenerateShaderHeader(ss); + DefineMacro(ss, "TEXTURED", textured); ss << R"( -in ivec2 a_position; -in vec4 a_color; -in uint a_texcoord; +in ivec2 a_pos; +in vec4 a_col0; +in vec2 a_tex0; -out vec4 v_color; +out vec4 v_col0; #if TEXTURED - out vec2 v_texcoord; + out vec2 v_tex0; #endif void main() { // 0..+1023 -> -1..1 - float pos_x = (float(a_position.x) / 511.5) - 1.0; - float pos_y = (float(a_position.y) / -255.5) + 1.0; + float pos_x = (float(a_pos.x) / 511.5) - 1.0; + float pos_y = (float(a_pos.y) / -255.5) + 1.0; gl_Position = vec4(pos_x, pos_y, 0.0, 1.0); - v_color = a_color; + v_col0 = a_col0; #if TEXTURED - v_texcoord = vec2(float(a_texcoord & 0xFFu) / 256.0, float((a_texcoord >> 8) & 0xFFu) / 256.0); + v_tex0 = vec2(a_tex0.x / 4, a_tex0.y); #endif } )"; @@ -98,29 +144,154 @@ void main() return ss.str(); } -std::string GPU_HW::GenerateFragmentShader(bool textured) +std::string GPU_HW::GenerateFragmentShader(bool textured, bool blending) { std::stringstream ss; - ss << "#version 330 core\n"; - if (textured) - ss << "#define TEXTURED 1\n"; - else - ss << "/* #define TEXTURED 0 */\n"; + GenerateShaderHeader(ss); + DefineMacro(ss, "TEXTURED", textured); + DefineMacro(ss, "BLENDING", blending); ss << R"( -in vec4 v_color; +in vec4 v_col0; #if TEXTURED - in vec2 v_texcoord; + in vec2 v_tex0; + uniform sampler2D samp0; +#endif + +out vec4 o_col0; + +void main() +{ + #if TEXTURED + vec4 texcol = texture(samp0, v_tex0); + #if BLENDING + o_col0 = v_col0 * texcol; + #else + o_col0 = /*v_col0 + */texcol; + #endif + #else + o_col0 = v_col0; + #endif +} +)"; + + return ss.str(); +} + +std::string GPU_HW::GenerateScreenQuadVertexShader() +{ + std::stringstream ss; + GenerateShaderHeader(ss); + ss << R"( + +out vec2 v_tex0; + +void main() +{ + v_tex0 = vec2(float((gl_VertexID << 1) & 2), float(gl_VertexID & 2)); + gl_Position = vec4(v_tex0 * vec2(2.0f, -2.0f) + vec2(-1.0f, 1.0f), 0.0f, 1.0f); + gl_Position.y = -gl_Position.y; +} +)"; + + return ss.str(); +} + +std::string GPU_HW::GenerateTexturePageProgram(TextureColorMode mode) +{ + const bool is_palette = (mode == GPU::TextureColorMode::Palette4Bit || mode == GPU::TextureColorMode::Palette8Bit); + + std::stringstream ss; + GenerateShaderHeader(ss); + DefineMacro(ss, "PALETTE", is_palette); + DefineMacro(ss, "PALETTE_4_BIT", mode == GPU::TextureColorMode::Palette4Bit); + DefineMacro(ss, "PALETTE_8_BIT", mode == GPU::TextureColorMode::Palette8Bit); + + ss << R"( +uniform sampler2D samp0; +uniform vec2 base_offset; + +#if PALETTE +uniform vec2 palette_offset; #endif -out vec4 ocol0; +in vec2 v_tex0; +out vec4 o_col0; void main() { - ocol0 = v_color; - //ocol0 = vec4(1.0, 0.5, 0.5, 1.0); + #if PALETTE_4_BIT + vec2 local_coords = vec2(v_tex0.x / 4.0, v_tex0.y); + #elif PALETTE_8_BIT + vec2 local_coords = vec2(v_tex0.x / 2.0, v_tex0.y); + #else + vec2 local_coords = v_tex0; + #endif + + // fixup coords + vec2 coords = vec2(local_coords.x + base_offset.x, fixYCoord(local_coords.y + base_offset.y)); + + // load colour/palette + vec4 color = texture(samp0, coords); + + // apply palette + #if PALETTE + #if PALETTE_4_BIT + uint subpixel = uint(gl_FragCoord.x) & 3u; + uint vram_value = RGBA8ToRGBA5551(color); + float palette_index = float((vram_value >> (subpixel * 4u)) & 0x0Fu) * rcp_vram_size.x; + #elif PALETTE_8_BIT + // TODO: Still has precision issues here + uint subpixel = uint(gl_FragCoord.x) & 1u; + float palette_index = ((subpixel == 0u) ? color.x : color.y) * (255.0 * rcp_vram_size.x); + #endif + vec2 palette_coords = vec2(palette_offset.x + palette_index, fixYCoord(palette_offset.y)); + color = texture(samp0, palette_coords); + #endif + + o_col0 = color; } )"; return ss.str(); } + +void GPU_HW::UpdateTexturePageTexture() {} + +void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) +{ + if (rc.texture_enable) + { + // extract texture lut/page + switch (rc.primitive) + { + case Primitive::Polygon: + { + if (rc.shading_enable) + m_texture_config.SetFromPolygonTexcoord(m_GP0_command[2], m_GP0_command[5]); + else + m_texture_config.SetFromPolygonTexcoord(m_GP0_command[2], m_GP0_command[4]); + } + break; + + default: + break; + } + + if (m_texture_config.IsPageChanged()) + { + if (!m_batch_vertices.empty()) + FlushRender(); + + UpdateTexturePageTexture(); + m_texture_config.ClearPageChangedFlag(); + } + } + + // flush when the command changes + if (!m_batch_vertices.empty()) // && m_batch_command.bits != rc.bits) + FlushRender(); + + m_batch_command = rc; + LoadVertices(rc, num_vertices); +} diff --git a/src/pse/gpu_hw.h b/src/pse/gpu_hw.h index 28b05c405..3a8f96f5b 100644 --- a/src/pse/gpu_hw.h +++ b/src/pse/gpu_hw.h @@ -1,6 +1,7 @@ #pragma once #include "gpu.h" #include +#include #include class GPU_HW : public GPU @@ -15,17 +16,30 @@ protected: s32 x; s32 y; u32 color; - u32 texcoord; + u16 texcoord; + u16 padding; }; - void LoadVertices(RenderCommand rc, u32 num_vertices); + virtual void UpdateTexturePageTexture(); + + bool IsFlushed() const { return !m_batch_vertices.empty(); } + + void DispatchRenderCommand(RenderCommand rc, u32 num_vertices) override; void CalcViewport(int* x, int* y, int* width, int* height); void CalcScissorRect(int* left, int* top, int* right, int* bottom); std::string GenerateVertexShader(bool textured); - std::string GenerateFragmentShader(bool textured); + std::string GenerateFragmentShader(bool textured, bool blending); + std::string GenerateScreenQuadVertexShader(); + std::string GenerateTexturePageProgram(TextureColorMode mode); - std::vector m_vertex_staging; + std::vector m_batch_vertices; + RenderCommand m_batch_command = {}; + +private: + void GenerateShaderHeader(std::stringstream& ss); + + void LoadVertices(RenderCommand rc, u32 num_vertices); }; diff --git a/src/pse/gpu_hw_opengl.cpp b/src/pse/gpu_hw_opengl.cpp index fe7994efd..2aa1d854b 100644 --- a/src/pse/gpu_hw_opengl.cpp +++ b/src/pse/gpu_hw_opengl.cpp @@ -32,6 +32,11 @@ void GPU_HW_OpenGL::Reset() ClearFramebuffer(); } +std::tuple GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y) +{ + return std::make_tuple(x, static_cast(static_cast(VRAM_HEIGHT) - y)); +} + void GPU_HW_OpenGL::CreateFramebuffer() { m_framebuffer_texture = @@ -41,6 +46,13 @@ void GPU_HW_OpenGL::CreateFramebuffer() glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_framebuffer_texture->GetGLId(), 0); Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + m_texture_page_texture = + std::make_unique(TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT, GL_RGBA, GL_UNSIGNED_BYTE, nullptr, false); + glGenFramebuffers(1, &m_texture_page_fbo_id); + glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture_page_texture->GetGLId(), 0); + Assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); } void GPU_HW_OpenGL::ClearFramebuffer() @@ -51,14 +63,17 @@ void GPU_HW_OpenGL::ClearFramebuffer() glClear(GL_COLOR_BUFFER_BIT); glBindFramebuffer(GL_FRAMEBUFFER, 0); - //m_system->GetHostInterface()->SetDisplayTexture(m_framebuffer_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); + m_system->GetHostInterface()->SetDisplayTexture(m_framebuffer_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } void GPU_HW_OpenGL::DestroyFramebuffer() { + glDeleteFramebuffers(1, &m_texture_page_fbo_id); + m_texture_page_fbo_id = 0; + m_texture_page_texture.reset(); + glDeleteFramebuffers(1, &m_framebuffer_fbo_id); m_framebuffer_fbo_id = 0; - m_framebuffer_texture.reset(); } @@ -70,52 +85,86 @@ void GPU_HW_OpenGL::CreateVertexBuffer() glGenVertexArrays(1, &m_vao_id); glBindVertexArray(m_vao_id); - glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, x))); - glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), - reinterpret_cast(offsetof(HWVertex, color))); - glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, color))); glEnableVertexAttribArray(0); glEnableVertexAttribArray(1); glEnableVertexAttribArray(2); + glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, x))); + glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), + reinterpret_cast(offsetof(HWVertex, color))); + glVertexAttribPointer(2, 2, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), + reinterpret_cast(offsetof(HWVertex, texcoord))); glBindVertexArray(0); + + glGenVertexArrays(1, &m_attributeless_vao_id); } bool GPU_HW_OpenGL::CompilePrograms() { - for (u32 texture_enable_i = 0; texture_enable_i < 2; texture_enable_i++) + bool result = true; + result &= CompileProgram(m_color_program, false, false); + result &= CompileProgram(m_texture_program, true, false); + result &= CompileProgram(m_blended_texture_program, true, true); + if (!result) + return false; + + const std::string screen_quad_vs = GenerateScreenQuadVertexShader(); + for (u32 palette_size = 0; palette_size < static_cast(m_texture_page_programs.size()); palette_size++) { - const bool texture_enable = ConvertToBool(texture_enable_i); - const std::string vs = GenerateVertexShader(texture_enable); - const std::string fs = GenerateFragmentShader(texture_enable); + const std::string fs = GenerateTexturePageProgram(static_cast(palette_size)); - GL::Program& prog = texture_enable ? m_texture_program : m_color_program; - if (!prog.Compile(vs.c_str(), fs.c_str())) + GL::Program& prog = m_texture_page_programs[palette_size]; + if (!prog.Compile(screen_quad_vs.c_str(), fs.c_str())) return false; - prog.BindAttribute(0, "a_position"); - prog.BindAttribute(1, "a_color"); - if (texture_enable) - prog.BindAttribute(2, "a_texcoord"); - - prog.BindFragData(0, "ocol0"); + prog.BindFragData(0, "o_col0"); if (!prog.Link()) return false; + + prog.RegisterUniform("samp0"); + prog.RegisterUniform("base_offset"); + prog.RegisterUniform("palette_offset"); + prog.Bind(); + prog.Uniform1i(0, 0); } return true; } -bool GPU_HW_OpenGL::SetProgram(bool texture_enable) +bool GPU_HW_OpenGL::CompileProgram(GL::Program& prog, bool textured, bool blending) { - GL::Program& prog = texture_enable ? m_texture_program : m_color_program; - if (!prog.IsVaild()) + const std::string vs = GenerateVertexShader(textured); + const std::string fs = GenerateFragmentShader(textured, blending); + if (!prog.Compile(vs.c_str(), fs.c_str())) + return false; + + prog.BindAttribute(0, "a_pos"); + prog.BindAttribute(1, "a_col0"); + if (textured) + prog.BindAttribute(2, "a_tex0"); + + prog.BindFragData(0, "o_col0"); + + if (!prog.Link()) return false; prog.Bind(); + + if (textured) + { + prog.RegisterUniform("samp0"); + prog.Uniform1i(0, 0); + } + return true; } +void GPU_HW_OpenGL::SetProgram(bool textured, bool blending) +{ + const GL::Program& prog = textured ? (blending ? m_blended_texture_program : m_texture_program) : m_color_program; + prog.Bind(); +} + void GPU_HW_OpenGL::SetViewport() { int x, y, width, height; @@ -144,70 +193,100 @@ inline u32 ConvertRGBA5551ToRGBA8888(u16 color) return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24); } +void GPU_HW_OpenGL::UpdateDisplay() +{ + GPU_HW::UpdateDisplay(); + m_system->GetHostInterface()->SetDisplayTexture(m_framebuffer_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); +} + void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) { - const u32 pixel_count = width * height; std::vector rgba_data; - rgba_data.reserve(pixel_count); + rgba_data.reserve(width * height); - const u8* source_ptr = static_cast(data); - for (u32 i = 0; i < pixel_count; i++) + // reverse copy the rows so it matches opengl's lower-left origin + const u32 source_stride = width * sizeof(u16); + const u8* source_ptr = static_cast(data) + (source_stride * (height - 1)); + for (u32 row = 0; row < height; row++) { - u16 src_col; - std::memcpy(&src_col, source_ptr, sizeof(src_col)); - source_ptr += sizeof(src_col); + const u8* source_row_ptr = source_ptr; + + for (u32 col = 0; col < width; col++) + { + u16 src_col; + std::memcpy(&src_col, source_row_ptr, sizeof(src_col)); + source_row_ptr += sizeof(src_col); + + const u32 dst_col = ConvertRGBA5551ToRGBA8888(src_col); + rgba_data.push_back(dst_col); + } - const u32 dst_col = ConvertRGBA5551ToRGBA8888(src_col); - rgba_data.push_back(dst_col); + source_ptr -= source_stride; } m_framebuffer_texture->Bind(); - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, GL_RGBA, GL_UNSIGNED_BYTE, + + // lower-left origin flip happens here + glTexSubImage2D(GL_TEXTURE_2D, 0, x, VRAM_HEIGHT - y - height, width, height, GL_RGBA, GL_UNSIGNED_BYTE, rgba_data.data()); - m_system->GetHostInterface()->SetDisplayTexture(m_framebuffer_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } -void GPU_HW_OpenGL::DispatchRenderCommand(RenderCommand rc, u32 num_vertices) +void GPU_HW_OpenGL::UpdateTexturePageTexture() { - LoadVertices(rc, num_vertices); - if (m_vertex_staging.empty()) - return; + glBindFramebuffer(GL_FRAMEBUFFER, m_texture_page_fbo_id); + m_framebuffer_texture->Bind(); - if (!SetProgram(rc.texture_enable)) + glDisable(GL_BLEND); + glViewport(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT); + glBindVertexArray(m_attributeless_vao_id); + + const GL::Program& prog = m_texture_page_programs[static_cast(m_texture_config.color_mode)]; + prog.Bind(); + + const float base_x = static_cast(m_texture_config.base_x) * (1.0f / static_cast(VRAM_WIDTH)); + const float base_y = static_cast(m_texture_config.base_y) * (1.0f / static_cast(VRAM_HEIGHT)); + prog.Uniform2f(1, base_x, base_y); + + if (m_texture_config.color_mode >= GPU::TextureColorMode::Palette4Bit) { - Log_ErrorPrintf("Failed to set GL program"); - m_vertex_staging.clear(); - return; + const float palette_x = static_cast(m_texture_config.palette_x) * (1.0f / static_cast(VRAM_WIDTH)); + const float palette_y = static_cast(m_texture_config.palette_y) * (1.0f / static_cast(VRAM_HEIGHT)); + prog.Uniform2f(2, palette_x, palette_y); } + glDrawArrays(GL_TRIANGLES, 0, 3); + + m_framebuffer_texture->Unbind(); + glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); +} + +void GPU_HW_OpenGL::FlushRender() +{ + if (m_batch_vertices.empty()) + return; + + SetProgram(m_batch_command.texture_enable, m_batch_command.texture_blending_raw); SetViewport(); + if (m_batch_command.texture_enable) + m_texture_page_texture->Bind(); + glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer_fbo_id); glBindVertexArray(m_vao_id); glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer); - glBufferData(GL_ARRAY_BUFFER, static_cast(sizeof(HWVertex) * m_vertex_staging.size()), - m_vertex_staging.data(), GL_STREAM_DRAW); - glEnableVertexAttribArray(0); + glBufferData(GL_ARRAY_BUFFER, static_cast(sizeof(HWVertex) * m_batch_vertices.size()), + m_batch_vertices.data(), GL_STREAM_DRAW); glVertexAttribIPointer(0, 2, GL_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, x))); - glEnableVertexAttribArray(1); glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, color))); - glEnableVertexAttribArray(2); - glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(HWVertex), reinterpret_cast(offsetof(HWVertex, color))); - - glDrawArrays(rc.quad_polygon ? GL_TRIANGLE_STRIP : GL_TRIANGLES, 0, static_cast(m_vertex_staging.size())); - - m_system->GetHostInterface()->SetDisplayTexture(m_framebuffer_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT); - m_vertex_staging.clear(); -} + glVertexAttribPointer(2, 2, GL_UNSIGNED_BYTE, true, sizeof(HWVertex), + reinterpret_cast(offsetof(HWVertex, texcoord))); -void GPU_HW_OpenGL::FlushRender() -{ - if (m_vertex_staging.empty()) - return; + glDrawArrays(m_batch_command.quad_polygon ? GL_TRIANGLE_STRIP : GL_TRIANGLES, 0, + static_cast(m_batch_vertices.size())); - m_vertex_staging.clear(); + m_batch_vertices.clear(); } std::unique_ptr GPU::CreateHardwareOpenGLRenderer() diff --git a/src/pse/gpu_hw_opengl.h b/src/pse/gpu_hw_opengl.h index 090f634c5..35a7f090a 100644 --- a/src/pse/gpu_hw_opengl.h +++ b/src/pse/gpu_hw_opengl.h @@ -5,6 +5,7 @@ #include "gpu_hw.h" #include #include +#include class GPU_HW_OpenGL : public GPU_HW { @@ -16,11 +17,14 @@ public: void Reset() override; protected: + void UpdateDisplay() override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; - void DispatchRenderCommand(RenderCommand rc, u32 num_vertices) override; + void UpdateTexturePageTexture() override; void FlushRender() override; private: + std::tuple ConvertToFramebufferCoordinates(s32 x, s32 y); + void CreateFramebuffer(); void ClearFramebuffer(); void DestroyFramebuffer(); @@ -28,17 +32,24 @@ private: void CreateVertexBuffer(); bool CompilePrograms(); + bool CompileProgram(GL::Program& prog, bool textured, bool blending); - bool SetProgram(bool texture_enable); + void SetProgram(bool textured, bool blending); void SetViewport(); void SetScissor(); std::unique_ptr m_framebuffer_texture; GLuint m_framebuffer_fbo_id = 0; + std::unique_ptr m_texture_page_texture; + GLuint m_texture_page_fbo_id = 0; + GLuint m_vertex_buffer = 0; GLuint m_vao_id = 0; + GLuint m_attributeless_vao_id = 0; GL::Program m_texture_program; GL::Program m_color_program; + GL::Program m_blended_texture_program; + std::array m_texture_page_programs; }; diff --git a/src/pse/system.cpp b/src/pse/system.cpp index c255e09e5..6ff0d4541 100644 --- a/src/pse/system.cpp +++ b/src/pse/system.cpp @@ -38,9 +38,12 @@ void System::Reset() m_bus->Reset(); m_dma->Reset(); m_gpu->Reset(); + m_frame_number = 1; } void System::RunFrame() { - m_cpu->Execute(); + u32 current_frame_number = m_frame_number; + while (current_frame_number == m_frame_number) + m_cpu->Execute(); } diff --git a/src/pse/system.h b/src/pse/system.h index b48baeffa..0338f3e9f 100644 --- a/src/pse/system.h +++ b/src/pse/system.h @@ -20,6 +20,9 @@ public: HostInterface* GetHostInterface() const { return m_host_interface; } + u32 GetFrameNumber() const { return m_frame_number; } + void IncrementFrameNumber() { m_frame_number++; } + bool Initialize(); void Reset(); @@ -31,4 +34,5 @@ private: std::unique_ptr m_bus; std::unique_ptr m_dma; std::unique_ptr m_gpu; + u32 m_frame_number = 1; };