From 232f36f8666637ed12942326ceeffc2d13f6e84b Mon Sep 17 00:00:00 2001
From: Tomas Jakobsson <tompsson@hotmail.com>
Date: Fri, 10 Nov 2017 19:48:23 +0100
Subject: [PATCH] Implement StringUtil which has stolen some functions from
 Font and added unicode2Chars

---
 es-core/src/StringUtil.h                     | 136 ++++++++++++++++
 es-core/src/components/TextComponent.cpp     |   3 +-
 es-core/src/components/TextEditComponent.cpp |   5 +-
 es-core/src/resources/Font.cpp               | 156 ++-----------------
 es-core/src/resources/Font.h                 |  14 +-
 5 files changed, 160 insertions(+), 154 deletions(-)
 create mode 100644 es-core/src/StringUtil.h

diff --git a/es-core/src/StringUtil.h b/es-core/src/StringUtil.h
new file mode 100644
index 000000000..a7f8f84e3
--- /dev/null
+++ b/es-core/src/StringUtil.h
@@ -0,0 +1,136 @@
+#pragma once
+#ifndef ES_CORE_STRING_UTIL_H
+#define ES_CORE_STRING_UTIL_H
+#include <string>
+namespace StringUtil
+{
+	// Decodes the UTF-8 sequence starting at byte offset _cursor of _string, returns its code
+	// point and advances _cursor past the bytes consumed. An invalid lead byte, or a sequence
+	// cut short by the end of the string, consumes one byte and yields '?'.
+	inline unsigned int chars2Unicode(const std::string& _string, size_t& _cursor)
+	{
+		const unsigned char lead   = static_cast<unsigned char>(_string[_cursor]);
+		size_t              length = 1;   // number of bytes to consume
+		unsigned int        result = '?'; // kept when the lead byte matches no pattern below
+
+		if((lead & 0x80) == 0) // 0xxxxxxx, one byte character
+		{
+			result = lead;
+		}
+		else if((lead & 0xE0) == 0xC0) // 110xxxxx, two byte character
+		{
+			length = 2;
+			result = lead & 0x1F;
+		}
+		else if((lead & 0xF0) == 0xE0) // 1110xxxx, three byte character
+		{
+			length = 3;
+			result = lead & 0x0F;
+		}
+		else if((lead & 0xF8) == 0xF0) // 11110xxx, four byte character
+		{
+			length = 4;
+			result = lead & 0x07;
+		}
+
+		if(length > _string.length() - _cursor) // continuation bytes would lie past the end
+		{
+			length = 1;
+			result = '?';
+		}
+
+		for(size_t i = 1; i < length; ++i) // each 10xxxxxx byte contributes six more bits
+			result = (result << 6) | (_string[_cursor + i] & 0x3F);
+		_cursor += length; // single write to the cursor, so evaluation order cannot matter
+		return result;
+	} // chars2Unicode
+
+	// Encodes the code point _unicode as UTF-8 and returns the resulting one to four bytes.
+	// Values above 0x1FFFFF (more than a four byte sequence can carry) are returned as "?".
+	inline std::string unicode2Chars(const unsigned int _unicode)
+	{
+		std::string result;
+
+		if(_unicode < 0x80) // one byte character
+		{
+			result += static_cast<char>(_unicode);
+		}
+		else if(_unicode < 0x800) // two byte character
+		{
+			result += static_cast<char>(((_unicode >>  6)       ) | 0xC0);
+			result += static_cast<char>(((_unicode      ) & 0x3F) | 0x80);
+		}
+		else if(_unicode <= 0xFFFF) // three byte character, 0xFFFF itself still fits in 16 bits
+		{
+			result += static_cast<char>(((_unicode >> 12)       ) | 0xE0);
+			result += static_cast<char>(((_unicode >>  6) & 0x3F) | 0x80);
+			result += static_cast<char>(((_unicode      ) & 0x3F) | 0x80);
+		}
+		else if(_unicode <= 0x1FFFFF) // four byte character
+		{
+			result += static_cast<char>(((_unicode >> 18)       ) | 0xF0);
+			result += static_cast<char>(((_unicode >> 12) & 0x3F) | 0x80);
+			result += static_cast<char>(((_unicode >>  6) & 0x3F) | 0x80);
+			result += static_cast<char>(((_unicode      ) & 0x3F) | 0x80);
+		}
+		else
+		{
+			result += '?'; // error, value does not fit in four bytes
+		}
+
+		return result;
+	} // unicode2Chars
+
+	// Returns the byte offset of the first UTF-8 character boundary after _cursor, capped at
+	// _string.length(). A _cursor already at or past the end is returned unchanged.
+	inline size_t nextCursor(const std::string& _string, const size_t _cursor)
+	{
+		const size_t end  = _string.length();
+		size_t       next = _cursor;
+
+		// step off the current byte, then skip every 10xxxxxx continuation byte that follows
+		if(next < end)
+			++next;
+		while(next < end && (_string[next] & 0xC0) == 0x80)
+			++next;
+
+		return next;
+	} // nextCursor
+
+	// Returns the byte offset of the nearest UTF-8 character boundary before _cursor, or 0 when
+	// only continuation bytes (or no bytes at all) precede it. _cursor is not checked against
+	// _string.length().
+	inline size_t prevCursor(const std::string& _string, const size_t _cursor)
+	{
+		size_t prev = _cursor;
+
+		// each test first steps back one byte; the walk ends at offset 0 or on the first byte
+		// that is not a 10xxxxxx continuation byte
+		while(prev > 0 && (_string[--prev] & 0xC0) == 0x80)
+		{
+		}
+
+		return prev;
+	} // prevCursor
+
+	// Moves _cursor by _amount UTF-8 characters inside _string and returns the resulting byte
+	// offset. A positive _amount steps forward with nextCursor, a negative one steps backward
+	// with prevCursor, and zero returns _cursor as it is.
+	inline size_t moveCursor(const std::string& _string, const size_t _cursor, const int _amount)
+	{
+		size_t position = _cursor;
+
+		// forward: runs _amount times, not at all unless _amount is positive
+		for(int remaining = _amount; remaining > 0; --remaining)
+			position = nextCursor(_string, position);
+
+		// backward: runs -_amount times, not at all unless _amount is negative
+		for(int remaining = _amount; remaining < 0; ++remaining)
+			position = prevCursor(_string, position);
+
+		return position;
+
+	} // moveCursor
+}
+
+#endif // ES_CORE_STRING_UTIL_H
diff --git a/es-core/src/components/TextComponent.cpp b/es-core/src/components/TextComponent.cpp
index 633d939d6..f7885139e 100644
--- a/es-core/src/components/TextComponent.cpp
+++ b/es-core/src/components/TextComponent.cpp
@@ -3,6 +3,7 @@
 #include "Log.h"
 #include "Renderer.h"
 #include "Settings.h"
+#include "StringUtil.h"
 #include "Util.h"
 
 TextComponent::TextComponent(Window* window) : GuiComponent(window), 
@@ -197,7 +198,7 @@ void TextComponent::onTextChanged()
 
 		while(text.size() && size.x() + abbrevSize.x() > mSize.x())
 		{
-			size_t newSize = Font::getPrevCursor(text, text.size());
+			size_t newSize = StringUtil::prevCursor(text, text.size());
 			text.erase(newSize, text.size() - newSize);
 			size = f->sizeText(text);
 		}
diff --git a/es-core/src/components/TextEditComponent.cpp b/es-core/src/components/TextEditComponent.cpp
index 2fb4f16c4..758b1aef9 100644
--- a/es-core/src/components/TextEditComponent.cpp
+++ b/es-core/src/components/TextEditComponent.cpp
@@ -2,6 +2,7 @@
 
 #include "resources/Font.h"
 #include "Renderer.h"
+#include "StringUtil.h"
 
 #define TEXT_PADDING_HORIZ 10
 #define TEXT_PADDING_VERT 2
@@ -59,7 +60,7 @@ void TextEditComponent::textInput(const char* text)
 		{
 			if(mCursor > 0)
 			{
-				size_t newCursor = Font::getPrevCursor(mText, mCursor);
+				size_t newCursor = StringUtil::prevCursor(mText, mCursor);
 				mText.erase(mText.begin() + newCursor, mText.begin() + mCursor);
 				mCursor = newCursor;
 			}
@@ -190,7 +191,7 @@ void TextEditComponent::updateCursorRepeat(int deltaTime)
 
 void TextEditComponent::moveCursor(int amt)
 {
-	mCursor = Font::moveCursor(mText, mCursor, amt);
+	mCursor = StringUtil::moveCursor(mText, mCursor, amt);
 	onCursorChanged();
 }
 
diff --git a/es-core/src/resources/Font.cpp b/es-core/src/resources/Font.cpp
index 4b88d9d76..5b7ef4128 100644
--- a/es-core/src/resources/Font.cpp
+++ b/es-core/src/resources/Font.cpp
@@ -2,6 +2,7 @@
 
 #include "Log.h"
 #include "Renderer.h"
+#include "StringUtil.h"
 #include "Util.h"
 
 FT_Library Font::sLibrary = NULL;
@@ -10,130 +11,6 @@ int Font::getSize() const { return mSize; }
 
 std::map< std::pair<std::string, int>, std::weak_ptr<Font> > Font::sFontMap;
 
-
-// utf8 stuff
-size_t Font::getNextCursor(const std::string& str, size_t cursor)
-{
-	// compare to character at the cursor
-	const char& c = str[cursor];
-
-	size_t result = cursor;
-	if((c & 0x80) == 0) // 0xxxxxxx, one byte character
-	{
-		result += 1;
-	}
-	else if((c & 0xE0) == 0xC0) // 110xxxxx, two bytes left in character
-	{
-		result += 2;
-	}
-	else if((c & 0xF0) == 0xE0) // 1110xxxx, three bytes left in character
-	{
-		result += 3;
-	}
-	else if((c & 0xF8) == 0xF0) // 11110xxx, four bytes left in character
-	{
-		result += 4;
-	}
-	else
-	{
-		// error, invalid utf8 string
-		
-		// if this assert is tripped, the cursor is in the middle of a utf8 code point
-		assert((c & 0xC0) != 0x80); // character is 10xxxxxx
-
-		// if that wasn't it, something crazy happened
-		assert(false);
-	}
-
-	if(str.length() < result || result < cursor) // don't go beyond the very end of the string, try and catch overflow
-		return cursor;
-	return result;
-}
-
-// note: will happily accept malformed utf8
-size_t Font::getPrevCursor(const std::string& str, size_t cursor)
-{
-	if(cursor == 0)
-		return 0;
-
-	do
-	{
-		cursor--;
-	} while(cursor > 0 &&
-		(str[cursor] & 0xC0) == 0x80); // character is 10xxxxxx
-
-	return cursor;
-}
-
-size_t Font::moveCursor(const std::string& str, size_t cursor, int amt)
-{
-	if(amt > 0)
-	{
-		for(int i = 0; i < amt; i++)
-			cursor = Font::getNextCursor(str, cursor);
-	}
-	else if(amt < 0)
-	{
-		for(int i = amt; i < 0; i++)
-			cursor = Font::getPrevCursor(str, cursor);
-	}
-
-	return cursor;
-}
-
-UnicodeChar Font::readUnicodeChar(const std::string& str, size_t& cursor)
-{
-	const char& c = str[cursor];
-
-	if((c & 0x80) == 0) // 0xxxxxxx, one byte character
-	{
-		// 0xxxxxxx
-		cursor++;
-		return (UnicodeChar)c;
-	}
-	else if((c & 0xE0) == 0xC0) // 110xxxxx, two bytes left in character
-	{
-		// 110xxxxx 10xxxxxx
-		UnicodeChar val = ((str[cursor] & 0x1F) << 6) |
-			(str[cursor + 1] & 0x3F);
-		cursor += 2;
-		return val;
-	}
-	else if((c & 0xF0) == 0xE0) // 1110xxxx, three bytes left in character
-	{
-		// 1110xxxx 10xxxxxx 10xxxxxx
-		UnicodeChar val = ((str[cursor] & 0x0F) << 12) |
-			((str[cursor + 1] & 0x3F) << 6) |
-			 (str[cursor + 2] & 0x3F);
-		cursor += 3;
-		return val;
-	}
-	else if((c & 0xF8) == 0xF0) // 11110xxx, four bytes left in character
-	{
-		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
-		UnicodeChar val = ((str[cursor] & 0x07) << 18) |
-			((str[cursor + 1] & 0x3F) << 12) |
-			((str[cursor + 2] & 0x3F) << 6) |
-			 (str[cursor + 3] & 0x3F);
-		cursor += 4;
-		return val;
-	}
-	else
-	{
-		// error, invalid utf8 string
-
-		// if this assert is tripped, the cursor is in the middle of a utf8 code point
-		assert((c & 0xC0) != 0x80); // character is 10xxxxxx
-
-		// if that wasn't it, something crazy happened
-		assert(false);
-	}
-
-	// error
-	return 0;
-}
-
-
 Font::FontFace::FontFace(ResourceData&& d, int size) : data(d)
 {
 	int err = FT_New_Memory_Face(sLibrary, data.ptr.get(), data.length, 0, &face);
@@ -201,7 +78,7 @@ Font::Font(int size, const std::string& path) : mSize(size), mPath(path)
 		initLibrary();
 
 	// always initialize ASCII characters
-	for(UnicodeChar i = 32; i < 128; i++)
+	for(unsigned int i = 32; i < 128; i++)
 		getGlyph(i);
 
 	clearFaceCache();
@@ -398,7 +275,7 @@ std::vector<std::string> getFallbackFontPaths()
 #endif
 }
 
-FT_Face Font::getFaceForChar(UnicodeChar id)
+FT_Face Font::getFaceForChar(unsigned int id)
 {
 	static const std::vector<std::string> fallbackFonts = getFallbackFontPaths();
 
@@ -430,7 +307,7 @@ void Font::clearFaceCache()
 	mFaceCache.clear();
 }
 
-Font::Glyph* Font::getGlyph(UnicodeChar id)
+Font::Glyph* Font::getGlyph(unsigned int id)
 {
 	// is it already loaded?
 	auto it = mGlyphMap.find(id);
@@ -571,9 +448,9 @@ Vector2f Font::sizeText(std::string text, float lineSpacing)
 	size_t i = 0;
 	while(i < text.length())
 	{
-		UnicodeChar character = readUnicodeChar(text, i); // advances i
+		unsigned int character = StringUtil::chars2Unicode(text, i); // advances i
 
-		if(character == (UnicodeChar)'\n')
+		if(character == '\n')
 		{
 			if(lineWidth > highestWidth)
 				highestWidth = lineWidth;
@@ -600,7 +477,7 @@ float Font::getHeight(float lineSpacing) const
 
 float Font::getLetterHeight()
 {
-	Glyph* glyph = getGlyph((UnicodeChar)'S');
+	Glyph* glyph = getGlyph('S');
 	assert(glyph);
 	return glyph->texSize.y() * glyph->texture->textureSize.y();
 }
@@ -664,21 +541,21 @@ Vector2f Font::getWrappedTextCursorOffset(std::string text, float xLen, size_t s
 	size_t cursor = 0;
 	while(cursor < stop)
 	{
-		UnicodeChar wrappedCharacter = readUnicodeChar(wrappedText, wrapCursor);
-		UnicodeChar character = readUnicodeChar(text, cursor);
+		unsigned int wrappedCharacter = StringUtil::chars2Unicode(wrappedText, wrapCursor);
+		unsigned int character = StringUtil::chars2Unicode(text, cursor);
 
-		if(wrappedCharacter == (UnicodeChar)'\n' && character != (UnicodeChar)'\n')
+		if(wrappedCharacter == '\n' && character != '\n')
 		{
 			//this is where the wordwrap inserted a newline
 			//reset lineWidth and increment y, but don't consume a cursor character
 			lineWidth = 0.0f;
 			y += getHeight(lineSpacing);
 
-			cursor = getPrevCursor(text, cursor); // unconsume
+			cursor = StringUtil::prevCursor(text, cursor); // unconsume
 			continue;
 		}
 
-		if(character == (UnicodeChar)'\n')
+		if(character == '\n')
 		{
 			lineWidth = 0.0f;
 			y += getHeight(lineSpacing);
@@ -727,7 +604,7 @@ TextCache* Font::buildTextCache(const std::string& text, Vector2f offset, unsign
 {
 	float x = offset[0] + (xLen != 0 ? getNewlineStartOffset(text, 0, xLen, alignment) : 0);
 	
-	float yTop = getGlyph((UnicodeChar)'S')->bearing.y();
+	float yTop = getGlyph('S')->bearing.y();
 	float yBot = getHeight(lineSpacing);
 	float y = offset[1] + (yBot + yTop)/2.0f;
 
@@ -735,17 +612,16 @@ TextCache* Font::buildTextCache(const std::string& text, Vector2f offset, unsign
 	std::map< FontTexture*, std::vector<TextCache::Vertex> > vertMap;
 
 	size_t cursor = 0;
-	UnicodeChar character;
-	Glyph* glyph;
 	while(cursor < text.length())
 	{
-		character = readUnicodeChar(text, cursor); // also advances cursor
+		unsigned int character = StringUtil::chars2Unicode(text, cursor); // also advances cursor
+		Glyph* glyph;
 
 		// invalid character
 		if(character == 0)
 			continue;
 
-		if(character == (UnicodeChar)'\n')
+		if(character == '\n')
 		{
 			y += getHeight(lineSpacing);
 			x = offset[0] + (xLen != 0 ? getNewlineStartOffset(text, cursor /* cursor is already advanced */, xLen, alignment) : 0);
diff --git a/es-core/src/resources/Font.h b/es-core/src/resources/Font.h
index acf55ca7e..e24f7313c 100644
--- a/es-core/src/resources/Font.h
+++ b/es-core/src/resources/Font.h
@@ -21,8 +21,6 @@ class TextCache;
 #define FONT_PATH_LIGHT ":/opensans_hebrew_condensed_light.ttf"
 #define FONT_PATH_REGULAR ":/opensans_hebrew_condensed_regular.ttf"
 
-typedef unsigned long UnicodeChar;
-
 enum Alignment
 {
 	ALIGN_LEFT,
@@ -68,12 +66,6 @@ public:
 	size_t getMemUsage() const; // returns an approximation of VRAM used by this font's texture (in bytes)
 	static size_t getTotalMemUsage(); // returns an approximation of total VRAM used by font textures (in bytes)
 
-	// utf8 stuff
-	static size_t getNextCursor(const std::string& str, size_t cursor);
-	static size_t getPrevCursor(const std::string& str, size_t cursor);
-	static size_t moveCursor(const std::string& str, size_t cursor, int moveAmt); // negative moveAmt = move backwards, positive = move forwards
-	static UnicodeChar readUnicodeChar(const std::string& str, size_t& cursor); // reads unicode character at cursor AND moves cursor to the next valid unicode char
-
 private:
 	static FT_Library sLibrary;
 	static std::map< std::pair<std::string, int>, std::weak_ptr<Font> > sFontMap;
@@ -114,7 +106,7 @@ private:
 	void getTextureForNewGlyph(const Vector2i& glyphSize, FontTexture*& tex_out, Vector2i& cursor_out);
 
 	std::map< unsigned int, std::unique_ptr<FontFace> > mFaceCache;
-	FT_Face getFaceForChar(UnicodeChar id);
+	FT_Face getFaceForChar(unsigned int id);
 	void clearFaceCache();
 
 	struct Glyph
@@ -128,9 +120,9 @@ private:
 		Vector2f bearing;
 	};
 
-	std::map<UnicodeChar, Glyph> mGlyphMap;
+	std::map<unsigned int, Glyph> mGlyphMap;
 
-	Glyph* getGlyph(UnicodeChar id);
+	Glyph* getGlyph(unsigned int id);
 
 	int mMaxGlyphHeight;