Beef/BeefLibs/corlib/src/Text/Encoding.bf

using System.Diagnostics;
namespace System.Text
{
	/// Base class for converters between UTF8 string data and an encoded byte representation.
	/// Concrete encodings supply the per-character primitives; the string-level helpers here
	/// are built on top of those primitives and may be overridden with faster versions.
	abstract class Encoding
	{
		/// Reasons a decode operation can fail.
		public enum DecodeError
		{
			/// Decoding stopped early after consuming 'decodedBytes' input bytes and
			/// producing 'outChars' UTF8 chars.
			case PartialDecode(int decodedBytes, int outChars);
			/// The input could not be interpreted in this encoding.
			case FormatError;
		}

		/// Reasons an encode operation can fail.
		public enum EncodeError
		{
			/// Encoding stopped early; 'inChars' describes the input position reached and
			/// 'encodedBytes' the number of bytes written to the destination.
			case PartialEncode(int inChars, int encodedBytes);
		}

		// Shared instances, deleted together with the static data of this type.
		public static readonly ASCIIEncoding ASCII = new ASCIIEncoding() ~ delete _;
		public static readonly UTF8Encoding UTF8 = new UTF8Encoding() ~ delete _;
		public static readonly UTF8EncodingWithBOM UTF8WithBOM = new UTF8EncodingWithBOM() ~ delete _;
		public static readonly UTF16Encoding UTF16 = new UTF16Encoding() ~ delete _;
		public static readonly UTF16EncodingWithBOM UTF16WithBOM = new UTF16EncodingWithBOM() ~ delete _;

		/// Size in bytes of one code unit of this encoding.
		public abstract int GetCharUnitSize();
		/// Number of code units needed to encode 'c'.
		public abstract int GetEncodedLength(char32 c);
		/// Encodes a single character into 'dest' and returns the number of bytes it needs.
		/// The string-level Encode treats a return value larger than dest.Length as
		/// "did not fit".
		public abstract int Encode(char32 c, Span<uint8> dest);
		/// Maximum number of characters that 'size' encoded bytes can hold.
		public abstract int GetMaxCharCount(int size);

		/// Returns the number of bytes needed to encode 'str': the summed per-character
		/// code unit counts multiplied by the code unit size.
		public virtual int GetEncodedSize(StringView str)
		{
			int len = 0;
			for (char32 c in str.DecodedChars)
				len += GetEncodedLength(c);
			return len * GetCharUnitSize();
		}

		/// Encodes 'str' into 'dest' one character at a time.
		/// Returns the number of bytes written, or PartialEncode when a character does not
		/// fit in the remaining space.
		public virtual Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
		{
			uint8* destPtr = dest.Ptr;
			int sizeLeft = dest.Length;

			for (char32 c in str.DecodedChars)
			{
				int encSize = Encode(c, .(destPtr, sizeLeft));
				if (encSize > sizeLeft)
				{
					// Reports the enumerator's NextIndex at the character that did not fit,
					// together with the bytes written so far.
					return .Err(.PartialEncode(@c.NextIndex, dest.Length - sizeLeft));
				}

				destPtr += encSize;
				sizeLeft -= encSize;
			}

			return dest.Length - sizeLeft;
		}

		/// Returns number of UTF8 characters required to hold the decoded result
		public abstract int GetDecodedUTF8Size(Span<uint8> bytes);

		/// Decodes from bytes to UTF8
		public abstract Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars);

		/// Decodes from bytes to UTF8, appending the result to 'outStr'.
		/// On failure 'outStr' keeps only the characters that were actually decoded.
		public virtual Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, String outStr)
		{
			int utf8Len = GetDecodedUTF8Size(inBytes);

			int prevSize = outStr.Length;
			// The view must carry the reserved length explicitly; a pointer-only view
			// would not know how large the reserved region is.
			// NOTE(review): the length fix-ups below assume PrepareBuffer grows outStr by utf8Len - confirm.
			char8* buffer = outStr.PrepareBuffer(utf8Len);
			switch (DecodeToUTF8(inBytes, StringView(buffer, utf8Len)))
			{
			case .Ok(let val):
				 return .Ok(val);
			case .Err(let err):
				switch (err)
				{
				case .PartialDecode(let decodedBytes, let outChars):
					// Keep only the characters that were really produced
					outStr.[Friend]mLength = (.)(prevSize + outChars);
				case .FormatError:
					// Nothing usable was produced, so drop the reserved region entirely
					outStr.[Friend]mLength = (.)prevSize;
				}
				return .Err(err);
			}
		}

		/// Picks an encoding by looking for a byte order mark at the start of 'data'.
		/// 'bomSize' receives the number of leading bytes occupied by a recognized mark,
		/// or 0 when the data is treated as ASCII.
		public static Encoding DetectEncoding(Span<uint8> data, out int bomSize)
		{
			bomSize = 0;
			if (data.Length < 2)
				return ASCII;

			if ((data[0] == 0xFE) && (data[1] == 0xFF))
			{
				// Big endian UTF16 BOM. None of the shared encodings above is selected
				// for it, so bomSize stays 0 and ASCII is returned.
				return ASCII;
			}
			else if ((data[0] == 0xFF) && (data[1] == 0xFE))
			{
				// Little endian UTF16
				bomSize = 2;
				return UTF16WithBOM;
			}

			if (data.Length < 3)
				return ASCII;

			if ((data[0] == 0xEF) && (data[1] == 0xBB) && (data[2] == 0xBF))
			{
				// UTF8 byte order mark
				bomSize = 3;
				return UTF8WithBOM;
			}

			return ASCII;
		}
	}

	/// Single-byte encoding: every character occupies exactly one byte.
	class ASCIIEncoding : Encoding
	{
		/// One byte per character, so 'size' bytes hold at most 'size' characters.
		public override int GetMaxCharCount(int size)
		{
			return size;
		}

		/// Code units are single bytes.
		public override int GetCharUnitSize()
		{
			return 1;
		}

		/// Every character encodes to one code unit.
		public override int GetEncodedLength(char32 c)
		{
			return 1;
		}

		/// Writes 'c' truncated to a single byte and returns the required size (always 1).
		/// When 'dest' is empty nothing is written; the returned 1 then exceeds
		/// dest.Length, which is how the string-level encoder detects "did not fit".
		public override int Encode(char32 c, Span<uint8> dest)
		{
			if (dest.Length < 1)
				return 1;
			dest[0] = (uint8)c;
			return 1;
		}

		/// Copies the raw bytes of 'str' into 'dest'.
		/// Returns the number of bytes copied, or PartialEncode when 'dest' is too small
		/// (in which case 'dest' is filled completely).
		public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
		{
			// Strings are by definition UTF8 so we can just memcpy
			//  Technically this gives us different results than individually encoding char32s
			//  but truncation will always be wrong for chars over 0x7F whereas UTF8 encoding will
			//  sometimes be right. We are really just opting for the fastest method at the time.

			if (dest.Length < str.Length)
			{
				Internal.MemCpy(dest.Ptr, str.Ptr, dest.Length);
				return .Err(.PartialEncode(dest.Length, dest.Length));
			}

			Internal.MemCpy(dest.Ptr, str.Ptr, str.Length);
			return str.Length;
		}

		/// Decoding is a byte-for-byte copy, so the output is as long as the input.
		public override int GetDecodedUTF8Size(Span<uint8> bytes)
		{
			return bytes.Length;
		}

		/// Copies 'inBytes' into 'outChars' unchanged.
		/// Returns the number of bytes copied, or PartialDecode when 'outChars' is too
		/// small (in which case 'outChars' is filled completely).
		public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
		{
			if (outChars.Length < inBytes.Length)
			{
				Internal.MemCpy(outChars.Ptr, inBytes.Ptr, outChars.Length);
				return .Err(.PartialDecode(outChars.Length, outChars.Length));
			}
			Internal.MemCpy(outChars.Ptr, inBytes.Ptr, inBytes.Length);
			return .Ok(inBytes.Length);
		}
	}

	/// UTF8 encoding. Strings are already UTF8, so whole-string conversions are plain copies.
	class UTF8Encoding : Encoding
	{
		/// Each character needs at least one byte.
		public override int GetMaxCharCount(int size)
		{
			return size;
		}

		/// UTF8 code units are single bytes.
		public override int GetCharUnitSize()
		{
			return 1;
		}

		/// Number of UTF8 code units needed for 'c'.
		public override int GetEncodedLength(char32 c)
		{
			return Text.UTF8.GetEncodedLength(c);
		}

		/// Encodes 'c' as UTF8 into 'dest', forwarding to Text.UTF8.Encode.
		public override int Encode(char32 c, Span<uint8> dest)
		{
			Span<char8> charDest = .((char8*)dest.Ptr, dest.Length);
			return Text.UTF8.Encode(c, charDest);
		}

		/// Copies the bytes of 'str' into 'dest'.
		/// Returns the byte count on success, or PartialEncode after filling 'dest'
		/// when it cannot hold the whole string.
		public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
		{
			// The source is UTF8 already: copy as much as fits, then report whether
			// everything made it across.
			int srcLen = str.Length;
			int copyLen = (dest.Length < srcLen) ? dest.Length : srcLen;
			Internal.MemCpy(dest.Ptr, str.Ptr, copyLen);

			if (copyLen < srcLen)
				return .Err(.PartialEncode(copyLen, copyLen));
			return copyLen;
		}

		/// Input is UTF8 already, so the decoded size equals the input size.
		public override int GetDecodedUTF8Size(Span<uint8> bytes)
		{
			return bytes.Length;
		}

		/// Copies 'inBytes' into 'outChars'.
		/// Returns the byte count on success, or PartialDecode after filling 'outChars'
		/// when it cannot hold all of the input.
		public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
		{
			int srcLen = inBytes.Length;
			int copyLen = (outChars.Length < srcLen) ? outChars.Length : srcLen;
			Internal.MemCpy(outChars.Ptr, inBytes.Ptr, copyLen);

			if (copyLen < srcLen)
				return .Err(.PartialDecode(copyLen, copyLen));
			return .Ok(copyLen);
		}
	}

	/// UTF8 encoding that prefixes encoded output with the 3-byte mark EF BB BF.
	class UTF8EncodingWithBOM : UTF8Encoding
	{
		/// Encoded size of 'str' plus the 3 bytes of the byte order mark.
		public override int GetEncodedSize(StringView str)
		{
			return 3 + base.GetEncodedSize(str);
		}

		/// Writes the byte order mark followed by the UTF8 bytes of 'str'.
		/// Returns the total number of bytes written (mark included). When 'dest' cannot
		/// even hold the mark nothing is written and PartialEncode(0, 0) is returned;
		/// when only the payload is truncated the reported byte count includes the mark.
		public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
		{
			if (dest.Length < 3)
			{
				return .Err(.PartialEncode(0, 0));
			}

			uint8* destPtr = dest.Ptr;
			*(destPtr++) = 0xEF;
			*(destPtr++) = 0xBB;
			*(destPtr++) = 0xBF;

			// The payload must start after the mark (destPtr), not at the beginning of
			// 'dest', otherwise it would overwrite the mark that was just written.
			switch (base.Encode(str, .(destPtr, dest.Length - 3)))
			{
			case .Ok(let encSize):
				return .Ok(3 + encSize);
			case .Err(let err):
				switch (err)
				{
				case .PartialEncode(let inChars, let encodedBytes):
					return .Err(.PartialEncode(inChars, 3 + encodedBytes));
				}
			}
		}
	}

	/// UTF16 encoding using 2-byte code units in the machine's native char16 layout.
	class UTF16Encoding : Encoding
	{
		/// Each character needs at least one 2-byte code unit.
		public override int GetMaxCharCount(int size)
		{
			return size / 2;
		}

		/// UTF16 code units are two bytes wide.
		public override int GetCharUnitSize()
		{
			return 2;
		}

		/// Number of UTF16 code units needed for 'c'.
		public override int GetEncodedLength(char32 c)
		{
			return Text.UTF16.GetEncodedLength(c);
		}

		/// Encodes 'c' as UTF16 into 'dest', forwarding to Text.UTF16.Encode.
		public override int Encode(char32 c, Span<uint8> dest)
		{
			return Text.UTF16.Encode(c, dest);
		}

		/// UTF8 length of the UTF16 data in 'bytes'; a trailing odd byte is ignored.
		public override int GetDecodedUTF8Size(Span<uint8> bytes)
		{
			return Text.UTF16.GetLengthAsUTF8(Span<char16>((.)bytes.Ptr, bytes.Length / 2));
		}

		/// Converts UTF16 data in 'inBytes' to UTF8 in 'outChars'.
		/// Returns the number of UTF8 chars written when all input bytes were consumed.
		/// Otherwise returns PartialDecode with the bytes consumed and chars written so
		/// far; this happens when 'outChars' fills up, when the input ends in the middle
		/// of a two-unit sequence, or when a single odd byte is left over.
		public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
		{
			char16* cPtr = (char16*)inBytes.Ptr;
			int bytesLeft = inBytes.Length;
			char8* outPtr = outChars.Ptr;
			int outLeft = outChars.Length;

			while (bytesLeft >= 2)
			{
				int charsLeft = bytesLeft / 2;
				let (c, len) = Text.UTF16.Decode(cPtr, charsLeft);
				if ((len == 2) && (charsLeft == 1))
				{
					// Failed to decode: a two-unit sequence was reported but only one unit remains
					break;
				}

				if (c < '\x80')
				{
					// Simple case: one output byte. The space check is required here too,
					// otherwise a full output view would be written past its end.
					if (outLeft < 1)
						break;
					*outPtr = (.)c;
					outPtr++;
					outLeft--;
				}
				else
				{
					int cOutLen = Text.UTF8.Encode(c, .(outPtr, outLeft));
					if (cOutLen > outLeft)
						break;

					outPtr += cOutLen;
					outLeft -= cOutLen;
				}

				// Only consume the input once its output has been stored
				cPtr += len;
				bytesLeft -= len * 2;
			}

			if (bytesLeft == 0)
				return .Ok(outChars.Length - outLeft);

			Debug.Assert(outLeft >= 0);
			return .Err(.PartialDecode(inBytes.Length - bytesLeft, outChars.Length - outLeft));
		}
	}

	/// UTF16 encoding that prefixes encoded output with the 2-byte mark FF FE.
	class UTF16EncodingWithBOM : UTF16Encoding
	{
		/// Encoded size of 'str' plus the 2 bytes of the byte order mark.
		public override int GetEncodedSize(StringView str)
		{
			return 2 + base.GetEncodedSize(str);
		}

		/// Writes the byte order mark followed by the UTF16 encoding of 'str'.
		/// Returns the total number of bytes written (mark included). When 'dest' cannot
		/// even hold the mark nothing is written and PartialEncode(0, 0) is returned;
		/// when only the payload is truncated the reported byte count includes the mark.
		public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
		{
			// Without this guard the payload span below would get a negative length
			if (dest.Length < 2)
			{
				return .Err(.PartialEncode(0, 0));
			}

			uint8* destPtr = dest.Ptr;
			*(destPtr++) = 0xFF;
			*(destPtr++) = 0xFE;

			// The payload must start after the mark (destPtr), not at the beginning of
			// 'dest', otherwise it would overwrite the mark that was just written.
			switch (base.Encode(str, .(destPtr, dest.Length - 2)))
			{
			case .Ok(let encSize):
				return .Ok(2 + encSize);
			case .Err(let err):
				switch (err)
				{
				case .PartialEncode(let inChars, let encodedBytes):
					// The mark is 2 bytes long
					return .Err(.PartialEncode(inChars, 2 + encodedBytes));
				}
			}
		}
	}

	/// Holds the bytes of a string converted with a given Encoding.
	/// The byte buffer is allocated in the constructor and deleted with the object.
	class EncodedString
	{
		uint8* mData ~ delete _;
		int32 mSize;

		/// Pointer to the first encoded byte.
		public uint8* Ptr
		{
			get { return mData; }
		}

		/// Number of encoded bytes.
		public int Size
		{
			get { return mSize; }
		}

		/// Encodes 'str' with 'encoding' into a newly allocated buffer sized by
		/// encoding.GetEncodedSize(str).
		public this(StringView str, Encoding encoding)
		{
			int byteCount = encoding.GetEncodedSize(str);
			mSize = (int32)byteCount;
			mData = new uint8[mSize]*;

			Span<uint8> target = .(mData, mSize);
			encoding.Encode(str, target);
		}
	}
}