mirror of
https://github.com/beefytech/Beef.git
synced 2025-06-19 08:30:25 +02:00
Moving corlib files out of "System" directory into root
This commit is contained in:
parent
4cd58262e4
commit
7dbfd15292
179 changed files with 3 additions and 0 deletions
12
BeefLibs/corlib/src/Text/Decoder.bf
Normal file
12
BeefLibs/corlib/src/Text/Decoder.bf
Normal file
|
@ -0,0 +1,12 @@
|
|||
namespace System.Text
|
||||
{
|
||||
/// Trivial byte-to-character decoder: each input byte is reinterpreted as one char8.
class Decoder
{
	/// Copies `dataLen` bytes from `data` (starting at `dataOfs`) into `chars`
	/// (starting at `charOffset`), casting every byte to char8.
	/// Always returns `dataLen`.
	public Result<int> GetChars(uint8[] data, int dataOfs, int dataLen, char8[] chars, int charOffset)
	{
		int32 copied = 0;
		while (copied < dataLen)
		{
			chars[copied + charOffset] = (char8)data[copied + dataOfs];
			copied++;
		}
		return dataLen;
	}
}
|
||||
}
|
370
BeefLibs/corlib/src/Text/Encoding.bf
Normal file
370
BeefLibs/corlib/src/Text/Encoding.bf
Normal file
|
@ -0,0 +1,370 @@
|
|||
using System.Diagnostics;
|
||||
namespace System.Text
|
||||
{
|
||||
/// Base class for converters between Beef's UTF8 string data and an encoded byte representation.
abstract class Encoding
{
	/// Failure results of DecodeToUTF8.
	public enum DecodeError
	{
		/// Decoding stopped early after consuming `decodedBytes` input bytes
		/// and producing `outChars` output char8s.
		case PartialDecode(int decodedBytes, int outChars);
		case FormatError;
	}

	/// Failure results of Encode.
	public enum EncodeError
	{
		/// Encoding stopped early; `encodedBytes` bytes were written to the destination.
		case PartialEncode(int inChars, int encodedBytes);
	}

	// Shared encoder instances, deleted together with the static data.
	public static readonly ASCIIEncoding ASCII = new ASCIIEncoding() ~ delete _;
	public static readonly UTF8Encoding UTF8 = new UTF8Encoding() ~ delete _;
	public static readonly UTF8EncodingWithBOM UTF8WithBOM = new UTF8EncodingWithBOM() ~ delete _;
	public static readonly UTF16Encoding UTF16 = new UTF16Encoding() ~ delete _;
	public static readonly UTF16EncodingWithBOM UTF16WithBOM = new UTF16EncodingWithBOM() ~ delete _;

	/// Size in bytes of one code unit of this encoding.
	public abstract int GetCharUnitSize();
	/// Number of code units needed to encode `c`.
	public abstract int GetEncodedLength(char32 c);
	/// Encodes a single character into `dest` and returns the number of bytes it requires.
	/// The string-level Encode below treats a return value larger than `dest.Length` as "did not fit".
	public abstract int Encode(char32 c, Span<uint8> dest);
	public abstract int GetMaxCharCount(int size);

	/// Returns the number of bytes required to encode `str`:
	/// the sum of the per-character code unit counts times the code unit size.
	public virtual int GetEncodedSize(StringView str)
	{
		int unitCount = 0;
		for (char32 c in str.DecodedChars)
		{
			unitCount += GetEncodedLength(c);
		}
		int unitSize = GetCharUnitSize();
		return unitCount * unitSize;
	}

	/// Encodes `str` into `dest` one decoded character at a time.
	/// Returns the number of bytes written, or `.PartialEncode` carrying the string enumerator's
	/// NextIndex at the failing character and the number of bytes written before it.
	public virtual Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
	{
		int written = 0;

		for (char32 c in str.DecodedChars)
		{
			int remaining = dest.Length - written;
			int encSize = Encode(c, .(dest.Ptr + written, remaining));
			if (encSize > remaining)
				return .Err(.PartialEncode(@c.NextIndex, written));
			written += encSize;
		}

		return .Ok(written);
	}

	/// Returns number of UTF8 characters required to hold the decoded result
	public abstract int GetDecodedUTF8Size(Span<uint8> bytes);

	/// Decodes from bytes to UTF8
	public abstract Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars);

	/// Picks an encoding by inspecting a leading byte order mark in `data`.
	/// `bomSize` receives the number of BOM bytes the caller should skip (0 when none is reported).
	/// Anything that is not a recognized BOM is reported as ASCII.
	public static Encoding DetectEncoding(Span<uint8> data, out int bomSize)
	{
		bomSize = 0;
		if (data.Length < 2)
			return ASCII;

		let b0 = data[0];
		let b1 = data[1];

		// FE FF: big endian UTF16 BOM. There is no big endian decoder here, so this is
		// reported as ASCII and bomSize intentionally stays 0.
		if ((b0 == 0xFE) && (b1 == 0xFF))
			return ASCII;

		// FF FE: little endian UTF16 BOM
		if ((b0 == 0xFF) && (b1 == 0xFE))
		{
			bomSize = 2;
			return UTF16WithBOM;
		}

		// EF BB BF: UTF8 BOM
		if ((data.Length >= 3) && (b0 == 0xEF) && (b1 == 0xBB) && (data[2] == 0xBF))
		{
			bomSize = 3;
			return UTF8WithBOM;
		}

		return ASCII;
	}
}
|
||||
|
||||
/// Single-byte encoding. Individual characters are truncated to their low byte;
/// whole strings are copied byte-for-byte.
class ASCIIEncoding : Encoding
{
	public override int GetMaxCharCount(int size)
	{
		return size;
	}

	/// One byte per code unit.
	public override int GetCharUnitSize()
	{
		return 1;
	}

	/// Every character occupies exactly one code unit.
	public override int GetEncodedLength(char32 c)
	{
		return 1;
	}

	/// Stores the low byte of `c` in `dest[0]` and returns 1.
	public override int Encode(char32 c, Span<uint8> dest)
	{
		dest[0] = (uint8)c;
		return 1;
	}

	/// Copies the raw string bytes into `dest`.
	/// Returns the number of bytes copied, or `.PartialEncode(dest.Length, dest.Length)`
	/// when `dest` is too small (after filling it completely).
	public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
	{
		// Strings are by definition UTF8, so a raw memcpy is used here. This does not match
		// what per-char32 encoding would produce: truncation is always wrong for chars over 0x7F,
		// whereas passing the UTF8 bytes through is sometimes right. This simply opts for the
		// fastest method.
		bool truncated = dest.Length < str.Length;
		int copyLen = truncated ? dest.Length : str.Length;
		Internal.MemCpy(dest.Ptr, str.Ptr, copyLen);
		if (truncated)
			return .Err(.PartialEncode(copyLen, copyLen));
		return .Ok(copyLen);
	}

	/// The decoded size equals the input size.
	public override int GetDecodedUTF8Size(Span<uint8> bytes)
	{
		return bytes.Length;
	}

	/// Copies `inBytes` into `outChars` unchanged.
	/// Returns the number of bytes copied, or `.PartialDecode(outChars.Length, outChars.Length)`
	/// when `outChars` is too small (after filling it completely).
	public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
	{
		bool truncated = outChars.Length < inBytes.Length;
		int copyLen = truncated ? outChars.Length : inBytes.Length;
		Internal.MemCpy(outChars.Ptr, inBytes.Ptr, copyLen);
		if (truncated)
			return .Err(.PartialDecode(copyLen, copyLen));
		return .Ok(copyLen);
	}
}
|
||||
|
||||
/// UTF8 encoding without a byte order mark. Strings are already UTF8, so string-level
/// encode/decode are straight memory copies.
class UTF8Encoding : Encoding
{
	public override int GetMaxCharCount(int size)
	{
		return size;
	}

	/// One byte per code unit.
	public override int GetCharUnitSize()
	{
		return 1;
	}

	/// Number of UTF8 bytes for `c`, as computed by Text.UTF8.
	public override int GetEncodedLength(char32 c)
	{
		return Text.UTF8.GetEncodedLength(c);
	}

	/// Encodes `c` via Text.UTF8.Encode, viewing the destination bytes as char8s.
	public override int Encode(char32 c, Span<uint8> dest)
	{
		Span<char8> charDest = .((char8*)dest.Ptr, dest.Length);
		return Text.UTF8.Encode(c, charDest);
	}

	/// Copies the string bytes into `dest`.
	/// Returns the number of bytes copied, or `.PartialEncode(dest.Length, dest.Length)`
	/// when `dest` is too small (after filling it completely).
	public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
	{
		// Strings are by definition UTF8 so we can just memcpy.
		bool truncated = dest.Length < str.Length;
		int copyLen = truncated ? dest.Length : str.Length;
		Internal.MemCpy(dest.Ptr, str.Ptr, copyLen);
		if (truncated)
			return .Err(.PartialEncode(copyLen, copyLen));
		return .Ok(copyLen);
	}

	/// The decoded size equals the input size.
	public override int GetDecodedUTF8Size(Span<uint8> bytes)
	{
		return bytes.Length;
	}

	/// Copies `inBytes` into `outChars` unchanged.
	/// Returns the number of bytes copied, or `.PartialDecode(outChars.Length, outChars.Length)`
	/// when `outChars` is too small (after filling it completely).
	public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
	{
		bool truncated = outChars.Length < inBytes.Length;
		int copyLen = truncated ? outChars.Length : inBytes.Length;
		Internal.MemCpy(outChars.Ptr, inBytes.Ptr, copyLen);
		if (truncated)
			return .Err(.PartialDecode(copyLen, copyLen));
		return .Ok(copyLen);
	}
}
|
||||
|
||||
/// UTF8 encoding whose encoded output starts with the 3-byte UTF8 byte order mark (EF BB BF).
class UTF8EncodingWithBOM : UTF8Encoding
{
	/// Size in bytes of the UTF8 byte order mark.
	const int cBOMSize = 3;

	/// Encoded size of `str` plus the BOM.
	public override int GetEncodedSize(StringView str)
	{
		return cBOMSize + base.GetEncodedSize(str);
	}

	/// Writes the BOM followed by the encoded string.
	/// Returns the total number of bytes written (BOM included). If `dest` cannot even hold the
	/// BOM nothing is written and `.PartialEncode(0, 0)` is returned; if only the string does not
	/// fit, the base error is forwarded with the BOM size added to its byte count.
	public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
	{
		if (dest.Length < cBOMSize)
			return .Err(.PartialEncode(0, 0));

		uint8* destPtr = dest.Ptr;
		*(destPtr++) = 0xEF;
		*(destPtr++) = 0xBB;
		*(destPtr++) = 0xBF;

		// The payload must start after the BOM (destPtr); encoding at dest.Ptr would overwrite
		// the mark that was just written.
		switch (base.Encode(str, .(destPtr, dest.Length - cBOMSize)))
		{
		case .Ok(let encSize):
			return .Ok(cBOMSize + encSize);
		case .Err(let err):
			switch (err)
			{
			case .PartialEncode(let inChars, let encodedBytes):
				return .Err(.PartialEncode(inChars, cBOMSize + encodedBytes));
			}
		}
	}
}
|
||||
|
||||
/// UTF16 encoding without a byte order mark. Code units are read and written as raw char16
/// values through pointer casts, i.e. in the byte order of the running machine.
class UTF16Encoding : Encoding
{
	/// `size` bytes hold at most `size / 2` UTF16 code units.
	public override int GetMaxCharCount(int size)
	{
		return size / 2;
	}

	/// Two bytes per code unit.
	public override int GetCharUnitSize()
	{
		return 2;
	}

	/// Number of UTF16 code units for `c`, as computed by Text.UTF16.
	public override int GetEncodedLength(char32 c)
	{
		return Text.UTF16.GetEncodedLength(c);
	}

	/// Encodes `c` via Text.UTF16.Encode; returns the number of bytes required.
	public override int Encode(char32 c, Span<uint8> dest)
	{
		return Text.UTF16.Encode(c, dest);
	}

	/// Returns the number of UTF8 bytes needed to hold the decoded form of `bytes`.
	public override int GetDecodedUTF8Size(Span<uint8> bytes)
	{
		// The span length is a count of char16 units, not of bytes; passing the byte count
		// would make the scan read twice as far as the buffer extends.
		return Text.UTF16.GetLengthAsUTF8(Span<char16>((.)bytes.Ptr, bytes.Length / 2));
	}

	/// Decodes UTF16 `inBytes` into UTF8 stored in `outChars`.
	/// Returns the number of char8s written when all input was consumed. Otherwise returns
	/// `.PartialDecode(bytesConsumed, charsWritten)`; this happens when the output is full,
	/// when the input ends with a high surrogate that has no partner, or when a single
	/// trailing byte is left over.
	public override Result<int, DecodeError> DecodeToUTF8(Span<uint8> inBytes, StringView outChars)
	{
		char16* cPtr = (char16*)inBytes.Ptr;
		int bytesLeft = inBytes.Length;
		char8* outPtr = outChars.Ptr;
		int outLeft = outChars.Length;

		while (bytesLeft >= 2)
		{
			int charsLeft = bytesLeft / 2;
			let (c, len) = Text.UTF16.Decode(cPtr, charsLeft);
			if ((len == 2) && (charsLeft == 1))
			{
				// High surrogate at the very end of the input: failed to decode
				break;
			}

			if (c < '\x80')
			{
				// Simple case: one output byte. The capacity must be checked here as well,
				// otherwise a full output buffer is overrun and outLeft goes negative.
				if (outLeft < 1)
					break;
				*outPtr = (.)c;
				outPtr++;
				outLeft--;
			}
			else
			{
				int cOutLen = Text.UTF8.Encode(c, .(outPtr, outLeft));
				if (cOutLen > outLeft)
					break;
				outPtr += cOutLen;
				outLeft -= cOutLen;
			}

			// Only consume the input once its output has actually been stored
			cPtr += len;
			bytesLeft -= len * 2;
		}

		if (bytesLeft == 0)
			return .Ok(outChars.Length - outLeft);

		Debug.Assert(outLeft >= 0);
		return .Err(.PartialDecode(inBytes.Length - bytesLeft, outChars.Length - outLeft));
	}
}
|
||||
|
||||
/// UTF16 encoding whose encoded output starts with the 2-byte byte order mark FF FE.
class UTF16EncodingWithBOM : UTF16Encoding
{
	/// Size in bytes of the UTF16 byte order mark.
	const int cBOMSize = 2;

	/// Encoded size of `str` plus the BOM.
	public override int GetEncodedSize(StringView str)
	{
		return cBOMSize + base.GetEncodedSize(str);
	}

	/// Writes the BOM followed by the encoded string.
	/// Returns the total number of bytes written (BOM included). If `dest` cannot even hold the
	/// BOM nothing is written and `.PartialEncode(0, 0)` is returned; if only the string does not
	/// fit, the base error is forwarded with the BOM size added to its byte count.
	public override Result<int, EncodeError> Encode(StringView str, Span<uint8> dest)
	{
		// Without this guard the payload span below would get a negative length.
		if (dest.Length < cBOMSize)
			return .Err(.PartialEncode(0, 0));

		uint8* destPtr = dest.Ptr;
		*(destPtr++) = 0xFF;
		*(destPtr++) = 0xFE;

		// The payload must start after the BOM (destPtr); encoding at dest.Ptr would overwrite
		// the mark that was just written.
		switch (base.Encode(str, .(destPtr, dest.Length - cBOMSize)))
		{
		case .Ok(let encSize):
			return .Ok(cBOMSize + encSize);
		case .Err(let err):
			switch (err)
			{
			case .PartialEncode(let inChars, let encodedBytes):
				// The BOM is 2 bytes for UTF16 (3 is the UTF8 BOM size)
				return .Err(.PartialEncode(inChars, cBOMSize + encodedBytes));
			}
		}
	}
}
|
||||
|
||||
/// Owns a heap buffer holding a string converted to a given encoding.
/// The buffer is deleted together with the object.
class EncodedString
{
	uint8* mData ~ delete _;
	int32 mSize;

	/// Start of the encoded bytes.
	public uint8* Ptr
	{
		get { return mData; }
	}

	/// Number of encoded bytes.
	public int Size
	{
		get { return mSize; }
	}

	/// Allocates exactly `encoding.GetEncodedSize(str)` bytes and encodes `str` into them.
	/// The result of the Encode call is not inspected.
	public this(StringView str, Encoding encoding)
	{
		int requiredSize = encoding.GetEncodedSize(str);
		mSize = (int32)requiredSize;
		mData = new uint8[mSize]*;
		encoding.Encode(str, Span<uint8>(mData, mSize));
	}
}
|
||||
}
|
241
BeefLibs/corlib/src/Text/UTF16.bf
Normal file
241
BeefLibs/corlib/src/Text/UTF16.bf
Normal file
|
@ -0,0 +1,241 @@
|
|||
using System.Diagnostics;
|
||||
namespace System.Text
|
||||
{
|
||||
public class UTF16
|
||||
{
|
||||
/// Error result of the buffer-based Encode(StringView, char16*, int) overload.
public enum EncodeError
|
||||
{
|
||||
/// The output did not fit. `len` is the number of char16 units that were needed
/// (Encode computes it as bufLen - bufLeft after counting every unit, terminator included).
case Overflow(int len);
|
||||
}
|
||||
|
||||
/// Appends the UTF8 form of the null-terminated UTF16 string `utf16Str` to `outStr`.
/// A high surrogate is remembered and combined with the next low surrogate encountered.
public static void Decode(char16* utf16Str, String outStr)
{
	// Grow the destination once up front
	outStr.Reserve(outStr.Length + GetLengthAsUTF8(utf16Str));

	char16 pendingHi = 0;
	for (char16* cur = utf16Str; *cur != 0; cur++)
	{
		char16 unit = *cur;
		if ((unit >= '\u{D800}') && (unit < '\u{DC00}'))
		{
			// High surrogate: nothing to emit until the low half arrives
			pendingHi = unit;
			continue;
		}

		char32 decoded = unit;
		if ((unit >= '\u{DC00}') && (unit < '\u{E000}'))
			decoded = (char32)(0x10000 + ((uint32)(pendingHi - 0xD800) << 10) | (uint32)(unit - 0xDC00));

		outStr.Append(decoded);
	}
}
|
||||
|
||||
/// Appends the UTF8 form of the UTF16 units in `utf16Str` to `outStr`.
/// Unlike the pointer overload this one is bounded by the span, so embedded zero units are
/// appended like any other character.
public static void Decode(Span<char16> utf16Str, String outStr)
{
	// Grow the destination once up front
	outStr.Reserve(outStr.Length + GetLengthAsUTF8(utf16Str));

	char16 pendingHi = 0;
	char16* end = utf16Str.EndPtr;
	for (char16* cur = utf16Str.Ptr; cur < end; cur++)
	{
		char16 unit = *cur;
		if ((unit >= '\u{D800}') && (unit < '\u{DC00}'))
		{
			// High surrogate: nothing to emit until the low half arrives
			pendingHi = unit;
			continue;
		}

		char32 decoded = unit;
		if ((unit >= '\u{DC00}') && (unit < '\u{E000}'))
			decoded = (char32)(0x10000 + ((uint32)(pendingHi - 0xD800) << 10) | (uint32)(unit - 0xDC00));

		outStr.Append(decoded);
	}
}
|
||||
|
||||
/// Decodes one character starting at `buf`.
/// Returns the character and the number of char16 units it occupies.
/// `lenLeft` is the number of units available; the default of 0 disables the end-of-buffer
/// check below. Special cases for a leading high surrogate:
/// - exactly one unit left: returns the surrogate itself with a length of 2 (soft error)
/// - next unit is 0: returns the surrogate itself with a length of 1
public static (char32, int) Decode(char16* buf, int lenLeft = 0)
{
	char16 first = buf[0];
	bool isHighSurrogate = (first >= '\u{D800}') && (first < '\u{DC00}');
	if (!isHighSurrogate)
	{
#if BF_UTF_PEDANTIC
		Debug.Assert((first <= '\u{D7FF}') || (first >= '\u{E000}'));
#endif
		return (first, 1);
	}

	if (lenLeft == 1)
	{
		// This is considered a soft error
		return ((char32)first, 2);
	}

	char16 second = buf[1];
	if (second == 0)
	{
#if BF_UTF_PEDANTIC
		// No trailing char
		Debug.Assert(second != 0);
#endif
		return ((char32)first, 1);
	}

	char32 combined = (char32)(0x10000 + ((uint32)(first - 0xD800) << 10) | (uint32)(second - 0xDC00));
	return (combined, 2);
}
|
||||
|
||||
/// Returns the number of UTF8 bytes needed for the null-terminated UTF16 string `utf16Str`
/// (terminator not counted).
public static int GetLengthAsUTF8(char16* utf16Str)
{
	int total = 0;
	char16* cur = utf16Str;
	while (true)
	{
		let (c, encLen) = Decode(cur, 0);
		if (c == 0)
			break;
		total += UTF8.GetEncodedLength(c);
		cur += encLen;
	}
	return total;
}
|
||||
|
||||
/// Returns the number of UTF8 bytes needed for the UTF16 units in `utf16Str`.
public static int GetLengthAsUTF8(Span<char16> utf16Str)
{
	int total = 0;
	int remaining = utf16Str.Length;
	char16* cur = utf16Str.Ptr;
	while (remaining > 0)
	{
		let (c, encLen) = Decode(cur, remaining);
		total += UTF8.GetEncodedLength(c);
		cur += encLen;
		remaining -= encLen;
	}
	return total;
}
|
||||
|
||||
/// Compares the null-terminated UTF16 string `utf16Str` with `str`, character by character.
/// They are equal when every decoded character matches and the UTF16 string ends
/// (decodes to 0) exactly where `str` ends.
public static bool Equals(char16* utf16Str, String str)
{
	int strPos = 0;
	char16* cur = utf16Str;
	while (strPos != str.Length)
	{
		let (cA, encLenA) = Decode(cur);
		let (cB, encLenB) = str.GetChar32(strPos);
		if (cA != cB)
			return false;
		cur += encLenA;
		strPos += encLenB;
	}

	// All of `str` matched; the UTF16 side must be at its terminator too
	return Decode(cur).0 == 0;
}
|
||||
|
||||
/// Upper bound on the number of char16 units produced from `utf8Len` bytes of UTF8.
/// NOTE(review): the value does not include room for the null terminator that
/// Encode(StringView, char16*, int) appends - confirm callers account for it.
public static int GetMaxEncodedLen(int utf8Len)
{
	// Ratio of outgoing char16 units to incoming char8s, by code point range:
	//   below U+0080:          1 char8  -> 1 char16   (utf8Len * 1)
	//   U+0080 to U+07FF:      2 char8s -> 1 char16   (utf8Len * 0.5)
	//   U+0800 to U+FFFF:      3 char8s -> 1 char16   (utf8Len * 0.33)
	//   U+10000 to U+10FFFF:   4 char8s -> 2 char16s  (utf8Len * 0.5)
	// The worst case is therefore one char16 per incoming char8.
	return utf8Len;
}
|
||||
|
||||
/// Number of char16 units needed for `c`: 1 up to U+FFFF, otherwise 2 (surrogate pair).
public static int GetEncodedLength(char32 c)
{
	return (c <= '\u{FFFF}') ? 1 : 2;
}
|
||||
|
||||
/// Number of char16 units needed to encode `str`, including one unit for a null terminator.
public static int GetEncodedLen(StringView str)
{
	int unitCount = 1; // null terminator
	for (var c in str.DecodedChars)
	{
		if (c > '\u{FFFF}')
		{
			// Needs a surrogate pair
			unitCount += 2;
			continue;
		}
#if BF_UTF_PEDANTIC
		// Illegal UTF16 char?
		Debug.Assert((c <= '\u{D7FF}') || (c >= '\u{E000}'));
#endif
		unitCount++;
	}
	return unitCount;
}
|
||||
|
||||
/// Encodes `c` as UTF16 into the bytes of `dest`.
/// Returns the number of bytes the character requires (2, or 4 for a surrogate pair).
/// Nothing is written when `dest` is too small, so a return value larger than
/// `dest.Length` means the character did not fit.
public static int Encode(char32 c, Span<uint8> dest)
{
	if (c <= '\u{FFFF}')
	{
		if (dest.Length >= 2)
			*((char16*)dest.Ptr) = (char16)c;
		return 2;
	}

	if (dest.Length >= 4)
	{
		// Surrogate pairs carry the 20-bit offset from U+10000, not the raw code point.
		// Without the subtraction the high half is wrong and the pair does not round-trip
		// through Decode, which adds 0x10000 back.
		int32 offset = (int32)c - 0x10000;
		*((char16*)dest.Ptr) = (char16)((offset >> 10) + 0xD800);
		*((char16*)dest.Ptr + 1) = (char16)((offset & 0x3FF) + 0xDC00);
	}
	return 4;
}
|
||||
|
||||
/// Encodes `str` as null-terminated UTF16 into `outUTF16Buf`, which holds `bufLen` char16 units.
/// Returns the number of units written, terminator included. When the buffer is too small
/// (or null) the units that fit are written and `.Overflow(len)` is returned, where `len` is
/// the total number of units required; passing a null buffer with a `bufLen` of 0 therefore
/// measures the required size.
public static Result<int, EncodeError> Encode(StringView str, char16* outUTF16Buf, int bufLen)
{
	char16* buf = outUTF16Buf;
	int bufLeft = bufLen;

	// Stores one unit while there is room, and always counts it so that a negative
	// bufLeft tells by how much the buffer was too small.
	void EncodeChar(char16 c)
	{
		// Checking bufLeft before the write also covers a non-null buffer with bufLen == 0,
		// which must not be written to at all.
		if ((buf != null) && (bufLeft > 0))
			*(buf++) = c;
		bufLeft--;
	}

	for (var c in str.DecodedChars)
	{
		if (c <= '\u{FFFF}')
		{
#if BF_UTF_PEDANTIC
			// Illegal UTF16 char?
			Debug.Assert((c <= '\u{D7FF}') || (c >= '\u{E000}'));
#endif
			EncodeChar((char16)c);
		}
		else
		{
			// Surrogate pairs carry the 20-bit offset from U+10000, not the raw code point
			int32 valLeft = (int32)c - 0x10000;
			EncodeChar((char16)((valLeft >> 10) + 0xD800));
			EncodeChar((char16)((valLeft & 0x3FF) + 0xDC00));
		}
	}
	EncodeChar(0);

	int encodedLen = bufLen - bufLeft;
	if (bufLeft < 0)
		return .Err(.Overflow(encodedLen));
	return .Ok(encodedLen);
}
|
||||
|
||||
/// Number of char16 units before the terminating zero unit of `str`.
public static int CStrLen(char16* str)
{
	int len = 0;
	while (str[len] != 0)
		len++;
	return len;
}
|
||||
}
|
||||
}
|
154
BeefLibs/corlib/src/Text/UTF8.bf
Normal file
154
BeefLibs/corlib/src/Text/UTF8.bf
Normal file
|
@ -0,0 +1,154 @@
|
|||
namespace System.Text
|
||||
{
|
||||
class UTF8
|
||||
{
|
||||
/// Lookup table indexed by the first byte of a UTF8 sequence (256 entries, 32 per row).
/// Each entry is the number of bytes that follow that first byte; GetDecodedLength adds 1
/// to it to obtain the full sequence length. Rows cover 0x00-0xBF (0), 0xC0-0xDF (1),
/// then 0xE0-0xEF (2), 0xF0-0xF7 (3), 0xF8-0xFB (4) and 0xFC-0xFF (5).
public static int8* sTrailingBytesForUTF8 = new int8[]*
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
|
||||
} ~ delete _;
|
||||
|
||||
/// Correction values indexed by the trailing byte count (0 to 5). Decode and TryDecode
/// accumulate the raw bytes of a sequence (shift left by 6, add) and then subtract the entry
/// for that sequence's trailing byte count, e.g. 0x3080 == (0xC0 << 6) + 0x80 for one
/// trailing byte.
public static uint32* sOffsetsFromUTF8 = new uint32[]*
|
||||
{
|
||||
0x00000000, 0x00003080, 0x000E2080,
|
||||
0x03C82080, 0xFA082080, 0x82082080
|
||||
} ~ delete _;
|
||||
|
||||
/// Number of UTF8 bytes for `c`: 1 below 0x80, 2 below 0x800, 3 below 0x10000,
/// 4 below 0x110000, and 5 for anything at or above 0x110000.
public static int GetEncodedLength(char32 c)
{
	if (c >= (char32)0x110000)
		return 5;
	if (c >= (char32)0x10000)
		return 4;
	if (c >= (char32)0x800)
		return 3;
	if (c >= (char32)0x80)
		return 2;
	return 1;
}
|
||||
|
||||
/// Length in bytes of the UTF8 sequence starting at `buf`, derived from its first byte only.
public static int GetDecodedLength(char8* buf)
{
	char32 lead = *buf;
	int trailing = UTF8.sTrailingBytesForUTF8[lead];
	return trailing + 1;
}
|
||||
|
||||
/// Length in bytes of a UTF8 sequence whose first byte is `firstChar`.
public static int GetDecodedLength(char8 firstChar)
{
	int trailing = UTF8.sTrailingBytesForUTF8[firstChar];
	return trailing + 1;
}
|
||||
|
||||
/// Decodes the UTF8 sequence starting at `buf` without validating continuation bytes.
/// Returns the character and the sequence length in bytes. When the trailing byte count
/// exceeds `bufSize`, returns `(char32)-1` together with the sequence length.
/// Only sequences with up to 3 trailing bytes have their trailing bytes accumulated; for
/// first bytes that map to 4 or 5 trailing bytes just the table offset is subtracted.
public static (char32, int32) Decode(char8* buf, int bufSize)
{
	char32 decoded = *buf;
	int8 trailing = UTF8.sTrailingBytesForUTF8[decoded];
	// NOTE(review): a sequence occupies trailing + 1 bytes, so if bufSize counts the first
	// byte as well this check lets bufSize == trailing through and reads one byte too far -
	// confirm what callers pass before tightening it to >=.
	if (trailing > bufSize)
		return ((char32)-1, trailing + 1);

	if (trailing <= 3)
	{
		// Fold each trailing byte in, 6 bits at a time
		for (int bufIdx = 1; bufIdx <= trailing; bufIdx++)
		{
			decoded <<= 6;
			decoded += (int32)buf[bufIdx];
		}
	}

	decoded -= (int32)UTF8.sOffsetsFromUTF8[trailing];
	return (decoded, trailing + 1);
}
|
||||
|
||||
/// Decodes the UTF8 sequence starting at `buf`, validating continuation bytes.
/// Returns `.Err` as soon as one of up to 3 trailing bytes does not have the form 10xxxxxx.
/// Otherwise returns the character and the sequence length in bytes. When the trailing byte
/// count exceeds `bufSize`, the result is `.Ok` with `(char32)-1` and the sequence length.
public static Result<(char32, int32)> TryDecode(char8* buf, int bufSize)
{
	char32 decoded = *buf;
	int8 trailing = UTF8.sTrailingBytesForUTF8[decoded];
	if (trailing > bufSize)
		return .Ok(((char32)-1, trailing + 1));

	if (trailing <= 3)
	{
		// Trailing bytes are read and checked in order; the partially accumulated value
		// is simply dropped when a check fails.
		for (int idx = 1; idx <= trailing; idx++)
		{
			char8 cont = buf[idx];
			if (((uint8)cont & 0xC0) != 0x80)
				return .Err;
			decoded <<= 6;
			decoded += (int32)cont;
		}
	}

	decoded -= (int32)UTF8.sOffsetsFromUTF8[trailing];
	return .Ok((decoded, trailing + 1));
}
|
||||
|
||||
/// Encodes `c` as UTF8 into `dest`.
/// Returns the number of char8s the character requires (1 to 4). Nothing is written when
/// `dest` is too small, so a return value larger than `dest.Length` means it did not fit.
/// Values at or above 0x110000 are not encoded and yield 0.
public static int Encode(char32 c, Span<char8> dest)
{
	char8* curDest = dest.Ptr;
	// Capacity is compared as a length rather than with "destEnd - k" pointer arithmetic,
	// which wraps around for a null/empty destination and would allow a write through it.
	int capacity = dest.Length;

	if (c < (char32)0x80)
	{
		if (capacity < 1)
			return 1;
		*curDest++ = (char8)c;
		return 1;
	}

	if (c < (char32)0x800)
	{
		if (capacity < 2)
			return 2;
		*curDest++ = (.)(((uint32)c >> 6) | 0xC0);
		*curDest++ = (.)(((uint32)c & 0x3F) | 0x80);
		return 2;
	}

	if (c < (char32)0x10000)
	{
		if (capacity < 3)
			return 3;
		*curDest++ = (.)(((uint32)c >> 12) | 0xE0);
		*curDest++ = (.)((((uint32)c >> 6) & 0x3F) | 0x80);
		*curDest++ = (.)(((uint32)c & 0x3F) | 0x80);
		return 3;
	}

	if (c < (char32)0x110000)
	{
		if (capacity < 4)
			return 4;
		*curDest++ = (.)(((uint32)c >> 18) | 0xF0);
		*curDest++ = (.)((((uint32)c >> 12) & 0x3F) | 0x80);
		*curDest++ = (.)((((uint32)c >> 6) & 0x3F) | 0x80);
		*curDest++ = (.)(((uint32)c & 0x3F) | 0x80);
		return 4;
	}

	return 0;
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue