mirror of
https://github.com/beefytech/Beef.git
synced 2025-06-26 19:48:01 +02:00
More SIMD work
This commit is contained in:
parent
b57cbe2d69
commit
ca4b383339
19 changed files with 695 additions and 76 deletions
12
BeefLibs/corlib/src/Numerics/Bool2.bf
Normal file
12
BeefLibs/corlib/src/Numerics/Bool2.bf
Normal file
|
@ -0,0 +1,12 @@
|
|||
namespace System.Numerics
{
	/// Two-element boolean vector.
	/// The UnderlyingArray attribute declares the backing storage as two
	/// `bool` elements; this is the result type of the float2 comparison
	/// operators.
	[UnderlyingArray(typeof(bool), 2, true)]
	struct bool2
	{
		/// First element.
		public bool x;
		/// Second element.
		public bool y;

		/// `&` operator, bound to the compiler's "and" intrinsic.
		/// Declared `extern`: no body exists in source.
		[Intrinsic("and")]
		public static extern bool2 operator&(bool2 lhs, bool2 rhs);
	}
}
|
89
BeefLibs/corlib/src/Numerics/Float2.bf
Normal file
89
BeefLibs/corlib/src/Numerics/Float2.bf
Normal file
|
@ -0,0 +1,89 @@
|
|||
namespace System.Numerics
{
	/// Two-element single-precision float vector.
	/// The UnderlyingArray attribute declares the backing storage as two
	/// `float` elements. All operators are `extern` and bound to compiler
	/// intrinsics by name; none has a body in source.
	[UnderlyingArray(typeof(float), 2, true)]
	struct float2
	{
		/// First element.
		public float x;
		/// Second element.
		public float y;

		/// Creates a vector with the default value of the struct.
		[Inline]
		public this()
		{
			this = default;
		}

		/// Creates a vector from its two components.
		[Inline]
		public this(float x, float y)
		{
			this.x = x;
			this.y = y;
		}

		/// Element access by index, bound to the "index" intrinsic for both get and set.
		public extern float this[int idx] { [Intrinsic("index")] get; [Intrinsic("index")] set; }

		/// Swizzle property bound to the "shuffle10" intrinsic for both get and set.
		public extern float2 yx { [Intrinsic("shuffle10")] get; [Intrinsic("shuffle10")] set; }

		// ---- Addition (commutative, so the scalar overload is [Commutable]) ----
		[Intrinsic("add")]
		public static extern float2 operator+(float2 lhs, float2 rhs);
		[Intrinsic("add"), Commutable]
		public static extern float2 operator+(float2 lhs, float rhs);
		[Intrinsic("add")]
		public static extern float2 operator++(float2 lhs);

		// ---- Subtraction ----
		// NOTE: subtraction is not commutative, so the scalar overload must not be
		// [Commutable]: `s - v` resolved through a swapped `v - s` would have the
		// wrong sign. Like `/` and `%` below, the scalar-on-the-left form gets its
		// own explicit overload instead.
		[Intrinsic("sub")]
		public static extern float2 operator-(float2 lhs, float2 rhs);
		[Intrinsic("sub")]
		public static extern float2 operator-(float2 lhs, float rhs);
		[Intrinsic("sub")]
		public static extern float2 operator-(float lhs, float2 rhs);
		[Intrinsic("sub")]
		public static extern float2 operator--(float2 lhs);

		// ---- Multiplication (commutative) ----
		[Intrinsic("mul")]
		public static extern float2 operator*(float2 lhs, float2 rhs);
		[Intrinsic("mul"), Commutable]
		public static extern float2 operator*(float2 lhs, float rhs);

		// ---- Division (explicit overloads for each operand order) ----
		[Intrinsic("div")]
		public static extern float2 operator/(float2 lhs, float2 rhs);
		[Intrinsic("div")]
		public static extern float2 operator/(float2 lhs, float rhs);
		[Intrinsic("div")]
		public static extern float2 operator/(float lhs, float2 rhs);

		// ---- Remainder (explicit overloads for each operand order) ----
		[Intrinsic("mod")]
		public static extern float2 operator%(float2 lhs, float2 rhs);
		[Intrinsic("mod")]
		public static extern float2 operator%(float2 lhs, float rhs);
		[Intrinsic("mod")]
		public static extern float2 operator%(float lhs, float2 rhs);

		// ---- Comparisons: each returns a bool2 ----
		[Intrinsic("eq")]
		public static extern bool2 operator==(float2 lhs, float2 rhs);
		[Intrinsic("eq"), Commutable]
		public static extern bool2 operator==(float2 lhs, float rhs);

		[Intrinsic("neq")]
		public static extern bool2 operator!=(float2 lhs, float2 rhs);
		[Intrinsic("neq"), Commutable]
		public static extern bool2 operator!=(float2 lhs, float rhs);

		[Intrinsic("lt")]
		public static extern bool2 operator<(float2 lhs, float2 rhs);
		[Intrinsic("lt")]
		public static extern bool2 operator<(float2 lhs, float rhs);

		[Intrinsic("lte")]
		public static extern bool2 operator<=(float2 lhs, float2 rhs);
		[Intrinsic("lte")]
		public static extern bool2 operator<=(float2 lhs, float rhs);

		[Intrinsic("gt")]
		public static extern bool2 operator>(float2 lhs, float2 rhs);
		[Intrinsic("gt")]
		public static extern bool2 operator>(float2 lhs, float rhs);

		[Intrinsic("gte")]
		public static extern bool2 operator>=(float2 lhs, float2 rhs);
		[Intrinsic("gte")]
		public static extern bool2 operator>=(float2 lhs, float rhs);
	}
}
|
|
@ -8,11 +8,13 @@ namespace System.Numerics
|
|||
public float z;
|
||||
public float w;
|
||||
|
||||
[Inline]
|
||||
public this()
|
||||
{
|
||||
this = default;
|
||||
}
|
||||
|
||||
[Inline]
|
||||
public this(float x, float y, float z, float w)
|
||||
{
|
||||
this.x = x;
|
||||
|
@ -21,6 +23,8 @@ namespace System.Numerics
|
|||
this.w = w;
|
||||
}
|
||||
|
||||
public extern float this[int idx] { [Intrinsic("index")] get; [Intrinsic("index")] set; }
|
||||
|
||||
public extern float4 wzyx { [Intrinsic("shuffle3210")] get; [Intrinsic("shuffle3210")] set; }
|
||||
|
||||
[Intrinsic("add")]
|
||||
|
|
137
BeefLibs/corlib/src/Numerics/V128.bf
Normal file
137
BeefLibs/corlib/src/Numerics/V128.bf
Normal file
|
@ -0,0 +1,137 @@
|
|||
namespace System.Numerics
{
	/// 16-byte value declared as a union (see the Union attribute) of fixed-size
	/// arrays, so the same storage can be read as any of the element types below.
	/// The struct is attributed with Align(16) and an underlying array of 16 `uint8`.
	///
	/// Constructors come in pairs per element type: a single-argument form that
	/// writes the same value to every element of that view, and a full form that
	/// takes one argument per element.
	[UnderlyingArray(typeof(uint8), 16, true), Align(16), Union]
	struct v128
	{
		public int8[16] int8;
		public uint8[16] uint8;
		public int16[8] int16;
		public uint16[8] uint16;
		public int32[4] int32;
		public uint32[4] uint32;
		public int64[2] int64;
		public uint64[2] uint64;
		public float[4] float;
		public double[2] double;

		// ---- int8 view (16 elements) ----

		/// Fills all 16 int8 elements with `v0`.
		[Inline]
		public this(int8 v0)
		{
			this.int8 = .(v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0);
		}

		/// Sets the 16 int8 elements to `v0`..`v15` in index order.
		[Inline]
		public this(int8 v0, int8 v1, int8 v2, int8 v3, int8 v4, int8 v5, int8 v6, int8 v7, int8 v8, int8 v9, int8 v10, int8 v11, int8 v12, int8 v13, int8 v14, int8 v15)
		{
			this.int8 = .(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15);
		}

		// ---- uint8 view (16 elements) ----

		/// Fills all 16 uint8 elements with `v0`.
		[Inline]
		public this(uint8 v0)
		{
			this.uint8 = .(v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0);
		}

		/// Sets the 16 uint8 elements to `v0`..`v15` in index order.
		[Inline]
		public this(uint8 v0, uint8 v1, uint8 v2, uint8 v3, uint8 v4, uint8 v5, uint8 v6, uint8 v7, uint8 v8, uint8 v9, uint8 v10, uint8 v11, uint8 v12, uint8 v13, uint8 v14, uint8 v15)
		{
			this.uint8 = .(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15);
		}

		// ---- int16 view (8 elements) ----

		/// Fills all 8 int16 elements with `v0`.
		[Inline]
		public this(int16 v0)
		{
			this.int16 = .(v0, v0, v0, v0, v0, v0, v0, v0);
		}

		/// Sets the 8 int16 elements to `v0`..`v7` in index order.
		[Inline]
		public this(int16 v0, int16 v1, int16 v2, int16 v3, int16 v4, int16 v5, int16 v6, int16 v7)
		{
			this.int16 = .(v0, v1, v2, v3, v4, v5, v6, v7);
		}

		// ---- uint16 view (8 elements) ----

		/// Fills all 8 uint16 elements with `v0`.
		[Inline]
		public this(uint16 v0)
		{
			this.uint16 = .(v0, v0, v0, v0, v0, v0, v0, v0);
		}

		/// Sets the 8 uint16 elements to `v0`..`v7` in index order.
		[Inline]
		public this(uint16 v0, uint16 v1, uint16 v2, uint16 v3, uint16 v4, uint16 v5, uint16 v6, uint16 v7)
		{
			this.uint16 = .(v0, v1, v2, v3, v4, v5, v6, v7);
		}

		// ---- int32 view (4 elements) ----

		/// Fills all 4 int32 elements with `v0`.
		[Inline]
		public this(int32 v0)
		{
			this.int32 = .(v0, v0, v0, v0);
		}

		/// Sets the 4 int32 elements to `v0`..`v3` in index order.
		[Inline]
		public this(int32 v0, int32 v1, int32 v2, int32 v3)
		{
			this.int32 = .(v0, v1, v2, v3);
		}

		// ---- uint32 view (4 elements) ----

		/// Fills all 4 uint32 elements with `v0`.
		[Inline]
		public this(uint32 v0)
		{
			this.uint32 = .(v0, v0, v0, v0);
		}

		/// Sets the 4 uint32 elements to `v0`..`v3` in index order.
		[Inline]
		public this(uint32 v0, uint32 v1, uint32 v2, uint32 v3)
		{
			this.uint32 = .(v0, v1, v2, v3);
		}

		// ---- int64 view (2 elements) ----

		/// Fills both int64 elements with `v0`.
		[Inline]
		public this(int64 v0)
		{
			this.int64 = .(v0, v0);
		}

		/// Sets the 2 int64 elements to `v0`, `v1` in index order.
		[Inline]
		public this(int64 v0, int64 v1)
		{
			this.int64 = .(v0, v1);
		}

		// ---- uint64 view (2 elements) ----

		/// Fills both uint64 elements with `v0`.
		[Inline]
		public this(uint64 v0)
		{
			this.uint64 = .(v0, v0);
		}

		/// Sets the 2 uint64 elements to `v0`, `v1` in index order.
		[Inline]
		public this(uint64 v0, uint64 v1)
		{
			this.uint64 = .(v0, v1);
		}

		// ---- float view (4 elements) ----

		/// Fills all 4 float elements with `v0`.
		[Inline]
		public this(float v0)
		{
			this.float = .(v0, v0, v0, v0);
		}

		/// Sets the 4 float elements to `v0`..`v3` in index order.
		[Inline]
		public this(float v0, float v1, float v2, float v3)
		{
			this.float = .(v0, v1, v2, v3);
		}

		// ---- double view (2 elements) ----

		/// Fills both double elements with `v0`.
		[Inline]
		public this(double v0)
		{
			this.double = .(v0, v0);
		}

		/// Sets the 2 double elements to `v0`, `v1` in index order.
		[Inline]
		public this(double v0, double v1)
		{
			this.double = .(v0, v1);
		}
	}
}
|
193
BeefLibs/corlib/src/Numerics/X86/SSE.bf
Normal file
193
BeefLibs/corlib/src/Numerics/X86/SSE.bf
Normal file
|
@ -0,0 +1,193 @@
|
|||
namespace System.Numerics.X86
{
	/// Static collection of SSE-style operations over `v128`.
	///
	/// State of this revision: `add_ps` is the only member bound to an intrinsic
	/// and `add_ss` is the only member with a body. Every other member is a bare
	/// `extern` declaration with no implementation visible in this file.
	/// NOTE(review): the member names look like Intel's `_mm_*` SSE intrinsics
	/// with the prefix removed — confirm intended semantics against that
	/// reference when wiring up the remaining intrinsics.
	static class SSE
	{
		// ---- Arithmetic: add ----

		/// Bound to the ":add_ps" intrinsic.
		[Intrinsic(":add_ps")]
		public static extern v128 add_ps(v128 a, v128 b);

		/// Returns a copy of `a` whose `float[0]` has `b.float[0]` added to it.
		/// The remaining float elements are taken from `a` unchanged.
		[Inline]
		public static v128 add_ss(v128 a, v128 b)
		{
			v128 sum = a;
			sum.float[0] = sum.float[0] + b.float[0];
			return sum;
		}

		// ---- Bitwise: and / andnot ----

		public static extern v128 andnot_ps(v128 a, v128 b);

		public static extern v128 and_ps(v128 a, v128 b);

		// ---- cmp* declarations (v128 result) ----

		public static extern v128 cmpeq_ps(v128 a, v128 b);

		public static extern v128 cmpeq_ss(v128 a, v128 b);

		public static extern v128 cmpge_ps(v128 a, v128 b);

		public static extern v128 cmpge_ss(v128 a, v128 b);

		public static extern v128 cmpgt_ps(v128 a, v128 b);

		public static extern v128 cmpgt_ss(v128 a, v128 b);

		public static extern v128 cmple_ps(v128 a, v128 b);

		public static extern v128 cmple_ss(v128 a, v128 b);

		public static extern v128 cmplt_ps(v128 a, v128 b);

		public static extern v128 cmplt_ss(v128 a, v128 b);

		public static extern v128 cmpneq_ps(v128 a, v128 b);

		public static extern v128 cmpneq_ss(v128 a, v128 b);

		public static extern v128 cmpnge_ps(v128 a, v128 b);

		public static extern v128 cmpnge_ss(v128 a, v128 b);

		public static extern v128 cmpngt_ps(v128 a, v128 b);

		public static extern v128 cmpngt_ss(v128 a, v128 b);

		public static extern v128 cmpnle_ps(v128 a, v128 b);

		public static extern v128 cmpnle_ss(v128 a, v128 b);

		public static extern v128 cmpnlt_ps(v128 a, v128 b);

		public static extern v128 cmpnlt_ss(v128 a, v128 b);

		public static extern v128 cmpord_ps(v128 a, v128 b);

		public static extern v128 cmpord_ss(v128 a, v128 b);

		public static extern v128 cmpunord_ps(v128 a, v128 b);

		public static extern v128 cmpunord_ss(v128 a, v128 b);

		// ---- comi* declarations (int32 result) ----

		public static extern int32 comieq_ss(v128 a, v128 b);

		public static extern int32 comige_ss(v128 a, v128 b);

		public static extern int32 comigt_ss(v128 a, v128 b);

		public static extern int32 comile_ss(v128 a, v128 b);

		public static extern int32 comilt_ss(v128 a, v128 b);

		public static extern int32 comineq_ss(v128 a, v128 b);

		// ---- cvt* declarations ----

		public static extern v128 cvtsi32_ss(v128 a, int32 b);

		public static extern v128 cvtsi64_ss(v128 a, int64 b);

		public static extern float cvtss_f32(v128 a);

		public static extern int32 cvtss_si32(v128 a);

		public static extern int64 cvtss_si64(v128 a);

		public static extern int32 cvttss_si32(v128 a);

		public static extern int64 cvttss_si64(v128 a);

		public static extern int32 cvtt_ss2si(v128 a);

		public static extern int32 cvt_ss2si(v128 a);

		// ---- Arithmetic: div ----

		public static extern v128 div_ps(v128 a, v128 b);

		public static extern v128 div_ss(v128 a, v128 b);

		// ---- load* declarations (pointer in, v128 out) ----

		public static extern v128 loadu_ps(void* ptr);

		public static extern v128 loadu_si16(void* mem_addr);

		public static extern v128 loadu_si64(void* mem_addr);

		public static extern v128 load_ps(void* ptr);

		// ---- max / min ----

		public static extern v128 max_ps(v128 a, v128 b);

		public static extern v128 max_ss(v128 a, v128 b);

		public static extern v128 min_ps(v128 a, v128 b);

		public static extern v128 min_ss(v128 a, v128 b);

		// ---- move* declarations ----

		public static extern v128 movehl_ps(v128 a, v128 b);

		public static extern v128 movelh_ps(v128 a, v128 b);

		public static extern int32 movemask_ps(v128 a);

		public static extern v128 move_ss(v128 a, v128 b);

		// ---- Arithmetic: mul ----

		public static extern v128 mul_ps(v128 a, v128 b);

		public static extern v128 mul_ss(v128 a, v128 b);

		// ---- Bitwise: or ----

		public static extern v128 or_ps(v128 a, v128 b);

		// ---- rcp / rsqrt ----

		public static extern v128 rcp_ps(v128 a);

		public static extern v128 rcp_ss(v128 a);

		public static extern v128 rsqrt_ps(v128 a);

		public static extern v128 rsqrt_ss(v128 a);

		// ---- set* declarations (scalars in, v128 out) ----

		public static extern v128 set1_ps(float a);

		public static extern v128 setr_ps(float e3, float e2, float e1, float e0);

		public static extern v128 setzero_ps();

		public static extern v128 set_ps(float e3, float e2, float e1, float e0);

		public static extern v128 set_ps1(float a);

		public static extern v128 set_ss(float a);

		// ---- Shuffle ----

		public static extern int32 SHUFFLE(int32 d, int32 c, int32 b, int32 a);

		public static extern v128 shuffle_ps(v128 a, v128 b, int32 imm8);

		// ---- sqrt ----

		public static extern v128 sqrt_ps(v128 a);

		public static extern v128 sqrt_ss(v128 a);

		// ---- store* / stream declarations (pointer and v128 in, no result) ----

		public static extern void storeu_ps(void* ptr, v128 val);

		public static extern void storeu_si16(void* mem_addr, v128 a);

		public static extern void storeu_si64(void* mem_addr, v128 a);

		public static extern void store_ps(void* ptr, v128 val);

		public static extern void stream_ps(void* mem_addr, v128 a);

		// ---- Arithmetic: sub ----

		public static extern v128 sub_ps(v128 a, v128 b);

		public static extern v128 sub_ss(v128 a, v128 b);

		// ---- Transpose (operates on four rows passed by ref) ----

		public static extern void TRANSPOSE4_PS(ref v128 row0, ref v128 row1, ref v128 row2, ref v128 row3);

		// ---- ucomi* declarations (int32 result) ----

		public static extern int32 ucomieq_ss(v128 a, v128 b);

		public static extern int32 ucomige_ss(v128 a, v128 b);

		public static extern int32 ucomigt_ss(v128 a, v128 b);

		public static extern int32 ucomile_ss(v128 a, v128 b);

		public static extern int32 ucomilt_ss(v128 a, v128 b);

		public static extern int32 ucomineq_ss(v128 a, v128 b);

		// ---- unpack ----

		public static extern v128 unpackhi_ps(v128 a, v128 b);

		public static extern v128 unpacklo_ps(v128 a, v128 b);

		// ---- Bitwise: xor ----

		//[Intrinsic("x86:x86_sse_cmp_ss")]
		public static extern v128 xor_ps(v128 a, v128 b);
	}
}
|
6
BeefLibs/corlib/src/Numerics/X86/SSE2.bf
Normal file
6
BeefLibs/corlib/src/Numerics/X86/SSE2.bf
Normal file
|
@ -0,0 +1,6 @@
|
|||
namespace System.Numerics.X86
{
	/// Placeholder for SSE2 operations; the class declares no members in this revision.
	static class SSE2
	{
	}
}
|
Loading…
Add table
Add a link
Reference in a new issue