1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-06-08 19:48:20 +02:00

More SIMD work

This commit is contained in:
Brian Fiete 2020-08-27 10:11:42 -07:00
parent 01ae40fcc8
commit 122e321d1c
12 changed files with 500 additions and 40 deletions

View file

@ -89,5 +89,10 @@ namespace System.Numerics
public static extern bool4 operator>=(float4 lhs, float4 rhs); public static extern bool4 operator>=(float4 lhs, float4 rhs);
[Intrinsic("gte")] [Intrinsic("gte")]
public static extern bool4 operator>=(float4 lhs, float rhs); public static extern bool4 operator>=(float4 lhs, float rhs);
[Intrinsic("cast")]
public static extern explicit operator v128(float4 lhs);
[Intrinsic("cast")]
public static extern explicit operator float4(v128 lhs);
} }
} }

View file

@ -0,0 +1,128 @@
namespace System.Numerics
{
	/// Four-lane vector of 32-bit signed integers.
	///
	/// Every operator below is declared `extern` and bound to a named compiler
	/// intrinsic through the `Intrinsic` attribute, so this file contains no
	/// arithmetic of its own. Comparison operators return a `bool4`.
	// NOTE(review): the meaning of the third `UnderlyingArray` argument (`true`) is
	// not visible from this file - presumably it marks the array as a SIMD vector; confirm.
	[UnderlyingArray(typeof(int32), 4, true)]
	struct int32_4
	{
		public int32 x;
		public int32 y;
		public int32 z;
		public int32 w;

		/// Creates a vector with every lane set to its default value.
		[Inline]
		public this()
		{
			this = default;
		}

		/// Creates a vector from four explicit lane values.
		[Inline]
		public this(int32 x, int32 y, int32 z, int32 w)
		{
			this.x = x;
			this.y = y;
			this.z = z;
			this.w = w;
		}

		/// Lane access by index, bound to the "index" intrinsic.
		public extern int32 this[int32 idx] { [Intrinsic("index")] get; [Intrinsic("index")] set; }

		/// Swizzle bound to the "shuffle3210" intrinsic; the property name indicates
		/// the lanes in reverse order (w, z, y, x).
		public extern int32_4 wzyx { [Intrinsic("shuffle3210")] get; [Intrinsic("shuffle3210")] set; }

		// --- Unary ---

		[Intrinsic("not")]
		public static extern int32_4 operator~(int32_4 lhs);

		// --- Addition (commutative, so the scalar overload may be reversed) ---

		[Intrinsic("add")]
		public static extern int32_4 operator+(int32_4 lhs, int32_4 rhs);
		[Intrinsic("add"), Commutable]
		public static extern int32_4 operator+(int32_4 lhs, int32 rhs);
		[Intrinsic("add")]
		public static extern int32_4 operator++(int32_4 lhs);

		// --- Subtraction ---
		// Subtraction is NOT commutative: `s - v` must not be rewritten as `v - s`.
		// The scalar-on-the-left form therefore gets its own overload instead of
		// `Commutable`, matching how div/mod are declared below.

		[Intrinsic("sub")]
		public static extern int32_4 operator-(int32_4 lhs, int32_4 rhs);
		[Intrinsic("sub")]
		public static extern int32_4 operator-(int32_4 lhs, int32 rhs);
		[Intrinsic("sub")]
		public static extern int32_4 operator-(int32 lhs, int32_4 rhs);
		[Intrinsic("sub")]
		public static extern int32_4 operator--(int32_4 lhs);

		// --- Multiplication (commutative) ---

		[Intrinsic("mul")]
		public static extern int32_4 operator*(int32_4 lhs, int32_4 rhs);
		[Intrinsic("mul"), Commutable]
		public static extern int32_4 operator*(int32_4 lhs, int32 rhs);

		// --- Division / modulo (operand order matters, so both scalar positions are explicit) ---

		[Intrinsic("div")]
		public static extern int32_4 operator/(int32_4 lhs, int32_4 rhs);
		[Intrinsic("div")]
		public static extern int32_4 operator/(int32_4 lhs, int32 rhs);
		[Intrinsic("div")]
		public static extern int32_4 operator/(int32 lhs, int32_4 rhs);

		[Intrinsic("mod")]
		public static extern int32_4 operator%(int32_4 lhs, int32_4 rhs);
		[Intrinsic("mod")]
		public static extern int32_4 operator%(int32_4 lhs, int32 rhs);
		[Intrinsic("mod")]
		public static extern int32_4 operator%(int32 lhs, int32_4 rhs);

		// --- Bitwise ---

		[Intrinsic("and")]
		public static extern int32_4 operator&(int32_4 lhs, int32_4 rhs);
		[Intrinsic("and")]
		public static extern int32_4 operator&(int32_4 lhs, int32 rhs);
		[Intrinsic("and")]
		public static extern int32_4 operator&(int32 lhs, int32_4 rhs);

		[Intrinsic("or")]
		public static extern int32_4 operator|(int32_4 lhs, int32_4 rhs);
		[Intrinsic("or")]
		public static extern int32_4 operator|(int32_4 lhs, int32 rhs);
		[Intrinsic("or")]
		public static extern int32_4 operator|(int32 lhs, int32_4 rhs);

		[Intrinsic("xor")]
		public static extern int32_4 operator^(int32_4 lhs, int32_4 rhs);
		[Intrinsic("xor")]
		public static extern int32_4 operator^(int32_4 lhs, int32 rhs);
		[Intrinsic("xor")]
		public static extern int32_4 operator^(int32 lhs, int32_4 rhs);

		// --- Shifts: right shift is bound to "sar" (arithmetic), as the lanes are signed ---

		[Intrinsic("shl")]
		public static extern int32_4 operator<<(int32_4 lhs, int rhs);
		[Intrinsic("sar")]
		public static extern int32_4 operator>>(int32_4 lhs, int rhs);

		// --- Comparisons (result is a bool4) ---

		[Intrinsic("eq")]
		public static extern bool4 operator==(int32_4 lhs, int32_4 rhs);
		[Intrinsic("eq"), Commutable]
		public static extern bool4 operator==(int32_4 lhs, int32 rhs);

		[Intrinsic("neq")]
		public static extern bool4 operator!=(int32_4 lhs, int32_4 rhs);
		[Intrinsic("neq"), Commutable]
		public static extern bool4 operator!=(int32_4 lhs, int32 rhs);

		[Intrinsic("lt")]
		public static extern bool4 operator<(int32_4 lhs, int32_4 rhs);
		[Intrinsic("lt")]
		public static extern bool4 operator<(int32_4 lhs, int32 rhs);

		[Intrinsic("lte")]
		public static extern bool4 operator<=(int32_4 lhs, int32_4 rhs);
		[Intrinsic("lte")]
		public static extern bool4 operator<=(int32_4 lhs, int32 rhs);

		[Intrinsic("gt")]
		public static extern bool4 operator>(int32_4 lhs, int32_4 rhs);
		[Intrinsic("gt")]
		public static extern bool4 operator>(int32_4 lhs, int32 rhs);

		[Intrinsic("gte")]
		public static extern bool4 operator>=(int32_4 lhs, int32_4 rhs);
		[Intrinsic("gte")]
		public static extern bool4 operator>=(int32_4 lhs, int32 rhs);

		// --- Explicit conversions to and from v128, bound to the "cast" intrinsic ---

		[Intrinsic("cast")]
		public static extern explicit operator v128(int32_4 lhs);
		[Intrinsic("cast")]
		public static extern explicit operator int32_4(v128 lhs);
	}
}

View file

@ -133,5 +133,10 @@ namespace System.Numerics
{ {
this.double = .(v0, v1); this.double = .(v0, v1);
} }
[Intrinsic("and")]
public static extern v128 operator&(v128 lhs, v128 rhs);
[Intrinsic("not")]
public static extern v128 operator~(v128 lhs);
} }
} }

View file

@ -12,8 +12,12 @@ namespace System.Numerics.X86
res.float[0] += b.float[0]; res.float[0] += b.float[0];
return res; return res;
} }
public static extern v128 andnot_ps(v128 a, v128 b); [Inline]
// Returns the bitwise complement of 'a' ANDed with 'b'.
// Note the operand order: it is 'a' that gets inverted, not 'b'.
public static v128 andnot_ps(v128 a, v128 b)
{
// '~' binds tighter than '&', so this evaluates as (~a) & b.
return ~a & b;
}
public static extern v128 and_ps(v128 a, v128 b); public static extern v128 and_ps(v128 a, v128 b);

View file

@ -115,6 +115,7 @@ public:
{ {
return (mTypeCode == BeTypeCode_Float) || (mTypeCode == BeTypeCode_Double); return (mTypeCode == BeTypeCode_Float) || (mTypeCode == BeTypeCode_Double);
} }
bool IsStruct() bool IsStruct()
{ {
@ -136,6 +137,12 @@ public:
return (mTypeCode == BeTypeCode_Vector); return (mTypeCode == BeTypeCode_Vector);
} }
// True when this type is a scalar floating-point type (Float/Double), a Vector,
// or one of the M128/M256/M512 type codes; false for every other type code.
bool IsFloatOrVector()
{
	switch (mTypeCode)
	{
	case BeTypeCode_Float:
	case BeTypeCode_Double:
	case BeTypeCode_Vector:
	case BeTypeCode_M128:
	case BeTypeCode_M256:
	case BeTypeCode_M512:
		return true;
	default:
		return false;
	}
}
bool IsComposite() bool IsComposite()
{ {
return (mTypeCode == BeTypeCode_Struct) || (mTypeCode == BeTypeCode_SizedArray) || (mTypeCode == BeTypeCode_Vector) || (mTypeCode == BeTypeCode_M128) || (mTypeCode == BeTypeCode_M256) || (mTypeCode == BeTypeCode_M512); return (mTypeCode == BeTypeCode_Struct) || (mTypeCode == BeTypeCode_SizedArray) || (mTypeCode == BeTypeCode_Vector) || (mTypeCode == BeTypeCode_M128) || (mTypeCode == BeTypeCode_M256) || (mTypeCode == BeTypeCode_M512);

View file

@ -2066,6 +2066,7 @@ void BeIRCodeGen::HandleNextCmd()
CMD_PARAM(CmdParamVec<BeType*>, paramTypes); CMD_PARAM(CmdParamVec<BeType*>, paramTypes);
auto intrin = mBeModule->mAlloc.Alloc<BeIntrinsic>(); auto intrin = mBeModule->mAlloc.Alloc<BeIntrinsic>();
intrin->mName = intrinName;
intrin->mKind = (BfIRIntrinsic)intrinId; intrin->mKind = (BfIRIntrinsic)intrinId;
intrin->mReturnType = returnType; intrin->mReturnType = returnType;
SetResult(curId, intrin); SetResult(curId, intrin);

View file

@ -5,6 +5,7 @@
#include "BeMCContext.h" #include "BeMCContext.h"
#include "BeCOFFObject.h" #include "BeCOFFObject.h"
#include "BeIRCodeGen.h" #include "BeIRCodeGen.h"
#include "../Compiler/BfIRCodeGen.h"
#include "BeefySysLib/util/BeefPerf.h" #include "BeefySysLib/util/BeefPerf.h"
#include "BeefySysLib/util/AllocDebug.h" #include "BeefySysLib/util/AllocDebug.h"
@ -2258,6 +2259,7 @@ BeMCOperand BeMCContext::GetOperand(BeValue* value, bool allowMetaResult, bool a
break; break;
case BeTypeCode_Struct: case BeTypeCode_Struct:
case BeTypeCode_SizedArray: case BeTypeCode_SizedArray:
case BeTypeCode_Vector:
mcOperand.mImmediate = constant->mInt64; mcOperand.mImmediate = constant->mInt64;
mcOperand.mKind = BeMCOperandKind_Immediate_i64; mcOperand.mKind = BeMCOperandKind_Immediate_i64;
break; break;
@ -8982,7 +8984,7 @@ bool BeMCContext::DoLegalization()
//TODO: For what instructions was this true? CMP, MOV, ADD, etc... seems to have it //TODO: For what instructions was this true? CMP, MOV, ADD, etc... seems to have it
if ((arg1Type != NULL) && (arg1Type->IsFloat())) if ((arg1Type != NULL) && (arg1Type->IsFloatOrVector()))
{ {
// MOV is allowed on '<r/m>, <imm>' // MOV is allowed on '<r/m>, <imm>'
if ((inst->mKind != BeMCInstKind_Mov) && (arg0.MayBeMemory()) && (arg1.IsImmediate())) if ((inst->mKind != BeMCInstKind_Mov) && (arg0.MayBeMemory()) && (arg1.IsImmediate()))
@ -9033,7 +9035,7 @@ bool BeMCContext::DoLegalization()
{ {
inst->mResult = BeMCOperand(); inst->mResult = BeMCOperand();
} }
else if ((arg0Type->IsFloat()) && (!CouldBeReg(inst->mResult))) else if ((arg0Type->IsFloatOrVector()) && (!CouldBeReg(inst->mResult)))
{ {
// We need a REG on the dest for sure, so just create a scratch here, otherwise we end up // We need a REG on the dest for sure, so just create a scratch here, otherwise we end up
// requiring additional scratch vregs later // requiring additional scratch vregs later
@ -9129,7 +9131,7 @@ bool BeMCContext::DoLegalization()
(inst->mKind != BeMCInstKind_CmpToBool) && (inst->mKind != BeMCInstKind_CmpToBool) &&
(inst->mKind != BeMCInstKind_Mov)) (inst->mKind != BeMCInstKind_Mov))
{ {
if ((arg0Type != NULL) && (arg0Type->IsFloat())) if ((arg0Type != NULL) && (arg0Type->IsFloatOrVector()))
{ {
// <r/m>, <xmm> is not valid, <xmm>, <r/m> // <r/m>, <xmm> is not valid, <xmm>, <r/m>
if (!arg0.IsNativeReg()) if (!arg0.IsNativeReg())
@ -9174,7 +9176,7 @@ bool BeMCContext::DoLegalization()
} }
} }
if ((!inst->IsMov()) && (arg0.IsVReg()) && (arg0Type->IsFloat())) if ((!inst->IsMov()) && (arg0.IsVReg()) && (arg0Type->IsFloatOrVector()))
{ {
BF_ASSERT(!inst->mResult); BF_ASSERT(!inst->mResult);
// XMM instructions (besides MOVs) require native register destinations // XMM instructions (besides MOVs) require native register destinations
@ -11356,11 +11358,11 @@ void BeMCContext::DoRegFinalization()
auto mcPopReg = inst->mArg0; auto mcPopReg = inst->mArg0;
auto popType = GetType(mcPopReg); auto popType = GetType(mcPopReg);
if (!popType->IsFloat()) if (!popType->IsFloatOrVector())
mcPopReg.mReg = GetFullRegister(mcPopReg.mReg); mcPopReg.mReg = GetFullRegister(mcPopReg.mReg);
auto pushType = GetType(inst->mArg1); auto pushType = GetType(inst->mArg1);
auto useTypeCode = pushType->IsFloat() ? pushType->mTypeCode : BeTypeCode_Int64; auto useTypeCode = pushType->IsFloatOrVector() ? pushType->mTypeCode : BeTypeCode_Int64;
BF_ASSERT(deferredIdx < 4); BF_ASSERT(deferredIdx < 4);
auto mcDeferred = GetCallArgVReg(deferredIdx++, useTypeCode); auto mcDeferred = GetCallArgVReg(deferredIdx++, useTypeCode);
@ -11377,7 +11379,7 @@ void BeMCContext::DoRegFinalization()
{ {
inst->mKind = BeMCInstKind_Mov; inst->mKind = BeMCInstKind_Mov;
inst->mArg0 = mcDeferred; inst->mArg0 = mcDeferred;
if (!popType->IsFloat()) if (!popType->IsFloatOrVector())
inst->mArg1 = BeMCOperand::FromReg(GetFullRegister(arg1.mReg)); inst->mArg1 = BeMCOperand::FromReg(GetFullRegister(arg1.mReg));
else else
inst->mArg1 = BeMCOperand::FromReg(arg1.mReg); inst->mArg1 = BeMCOperand::FromReg(arg1.mReg);
@ -11518,6 +11520,18 @@ BeMCInstForm BeMCContext::GetInstForm(BeMCInst* inst)
auto arg1Type = GetType(arg1); auto arg1Type = GetType(arg1);
if ((arg0Type != NULL) && (arg1Type != NULL) && if ((arg0Type != NULL) && (arg1Type != NULL) &&
((arg0Type->IsVector()) || (arg1Type->IsVector())))
{
if (arg0.IsNativeReg())
{
return BeMCInstForm_XMM128_RM128;
}
else
{
return BeMCInstForm_FRM128_XMM128;
}
}
else if ((arg0Type != NULL) && (arg1Type != NULL) &&
((arg0Type->IsFloat()) || (arg1Type->IsFloat()))) ((arg0Type->IsFloat()) || (arg1Type->IsFloat())))
{ {
if (arg0.IsNativeReg()) if (arg0.IsNativeReg())
@ -11593,19 +11607,7 @@ BeMCInstForm BeMCContext::GetInstForm(BeMCInst* inst)
else else
NotImpl(); NotImpl();
} }
} }
else if ((arg0Type != NULL) && (arg1Type != NULL) &&
((arg0Type->IsVector()) || (arg1Type->IsVector())))
{
if (arg0.IsNativeReg())
{
return BeMCInstForm_XMM128_RM128;
}
else
{
return BeMCInstForm_FRM128_XMM128;
}
}
if ((arg1.IsImmediate()) && (arg0Type != NULL)) // MOV r/m64, imm32 if ((arg1.IsImmediate()) && (arg0Type != NULL)) // MOV r/m64, imm32
{ {
@ -11977,16 +11979,29 @@ bool BeMCContext::EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 op
return true; return true;
case BeMCInstForm_XMM128_RM128: case BeMCInstForm_XMM128_RM128:
{ {
BfTypeCode elemType = BfTypeCode_Float; BeTypeCode elemType = BeTypeCode_Float;
auto arg0 = GetFixedOperand(inst->mArg0); auto arg0 = GetFixedOperand(inst->mArg0);
auto arg1 = GetFixedOperand(inst->mArg1); auto arg1 = GetFixedOperand(inst->mArg1);
auto arg0Type = GetType(inst->mArg0); auto arg0Type = GetType(inst->mArg0);
auto arg1Type = GetType(inst->mArg1);
if (arg0Type->IsExplicitVectorType()) if (arg0Type->IsExplicitVectorType())
{ {
auto vecType = (BeVectorType*)arg0Type;
elemType = vecType->mElementType->mTypeCode;
} }
if (elemType == BfTypeCode_Double)
if (arg1Type->IsFloat())
{
if (elemType == BeTypeCode_Double)
Emit(0x66);
EmitREX(arg1, arg1, false);
Emit(0x0F); Emit(0xC6); // SHUFPS / SHUFPD
EmitModRM(arg1, arg1);
Emit(0);
}
if (elemType == BeTypeCode_Double)
Emit(0x66); Emit(0x66);
EmitREX(arg0, arg1, is64Bit); EmitREX(arg0, arg1, is64Bit);
Emit(0x0F); Emit(opcode); Emit(0x0F); Emit(opcode);
@ -11994,13 +12009,15 @@ bool BeMCContext::EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 op
return true; return true;
} }
break; break;
// case BeMCInstForm_FRM128_XMM128: case BeMCInstForm_FRM128_XMM128:
// { // {
// Emit(0xF2); EmitREX(inst->mArg0, inst->mArg1, is64Bit); // Emit(0xF2); EmitREX(inst->mArg0, inst->mArg1, is64Bit);
// Emit(0x0F); Emit(opcode); // Emit(0x0F); Emit(opcode);
// EmitModRM(inst->mArg0, inst->mArg1); // EmitModRM(inst->mArg0, inst->mArg1);
// } // }
// break; NOP;
break;
} }
return false; return false;
@ -12065,6 +12082,131 @@ bool BeMCContext::EmitPackedXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8
return false; return false;
} }
// Ensures 'arg' is usable as the packed (vector) source operand of an integer XMM instruction.
//
// If 'arg' already has a vector type it is returned unchanged. Otherwise machine code is
// emitted that materializes the scalar in XMM15 and replicates it across all lanes, and an
// operand naming XMM15 is returned. XMM15 is therefore overwritten as a scratch register.
//
//   arg     - operand to convert; either a vector, an immediate, or a scalar register/memory operand
//   vecType - the vector type of the destination; its element type selects the broadcast sequence
//
// Only SSE2 instructions are emitted for the broadcast:
//   Int8 : MOVD, PUNPCKLBW (byte -> word pair), PSHUFLW 0, PSHUFD 0
//   Int16: MOVD, PSHUFLW 0 (fills the low 4 words), PSHUFD 0 (fills all 8 words)
//   Int32: MOVD, PSHUFD 0
// Any other element type falls through to the Int32 sequence.
BeMCOperand BeMCContext::IntXMMGetPacked(BeMCOperand arg, BeVectorType* vecType)
{
	auto argType = GetType(arg);
	if (argType->IsVector())
		return arg;

	BeMCOperand xmm15;
	xmm15.mReg = X64Reg_M128_XMM15;
	xmm15.mKind = BeMCOperandKind_NativeReg;

	if (arg.IsImmediate())
	{
		// Load a pre-splatted constant.
		// NOTE(review): the constant is always tagged as int32x4, regardless of the element
		// type in 'vecType' - confirm this is correct for Int8/Int16 vectors.
		BeMCOperand immOperand;
		immOperand.mKind = BeMCOperandKind_Immediate_int32x4;
		immOperand.mImmediate = arg.mImmediate;

		Emit(0xF3);
		EmitREX(xmm15, immOperand, false);
		Emit(0x0F); Emit(0x6F); // MOVDQU
		EmitModRM(xmm15, immOperand);
		return xmm15;
	}

	// Move the scalar into the low dword of XMM15
	Emit(0x66);
	EmitREX(xmm15, arg, false);
	Emit(0x0F); Emit(0x6E); // MOVD
	EmitModRM(xmm15, arg);

	auto elemTypeCode = vecType->mElementType->mTypeCode;

	if (elemTypeCode == BeTypeCode_Int8)
	{
		// Duplicate byte 0 so that word 0 holds two copies of it.
		// (PSHUFB is not used: it takes no imm8 and needs a separate zeroed mask register.)
		Emit(0x66);
		EmitREX(xmm15, xmm15, false);
		Emit(0x0F); Emit(0x60); // PUNPCKLBW
		EmitModRM(xmm15, xmm15);
	}

	if ((elemTypeCode == BeTypeCode_Int8) || (elemTypeCode == BeTypeCode_Int16))
	{
		// Replicate word 0 across the low four words, so dword 0 holds two copies of it
		Emit(0xF2);
		EmitREX(xmm15, xmm15, false);
		Emit(0x0F); Emit(0x70); // PSHUFLW
		EmitModRM(xmm15, xmm15);
		Emit(0);
	}

	// Replicate dword 0 across all four dwords
	Emit(0x66);
	EmitREX(xmm15, xmm15, false);
	Emit(0x0F); Emit(0x70); // PSHUFD
	EmitModRM(xmm15, xmm15);
	Emit(0);

	return xmm15;
}
// Tries to emit a packed-integer XMM instruction for 'inst'.
//
// Handles only the XMM128_RM128 form where the destination is an explicit vector type with
// Int8, Int16 or Int32 elements. 'opcode' is the byte-element opcode; the Int16 and Int32
// variants are emitted as opcode+1 and opcode+2 (callers pass 0xFC for PADD? and 0xF8 for PSUB?).
//
// Returns true if an instruction was emitted, false if the caller must fall back to another emitter.
bool BeMCContext::EmitIntXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode)
{
	if (instForm != BeMCInstForm_XMM128_RM128)
		return false;

	auto dest = GetFixedOperand(inst->mArg0);
	auto src = GetFixedOperand(inst->mArg1);
	auto destType = GetType(inst->mArg0);
	// The source type is not consulted below; the lookup is kept so the call sequence is unchanged.
	auto srcType = GetType(inst->mArg1);
	(void)srcType;

	if (!destType->IsExplicitVectorType())
		return false;

	auto vecType = (BeVectorType*)destType;

	// Distance from the byte-element opcode to the opcode for this element width
	int widthOffset = 0;
	switch (vecType->mElementType->mTypeCode)
	{
	case BeTypeCode_Int8:
		widthOffset = 0;
		break;
	case BeTypeCode_Int16:
		widthOffset = 1;
		break;
	case BeTypeCode_Int32:
		widthOffset = 2;
		break;
	default:
		return false;
	}

	// A scalar source gets replaced by a packed operand
	src = IntXMMGetPacked(src, vecType);

	Emit(0x66);
	EmitREX(dest, src, false);
	Emit(0x0F);
	Emit(opcode + widthOffset);
	EmitModRM(dest, src);
	return true;
}
// Tries to emit a packed-integer bitwise XMM instruction for 'inst'.
//
// Handles only the XMM128_RM128 form where the destination is an explicit vector type with
// Int8, Int16 or Int32 elements. Unlike EmitIntXMMInst, the same 'opcode' is used for every
// element width (callers pass 0xDB for PAND, 0xEB for POR and 0xEF for PXOR).
//
// Returns true if an instruction was emitted, false if the caller must fall back to another emitter.
bool BeMCContext::EmitIntBitwiseXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode)
{
	if (instForm != BeMCInstForm_XMM128_RM128)
		return false;

	auto dest = GetFixedOperand(inst->mArg0);
	auto src = GetFixedOperand(inst->mArg1);
	auto destType = GetType(inst->mArg0);

	if (!destType->IsExplicitVectorType())
		return false;

	auto vecType = (BeVectorType*)destType;
	auto elemTypeCode = vecType->mElementType->mTypeCode;
	bool isPackedInt =
		(elemTypeCode == BeTypeCode_Int8) ||
		(elemTypeCode == BeTypeCode_Int16) ||
		(elemTypeCode == BeTypeCode_Int32);
	if (!isPackedInt)
		return false;

	// A scalar source gets replaced by a packed operand
	src = IntXMMGetPacked(src, vecType);

	Emit(0x66);
	EmitREX(dest, src, false);
	Emit(0x0F);
	Emit(opcode); // caller-selected: PAND / POR / PXOR
	EmitModRM(dest, src);
	return true;
}
void BeMCContext::EmitAggMov(const BeMCOperand& dest, const BeMCOperand& src) void BeMCContext::EmitAggMov(const BeMCOperand& dest, const BeMCOperand& src)
{ {
BeRMParamsInfo rmInfo; BeRMParamsInfo rmInfo;
@ -12381,6 +12523,8 @@ void BeMCContext::DoCodeEmission()
{ {
for (auto inst : mcBlock->mInstructions) for (auto inst : mcBlock->mInstructions)
{ {
mActiveInst = inst;
if (mDebugging) if (mDebugging)
{ {
ToString(inst, dbgStr, true, true); ToString(inst, dbgStr, true, true);
@ -13330,6 +13474,10 @@ void BeMCContext::DoCodeEmission()
// MOVUPS // MOVUPS
EmitREX(arg0, arg1, true); EmitREX(arg0, arg1, true);
Emit(0x0F); Emit(0x10); Emit(0x0F); Emit(0x10);
if (arg1.IsImmediateInt())
arg1.mKind = BeMCOperandKind_Immediate_int32x4;
EmitModRM(arg0, arg1); EmitModRM(arg0, arg1);
} }
break; break;
@ -13967,6 +14115,8 @@ void BeMCContext::DoCodeEmission()
break; break;
} }
if (EmitIntXMMInst(instForm, inst, 0xFC)) // PADD?
break;
if (EmitStdXMMInst(instForm, inst, 0x58)) if (EmitStdXMMInst(instForm, inst, 0x58))
break; break;
EmitStdInst(instForm, inst, 0x01, 0x03, 0x81, 0x0, 0x83, 0x0); EmitStdInst(instForm, inst, 0x01, 0x03, 0x81, 0x0, 0x83, 0x0);
@ -14006,6 +14156,9 @@ void BeMCContext::DoCodeEmission()
mOut.Write((int32)inst->mArg1.mImmediate); mOut.Write((int32)inst->mArg1.mImmediate);
break; break;
} }
if (EmitIntXMMInst(instForm, inst, 0xF8)) // PSUB?
break;
if (EmitStdXMMInst(instForm, inst, 0x5C)) if (EmitStdXMMInst(instForm, inst, 0x5C))
break; break;
EmitStdInst(instForm, inst, 0x29, 0x2B, 0x81, 0x5, 0x83, 0x5); EmitStdInst(instForm, inst, 0x29, 0x2B, 0x81, 0x5, 0x83, 0x5);
@ -14014,7 +14167,29 @@ void BeMCContext::DoCodeEmission()
break; break;
case BeMCInstKind_Mul: case BeMCInstKind_Mul:
case BeMCInstKind_IMul: case BeMCInstKind_IMul:
{ {
if (instForm == BeMCInstForm_XMM128_RM128)
{
if (arg0Type->IsExplicitVectorType())
{
auto vecType = (BeVectorType*)arg0Type;
if (vecType->mElementType->mTypeCode == BeTypeCode_Int32)
{
Emit(0x66);
EmitREX(arg0, arg1, false);
Emit(0x0F);
if (inst->mKind == BeMCInstKind_IMul)
Emit(0xD5); // PMULLW
else
{
Emit(0x38); Emit(0x40); // PMULLD
}
EmitModRM(arg0, arg1);
break;
}
}
}
if (EmitStdXMMInst(instForm, inst, 0x59)) if (EmitStdXMMInst(instForm, inst, 0x59))
break; break;
@ -14283,7 +14458,10 @@ void BeMCContext::DoCodeEmission()
} }
break; break;
case BeMCInstKind_And: case BeMCInstKind_And:
{ {
if (EmitIntBitwiseXMMInst(instForm, inst, 0xDB)) //PAND
break;
BeMCInst modInst = *inst; BeMCInst modInst = *inst;
bool isZeroing = false; bool isZeroing = false;
@ -14372,12 +14550,16 @@ void BeMCContext::DoCodeEmission()
} }
break; break;
case BeMCInstKind_Or: case BeMCInstKind_Or:
{ {
if (EmitIntBitwiseXMMInst(instForm, inst, 0xEB)) //POR
break;
EmitStdInst(instForm, inst, 0x09, 0x0B, 0x81, 0x1, 0x83, 0x1); EmitStdInst(instForm, inst, 0x09, 0x0B, 0x81, 0x1, 0x83, 0x1);
} }
break; break;
case BeMCInstKind_Xor: case BeMCInstKind_Xor:
{ {
if (EmitIntBitwiseXMMInst(instForm, inst, 0xEF)) //PXOR
break;
if (EmitPackedXMMInst(instForm, inst, 0x57)) if (EmitPackedXMMInst(instForm, inst, 0x57))
break; break;
EmitStdInst(instForm, inst, 0x31, 0x33, 0x81, 0x6, 0x83, 0x6); EmitStdInst(instForm, inst, 0x31, 0x33, 0x81, 0x6, 0x83, 0x6);
@ -14387,6 +14569,53 @@ void BeMCContext::DoCodeEmission()
case BeMCInstKind_Shr: case BeMCInstKind_Shr:
case BeMCInstKind_Sar: case BeMCInstKind_Sar:
{ {
if (instForm == Beefy::BeMCInstForm_XMM128_RM128)
{
if (arg1.IsImmediate())
{
Emit(0x66);
EmitREX(arg1, arg0, false);
Emit(0x0F);
int rx = 0;
switch (inst->mKind)
{
case BeMCInstKind_Shl:
rx = 6;
break;
case BeMCInstKind_Shr:
rx = 2;
break;
case BeMCInstKind_Sar:
rx = 4;
break;
}
Emit(0x71); // PSLLW / PSRAW / PSRLW
EmitModRM(rx, arg0);
Emit((uint8)arg1.mImmediate);
}
else
{
Emit(0x66);
EmitREX(arg0, arg1, false);
Emit(0x0F);
switch (inst->mKind)
{
case BeMCInstKind_Shl:
Emit(0xF1); // PSLLW
break;
case BeMCInstKind_Shr:
Emit(0xD1); // PSRLW
break;
case BeMCInstKind_Sar:
Emit(0xE1); // PSRAW
break;
}
EmitModRM(arg0, arg1);
}
break;
}
int rx = 0; int rx = 0;
switch (inst->mKind) switch (inst->mKind)
{ {
@ -14399,7 +14628,7 @@ void BeMCContext::DoCodeEmission()
case BeMCInstKind_Sar: case BeMCInstKind_Sar:
rx = 7; rx = 7;
break; break;
} }
bool handled = false; bool handled = false;
switch (instForm) switch (instForm)
@ -14568,6 +14797,7 @@ void BeMCContext::DoCodeEmission()
} }
} }
} }
mActiveInst = NULL;
if (mDebugging) if (mDebugging)
{ {
@ -16689,22 +16919,73 @@ void BeMCContext::Generate(BeFunction* function)
case BfIRIntrinsic_Mul: case BfIRIntrinsic_Mul:
case BfIRIntrinsic_Neq: case BfIRIntrinsic_Neq:
case BfIRIntrinsic_Or: case BfIRIntrinsic_Or:
case BfIRIntrinsic_SAR:
case BfIRIntrinsic_SHL:
case BfIRIntrinsic_SHR:
case BfIRIntrinsic_Sub: case BfIRIntrinsic_Sub:
case BfIRIntrinsic_Xor: case BfIRIntrinsic_Xor:
{ {
auto mcLHS = TryToVector(castedInst->mArgs[0].mValue); auto mcLHS = TryToVector(castedInst->mArgs[0].mValue);
auto mcRHS = TryToVector(castedInst->mArgs[1].mValue); BeMCOperand mcRHS;
if ((intrin->mKind == BfIRIntrinsic_SAR) ||
(intrin->mKind == BfIRIntrinsic_SHL) ||
(intrin->mKind == BfIRIntrinsic_SHR))
{
mcRHS = GetOperand(castedInst->mArgs[1].mValue);
if (!mcRHS.IsImmediateInt())
mcRHS = BeMCOperand();
}
if (!mcRHS)
mcRHS = TryToVector(castedInst->mArgs[1].mValue);
switch (intrin->mKind) switch (intrin->mKind)
{ {
case BfIRIntrinsic_Add:
result = AllocBinaryOp(BeMCInstKind_Add, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_And:
result = AllocBinaryOp(BeMCInstKind_And, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_Mul: case BfIRIntrinsic_Mul:
result = AllocBinaryOp(BeMCInstKind_IMul, mcLHS, mcRHS, BeMCBinIdentityKind_None); break; result = AllocBinaryOp(BeMCInstKind_IMul, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break; break;
case BfIRIntrinsic_Or:
result = AllocBinaryOp(BeMCInstKind_Or, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_SAR:
result = AllocBinaryOp(BeMCInstKind_Sar, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_SHL:
result = AllocBinaryOp(BeMCInstKind_Shl, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_SHR:
result = AllocBinaryOp(BeMCInstKind_Shr, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_Sub:
result = AllocBinaryOp(BeMCInstKind_Sub, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
case BfIRIntrinsic_Xor:
result = AllocBinaryOp(BeMCInstKind_Xor, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
break;
default: default:
SoftFail("Unhandled intrinsic"); SoftFail("Unhandled intrinsic");
} }
} }
break; break;
// case BfIRIntrinsic_Cast:
// {
//
// }
// break;
case BfIRIntrinsic_Not:
{
auto mcLHS = TryToVector(castedInst->mArgs[0].mValue);
BeMCOperand mcRHS = BeMCOperand::FromImmediate(-1);
result = AllocBinaryOp(BeMCInstKind_Xor, mcLHS, mcRHS, BeMCBinIdentityKind_None); break;
}
break;
case BfIRIntrinsic_Abs: case BfIRIntrinsic_Abs:
{ {
@ -17096,7 +17377,7 @@ void BeMCContext::Generate(BeFunction* function)
} }
break; break;
default: default:
SoftFail("Intrinsic not handled", castedInst->mDbgLoc); SoftFail(StrFormat("Intrinsic not handled: '%s'", intrin->mName.c_str()), castedInst->mDbgLoc);
break; break;
} }
} }

View file

@ -1449,6 +1449,7 @@ public:
int FindRestoreVolatiles(BeMCBlock* mcBlock, int instIdx); int FindRestoreVolatiles(BeMCBlock* mcBlock, int instIdx);
uint8 GetJumpOpCode(BeCmpKind cmpKind, bool isLong); uint8 GetJumpOpCode(BeCmpKind cmpKind, bool isLong);
BeMCOperand IntXMMGetPacked(BeMCOperand arg, BeVectorType* vecType);
void Emit(uint8 val); void Emit(uint8 val);
void EmitModRM(int mod, int reg, int rm); void EmitModRM(int mod, int reg, int rm);
void EmitModRMRelStack(int rx, int regOffset, int scale); void EmitModRMRelStack(int rx, int regOffset, int scale);
@ -1464,7 +1465,9 @@ public:
void EmitStdInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode_rm_r, uint8 opcode_r_rm, uint8 opcode_rm_imm, uint8 opcode_rm_imm_rx, uint8 opcode_rm_imm8, uint8 opcode_rm_imm8_rx); void EmitStdInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode_rm_r, uint8 opcode_r_rm, uint8 opcode_rm_imm, uint8 opcode_rm_imm_rx, uint8 opcode_rm_imm8, uint8 opcode_rm_imm8_rx);
bool EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode); bool EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode);
bool EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode, uint8 opcode_dest_frm); bool EmitStdXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode, uint8 opcode_dest_frm);
bool EmitPackedXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode); bool EmitPackedXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode);
bool EmitIntXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode);
bool EmitIntBitwiseXMMInst(BeMCInstForm instForm, BeMCInst* inst, uint8 opcode);
void EmitAggMov(const BeMCOperand& dest, const BeMCOperand& src); void EmitAggMov(const BeMCOperand& dest, const BeMCOperand& src);
void DoTLSSetup(); void DoTLSSetup();

View file

@ -471,6 +471,7 @@ class BeIntrinsic : public BeValue
public: public:
BE_VALUE_TYPE(BeIntrinsic, BeValue); BE_VALUE_TYPE(BeIntrinsic, BeValue);
String mName;
BfIRIntrinsic mKind; BfIRIntrinsic mKind;
BeType* mReturnType; BeType* mReturnType;

View file

@ -146,6 +146,7 @@ USING_NS_BF;
case BfTypeCode_Char32: return CreateConst(constLHS->mTypeCode, (uint64)(constLHS->mUInt32 OP constRHS->mUInt32)); \ case BfTypeCode_Char32: return CreateConst(constLHS->mTypeCode, (uint64)(constLHS->mUInt32 OP constRHS->mUInt32)); \
case BfTypeCode_Int64: return CreateConst(constLHS->mTypeCode, (uint64)(constLHS->mInt64 OP constRHS->mInt64)); \ case BfTypeCode_Int64: return CreateConst(constLHS->mTypeCode, (uint64)(constLHS->mInt64 OP constRHS->mInt64)); \
case BfTypeCode_UInt64: return CreateConst(constLHS->mTypeCode, constLHS->mUInt64 OP constRHS->mUInt64); \ case BfTypeCode_UInt64: return CreateConst(constLHS->mTypeCode, constLHS->mUInt64 OP constRHS->mUInt64); \
default: break; \
} }
#define UNARYOP_APPLY(val, OP) \ #define UNARYOP_APPLY(val, OP) \

View file

@ -447,13 +447,16 @@ enum BfIRIntrinsic : uint8
BfIRIntrinsic_MemMove, BfIRIntrinsic_MemMove,
BfIRIntrinsic_MemSet, BfIRIntrinsic_MemSet,
BfIRIntrinsic_Mod, BfIRIntrinsic_Mod,
BfIRIntrinsic_Mul, BfIRIntrinsic_Mul,
BfIRIntrinsic_Not,
BfIRIntrinsic_Neq, BfIRIntrinsic_Neq,
BfIRIntrinsic_Not,
BfIRIntrinsic_Or, BfIRIntrinsic_Or,
BfIRIntrinsic_Pow, BfIRIntrinsic_Pow,
BfIRIntrinsic_PowI, BfIRIntrinsic_PowI,
BfIRIntrinsic_Round, BfIRIntrinsic_Round,
BfIRIntrinsic_SAR,
BfIRIntrinsic_SHL,
BfIRIntrinsic_SHR,
BfIRIntrinsic_Shuffle, BfIRIntrinsic_Shuffle,
BfIRIntrinsic_Sin, BfIRIntrinsic_Sin,
BfIRIntrinsic_Sqrt, BfIRIntrinsic_Sqrt,

View file

@ -181,6 +181,9 @@ static const BuiltinEntry gIntrinEntries[] =
{"pow"}, {"pow"},
{"powi"}, {"powi"},
{"round"}, {"round"},
{"sar"},
{"shl"},
{"shr"},
{"shuffle"}, {"shuffle"},
{"sin"}, {"sin"},
{"sqrt"}, {"sqrt"},
@ -2313,7 +2316,7 @@ void BfIRCodeGen::HandleNextCmd()
{ (llvm::Intrinsic::ID)-2, -1}, // AtomicXor, { (llvm::Intrinsic::ID)-2, -1}, // AtomicXor,
{ llvm::Intrinsic::bswap, -1}, { llvm::Intrinsic::bswap, -1},
{ (llvm::Intrinsic::ID)-2, -1}, // cast, { (llvm::Intrinsic::ID)-2, -1}, // cast,
{ llvm::Intrinsic::cos, 0, -1}, { llvm::Intrinsic::cos, 0, -1},
{ (llvm::Intrinsic::ID)-2, -1}, // div { (llvm::Intrinsic::ID)-2, -1}, // div
{ (llvm::Intrinsic::ID)-2, -1}, // eq { (llvm::Intrinsic::ID)-2, -1}, // eq
{ llvm::Intrinsic::floor, 0, -1}, { llvm::Intrinsic::floor, 0, -1},
@ -2338,8 +2341,11 @@ void BfIRCodeGen::HandleNextCmd()
{ llvm::Intrinsic::pow, 0, -1}, { llvm::Intrinsic::pow, 0, -1},
{ llvm::Intrinsic::powi, 0, -1}, { llvm::Intrinsic::powi, 0, -1},
{ llvm::Intrinsic::round, 0, -1}, { llvm::Intrinsic::round, 0, -1},
{ (llvm::Intrinsic::ID)-2, -1}, // sar
{ (llvm::Intrinsic::ID)-2, -1}, // shl
{ (llvm::Intrinsic::ID)-2, -1}, // shr
{ (llvm::Intrinsic::ID)-2, -1}, // shuffle { (llvm::Intrinsic::ID)-2, -1}, // shuffle
{ llvm::Intrinsic::sin, 0, -1}, { llvm::Intrinsic::sin, 0, -1},
{ llvm::Intrinsic::sqrt, 0, -1}, { llvm::Intrinsic::sqrt, 0, -1},
{ (llvm::Intrinsic::ID)-2, -1}, // sub, { (llvm::Intrinsic::ID)-2, -1}, // sub,
{ (llvm::Intrinsic::ID)-2, -1}, // xor { (llvm::Intrinsic::ID)-2, -1}, // xor
@ -2753,6 +2759,12 @@ void BfIRCodeGen::HandleNextCmd()
} }
} }
break; break;
case BfIRIntrinsic_Not:
{
auto val0 = TryToVector(args[0]);
SetResult(curId, mIRBuilder->CreateNot(val0));
}
break;
case BfIRIntrinsic_Shuffle: case BfIRIntrinsic_Shuffle:
{ {
llvm::SmallVector<uint, 8> intMask; llvm::SmallVector<uint, 8> intMask;
@ -3137,7 +3149,16 @@ void BfIRCodeGen::HandleNextCmd()
break; break;
case BfIRIntrinsic_Cast: case BfIRIntrinsic_Cast:
{ {
SetResult(curId, mIRBuilder->CreateBitCast(args[0], intrinsicData->mReturnType)); auto arg0Type = args[0]->getType();
if (arg0Type->isPointerTy())
{
auto castedRes = mIRBuilder->CreateBitCast(args[0], intrinsicData->mReturnType->getPointerTo());
SetResult(curId, mIRBuilder->CreateAlignedLoad(castedRes, 1));
}
else
{
FatalError("Expected address");
}
} }
break; break;
default: default: