1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-06-10 12:32:20 +02:00

Capture xmm/ymm registers

This commit is contained in:
Brian Fiete 2022-01-03 11:24:45 -05:00
parent f1c38c792d
commit 14e1ffa5ac

View file

@ -150,7 +150,7 @@ static void BfpRecordManager(BfpManager* manager)
gManagerTail = manager;
}
typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
typedef NTSTATUS(NTAPI* NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
static NtQuerySystemInformation_t gNtQuerySystemInformation = NULL;
static HMODULE gNTDll = NULL;
@ -216,22 +216,22 @@ WindowsSharedInfo* GetSharedInfo()
if (sharedFileMapping == NULL)
{
sharedFileMapping = ::CreateFileMappingA(
INVALID_HANDLE_VALUE,
NULL,
PAGE_READWRITE,
0,
sizeof(WindowsSharedInfo),
sharedName.c_str());
INVALID_HANDLE_VALUE,
NULL,
PAGE_READWRITE,
0,
sizeof(WindowsSharedInfo),
sharedName.c_str());
created = true;
}
BF_ASSERT(sharedFileMapping != NULL);
gGlobalPlatformInfo = (WindowsSharedInfo*) MapViewOfFile(sharedFileMapping,
FILE_MAP_READ | FILE_MAP_WRITE,
0,
0,
sizeof(WindowsSharedInfo));
gGlobalPlatformInfo = (WindowsSharedInfo*)MapViewOfFile(sharedFileMapping,
FILE_MAP_READ | FILE_MAP_WRITE,
0,
0,
sizeof(WindowsSharedInfo));
if (created)
{
@ -289,7 +289,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
#endif
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
::SetThreadAffinityMask(::GetCurrentThread(), (int64)1<<(windowsSharedInfo->mThreadAcc % processorCount));
::SetThreadAffinityMask(::GetCurrentThread(), (int64)1 << (windowsSharedInfo->mThreadAcc % processorCount));
uint64 deltaMicro = 0;
@ -300,7 +300,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
uint32 prevQPFMicro = 0;
LARGE_INTEGER frequency = {0, 1};
LARGE_INTEGER frequency = { 0, 1 };
QueryPerformanceFrequency(&frequency);
uint64 startMicro = __rdtsc();
@ -312,18 +312,18 @@ uint64 Beefy::BFGetTickCountMicroFast()
LARGE_INTEGER timeNow;
QueryPerformanceCounter(&timeNow);
qPFMicro = (uint32) ((timeNow.QuadPart * 100000000) / frequency.QuadPart);
qPFMicro = (uint32)((timeNow.QuadPart * 100000000) / frequency.QuadPart);
} while (qPFMicro - prevQPFMicro < 100000);
prevQPFMicro = qPFMicro;
int64 curMicro = __rdtsc();
int aDivisor = (int) (curMicro - startMicro);
int aDivisor = (int)(curMicro - startMicro);
startMicro = curMicro;
timingSet[i] = aDivisor;
}
qsort(timingSet, BF_ARRAY_COUNT(timingSet), sizeof(timingSet[0]), IntCompare);
gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet)/3];
gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet) / 3];
//gTimerDivisor = *gTimingSet.rbegin();
OutputDebugStrF("BFGetTickCountMicro divisor: %d\n", gTimerDivisor);
@ -364,7 +364,7 @@ uint64 Beefy::BFGetTickCountMicro()
QueryPerformanceCounter(&value);
curTime = value.QuadPart;
return (int64) ((curTime - startTime) * (double)1000000 / freq.QuadPart);
return (int64)((curTime - startTime) * (double)1000000 / freq.QuadPart);
}
static uint64 WinConvertFILETIME(const FILETIME& ft)
@ -410,7 +410,7 @@ bool Beefy::DirectoryExists(const StringImpl& path, String* outActualName)
void Beefy::BFFatalError(const StringImpl& message, const StringImpl& file, int line)
{
#ifndef BF_NO_BFAPP
if (gBFApp != NULL)
if (gBFApp != NULL)
gBFApp->mSysDialogCnt++;
#endif
@ -876,13 +876,13 @@ public:
static IOCPManager* Get()
{
AutoCrit autoCrit(gBfpCritSect);
if (gIOCPManager == NULL)
{
gIOCPManager = new IOCPManager();
BfpRecordManager(gIOCPManager);
}
return gIOCPManager;
AutoCrit autoCrit(gBfpCritSect);
if (gIOCPManager == NULL)
{
gIOCPManager = new IOCPManager();
BfpRecordManager(gIOCPManager);
}
return gIOCPManager;
}
};
@ -892,7 +892,7 @@ static void __cdecl HandlePureVirtualFunctionCall()
BfpSystem_FatalError("Pure virtual function call", NULL);
}
static void __cdecl HandleInvalidParameter(const wchar_t *, const wchar_t *, const wchar_t *, unsigned int, uintptr_t)
static void __cdecl HandleInvalidParameter(const wchar_t*, const wchar_t*, const wchar_t*, unsigned int, uintptr_t)
{
BfpSystem_FatalError("Invalid parameter", NULL);
}
@ -1304,7 +1304,7 @@ BFP_EXPORT void BFP_CALLTYPE BfpProcess_Enumerate(const char* machineName, BfpPr
memcpy(process->mInfo, curProcessInfo, dataSize);
UTF16String utf16;
utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length/2);
utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length / 2);
process->mImageName = UTF8Encode(utf16);
outProcesses[count++] = process;
@ -1989,20 +1989,20 @@ BFP_EXPORT void BFP_CALLTYPE BfpSpawn_Kill(BfpSpawn* spawn, int exitCode, BfpKil
return;
}
// BOOL hadConsole = ::FreeConsole();
// ::AttachConsole(spawn->mProcessId);
// ::SetConsoleCtrlHandler(NULL, true);
// ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
// //::Sleep(2000);
// ::FreeConsole();
// ::SetConsoleCtrlHandler(NULL, false);
// BOOL hadConsole = ::FreeConsole();
// ::AttachConsole(spawn->mProcessId);
// ::SetConsoleCtrlHandler(NULL, true);
// ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
// //::Sleep(2000);
// ::FreeConsole();
// ::SetConsoleCtrlHandler(NULL, false);
// if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode))
// {
// int lastError = ::GetLastError();
// OUTRESULT(BfpSpawnResult_UnknownError);
// return;
// }
// if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode))
// {
// int lastError = ::GetLastError();
// OUTRESULT(BfpSpawnResult_UnknownError);
// return;
// }
OUTRESULT(BfpSpawnResult_Ok);
}
@ -2101,13 +2101,13 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_SetName(BfpThread* thread, const char* na
if (hThread == NULL)
hThread = ::GetCurrentThread();
if (gSetThreadDescription != NULL)
{
gSetThreadDescription(hThread, UTF8Decode(name).c_str());
if (gSetThreadDescription != NULL)
{
gSetThreadDescription(hThread, UTF8Decode(name).c_str());
OUTRESULT(BfpThreadResult_Ok);
return;
}
OUTRESULT(BfpThreadResult_Ok);
return;
}
SetThreadName(::GetThreadId(hThread), name);
@ -2193,32 +2193,165 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_Resume(BfpThread* thread, BfpThreadResult
OUTRESULT(BfpThreadResult_Ok);
}
// Windows 7 SP1 is the first version of Windows to support the AVX API.
// The value for CONTEXT_XSTATE has changed between Windows 7 and
// Windows 7 SP1 and greater.
// While the value will be correct for future SDK headers, we need to set
// this value manually when building with a Windows 7 SDK for running on
// Windows 7 SP1 OS bits.
#undef CONTEXT_XSTATE
#if defined(_M_X64)
#define CONTEXT_XSTATE (0x00100040)
#else
#define CONTEXT_XSTATE (0x00010040)
#endif
// Since the AVX API is not declared in the Windows 7 SDK headers and
// since we don't have the proper libs to work with, we will declare
// the API as function pointers and get them with GetProcAddress calls
// from kernel32.dll. We also need to set some #defines.
#define XSTATE_AVX (XSTATE_GSSE)
#define XSTATE_MASK_AVX (XSTATE_MASK_GSSE)
// Signatures of the XState entry points and the function pointers that hold
// them. CaptureRegistersEx resolves all five from kernel32.dll with
// GetProcAddress on first use.
//
// pfnGetEnabledXStateFeatures doubles as the lookup state: NULL means the
// lookup has not been attempted yet, and (PGETENABLEDXSTATEFEATURES)-1 means
// the lookup failed and must not be retried.
typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
static PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
typedef BOOL(WINAPI* PINITIALIZECONTEXT)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength);
static PINITIALIZECONTEXT pfnInitializeContext = NULL;
typedef BOOL(WINAPI* PGETXSTATEFEATURESMASK)(PCONTEXT Context, PDWORD64 FeatureMask);
static PGETXSTATEFEATURESMASK pfnGetXStateFeaturesMask = NULL;
typedef PVOID(WINAPI* LOCATEXSTATEFEATURE)(PCONTEXT Context, DWORD FeatureId, PDWORD Length);
static LOCATEXSTATEFEATURE pfnLocateXStateFeature = NULL;
typedef BOOL(WINAPI* SETXSTATEFEATURESMASK)(PCONTEXT Context, DWORD64 FeatureMask);
static SETXSTATEFEATURESMASK pfnSetXStateFeaturesMask = NULL;
// Backing storage passed to InitializeContext by CaptureRegistersEx. The
// CONTEXT that function returns points into this buffer, so every call reuses
// (and overwrites) the same memory.
static uint8 ContextBuffer[4096];
// Captures the register state of a thread including the AVX extended state,
// using the XState context API resolved at runtime from kernel32.dll.
//
// hThread - thread whose context is read with GetThreadContext.
// curPtr  - output cursor. When the captured context carries AVX state, the
//           AVX feature area is copied to curPtr and curPtr is advanced past
//           the copied bytes. A NULL cursor skips the copy.
//
// Returns a CONTEXT that lives inside the static ContextBuffer (it is
// overwritten by the next call), or NULL when the XState API is unavailable,
// AVX is not enabled, the context does not fit in ContextBuffer, or the thread
// context could not be captured. Whenever NULL is returned curPtr is left
// untouched, so the caller can fall back to a plain GetThreadContext.
static CONTEXT* CaptureRegistersEx(HANDLE hThread, intptr*& curPtr)
{
	const DWORD contextFlags = CONTEXT_ALL | CONTEXT_XSTATE | CONTEXT_EXCEPTION_REQUEST;

	// A previous call already determined that the API is not available.
	if (pfnGetEnabledXStateFeatures == (PGETENABLEDXSTATEFEATURES)-1)
		return NULL;

	if (pfnGetEnabledXStateFeatures == NULL)
	{
		HMODULE hm = GetModuleHandleA("kernel32.dll");
		if (hm == NULL)
		{
			pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)-1;
			return NULL;
		}

		pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hm, "GetEnabledXStateFeatures");
		pfnInitializeContext = (PINITIALIZECONTEXT)GetProcAddress(hm, "InitializeContext");
		pfnGetXStateFeaturesMask = (PGETXSTATEFEATURESMASK)GetProcAddress(hm, "GetXStateFeaturesMask");
		pfnLocateXStateFeature = (LOCATEXSTATEFEATURE)GetProcAddress(hm, "LocateXStateFeature");
		pfnSetXStateFeaturesMask = (SETXSTATEFEATURESMASK)GetProcAddress(hm, "SetXStateFeaturesMask");

		if (pfnGetEnabledXStateFeatures == NULL
			|| pfnInitializeContext == NULL
			|| pfnGetXStateFeaturesMask == NULL
			|| pfnLocateXStateFeature == NULL
			|| pfnSetXStateFeaturesMask == NULL)
		{
			// Remember the failure so the lookup is not repeated.
			pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)-1;
			return NULL;
		}
	}

	DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return NULL;

	// First call only queries the required size; its result is intentionally ignored.
	DWORD ContextSize = 0;
	(void)pfnInitializeContext(NULL, contextFlags, NULL, &ContextSize);
	if (ContextSize > sizeof(ContextBuffer))
		return NULL;

	PCONTEXT Context = NULL;
	if (!pfnInitializeContext(ContextBuffer, contextFlags, &Context, &ContextSize))
		return NULL;

	// Until GetThreadContext succeeds the context holds no captured registers,
	// so a failure here must be reported as NULL rather than handing back an
	// unfilled context that the caller would treat as valid.
	if (!pfnSetXStateFeaturesMask(Context, XSTATE_MASK_AVX))
		return NULL;
	if (!GetThreadContext(hThread, Context))
		return NULL;

	// From here on the base registers are valid; only the AVX copy is optional.
	if (!pfnGetXStateFeaturesMask(Context, &FeatureMask))
		return Context;
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return Context;

	// The legacy SSE lookup supplies the byte count used for the AVX copy.
	DWORD FeatureLength = 0;
	PM128A Xmm = (PM128A)pfnLocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLength);
	PM128A Ymm = (PM128A)pfnLocateXStateFeature(Context, XSTATE_AVX, NULL);
	if ((Xmm == NULL) || (Ymm == NULL) || (curPtr == NULL))
		return Context;

	memcpy(curPtr, Ymm, FeatureLength);
	curPtr += FeatureLength / sizeof(intptr);

	return Context;
}
BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr* outStackPtr, intptr* outIntRegs, int* inOutIntRegCount, BfpThreadResult* outResult)
{
CONTEXT ctx;
memset(&ctx, 0, sizeof(CONTEXT));
ctx.ContextFlags = CONTEXT_INTEGER | CONTEXT_CONTROL;
BOOL success = ::GetThreadContext((HANDLE)thread, &ctx);
if (!success)
intptr* curPtr = outIntRegs;
CONTEXT* ctxPtr = NULL;
if (*inOutIntRegCount > 48)
ctxPtr = CaptureRegistersEx((HANDLE)thread, curPtr);
if (ctxPtr == NULL)
{
int error = GetLastError();
OUTRESULT(BfpThreadResult_UnknownError);
return;
memset(&ctx, 0, sizeof(CONTEXT));
ctx.ContextFlags = CONTEXT_ALL;
BOOL success = ::GetThreadContext((HANDLE)thread, (CONTEXT*)&ctx);
if (!success)
{
int error = GetLastError();
OUTRESULT(BfpThreadResult_UnknownError);
return;
}
ctxPtr = &ctx;
DWORD lastError = GetLastError();
BF_ASSERT(success);
}
DWORD lastError = GetLastError();
BF_ASSERT(success);
#ifdef BF32
*outStackPtr = (intptr)ctx.Esp;
if (*inOutIntRegCount < 7)
* outStackPtr = (intptr)ctxPtr->Esp;
if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 7)
{
OUTRESULT(BfpThreadResult_InsufficientBuffer);
return;
}
#else
*outStackPtr = (intptr)ctx.Rsp;
if (*inOutIntRegCount < 15)
* outStackPtr = (intptr)ctxPtr->Rsp;
if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 48)
{
OUTRESULT(BfpThreadResult_InsufficientBuffer);
return;
@ -2230,36 +2363,36 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr
if (outIntRegs == NULL)
return;
intptr* curPtr = outIntRegs;
#ifdef BF32
*(curPtr++) = (intptr)ctx.Eax;
*(curPtr++) = (intptr)ctx.Ebx;
*(curPtr++) = (intptr)ctx.Ecx;
*(curPtr++) = (intptr)ctx.Edx;
*(curPtr++) = (intptr)ctx.Esi;
*(curPtr++) = (intptr)ctx.Edi;
*(curPtr++) = (intptr)ctx.Ebp;
* (curPtr++) = (intptr)ctxPtr->Eax;
*(curPtr++) = (intptr)ctxPtr->Ebx;
*(curPtr++) = (intptr)ctxPtr->Ecx;
*(curPtr++) = (intptr)ctxPtr->Edx;
*(curPtr++) = (intptr)ctxPtr->Esi;
*(curPtr++) = (intptr)ctxPtr->Edi;
*(curPtr++) = (intptr)ctxPtr->Ebp;
#else
*(curPtr++) = (intptr)ctx.SegFs; // Testing
*(curPtr++) = (intptr)ctx.Rax;
*(curPtr++) = (intptr)ctx.Rbx;
*(curPtr++) = (intptr)ctx.Rcx;
*(curPtr++) = (intptr)ctx.Rdx;
*(curPtr++) = (intptr)ctx.Rsi;
*(curPtr++) = (intptr)ctx.Rdi;
*(curPtr++) = (intptr)ctx.Rbp;
*(curPtr++) = (intptr)ctx.R8;
*(curPtr++) = (intptr)ctx.R9;
*(curPtr++) = (intptr)ctx.R10;
*(curPtr++) = (intptr)ctx.R11;
*(curPtr++) = (intptr)ctx.R12;
*(curPtr++) = (intptr)ctx.R13;
*(curPtr++) = (intptr)ctx.R14;
*(curPtr++) = (intptr)ctx.R15;
* (curPtr++) = (intptr)ctxPtr->SegFs; // Testing
*(curPtr++) = (intptr)ctxPtr->Rax;
*(curPtr++) = (intptr)ctxPtr->Rbx;
*(curPtr++) = (intptr)ctxPtr->Rcx;
*(curPtr++) = (intptr)ctxPtr->Rdx;
*(curPtr++) = (intptr)ctxPtr->Rsi;
*(curPtr++) = (intptr)ctxPtr->Rdi;
*(curPtr++) = (intptr)ctxPtr->Rbp;
*(curPtr++) = (intptr)ctxPtr->R8;
*(curPtr++) = (intptr)ctxPtr->R9;
*(curPtr++) = (intptr)ctxPtr->R10;
*(curPtr++) = (intptr)ctxPtr->R11;
*(curPtr++) = (intptr)ctxPtr->R12;
*(curPtr++) = (intptr)ctxPtr->R13;
*(curPtr++) = (intptr)ctxPtr->R14;
*(curPtr++) = (intptr)ctxPtr->R15;
memcpy(curPtr, &ctxPtr->Xmm0, 16 * 16);
curPtr += (16 * 16) / sizeof(intptr);
#endif
*inOutIntRegCount = (int)(curPtr - outIntRegs);
* inOutIntRegCount = (int)(curPtr - outIntRegs);
}
struct BfpCritSect