1
0
Fork 0
mirror of https://github.com/beefytech/Beef.git synced 2025-06-10 04:22:20 +02:00

Capture xmm/ymm registers

This commit is contained in:
Brian Fiete 2022-01-03 11:24:45 -05:00
parent f1c38c792d
commit 14e1ffa5ac

View file

@ -150,7 +150,7 @@ static void BfpRecordManager(BfpManager* manager)
gManagerTail = manager; gManagerTail = manager;
} }
typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG); typedef NTSTATUS(NTAPI* NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
static NtQuerySystemInformation_t gNtQuerySystemInformation = NULL; static NtQuerySystemInformation_t gNtQuerySystemInformation = NULL;
static HMODULE gNTDll = NULL; static HMODULE gNTDll = NULL;
@ -216,22 +216,22 @@ WindowsSharedInfo* GetSharedInfo()
if (sharedFileMapping == NULL) if (sharedFileMapping == NULL)
{ {
sharedFileMapping = ::CreateFileMappingA( sharedFileMapping = ::CreateFileMappingA(
INVALID_HANDLE_VALUE, INVALID_HANDLE_VALUE,
NULL, NULL,
PAGE_READWRITE, PAGE_READWRITE,
0, 0,
sizeof(WindowsSharedInfo), sizeof(WindowsSharedInfo),
sharedName.c_str()); sharedName.c_str());
created = true; created = true;
} }
BF_ASSERT(sharedFileMapping != NULL); BF_ASSERT(sharedFileMapping != NULL);
gGlobalPlatformInfo = (WindowsSharedInfo*) MapViewOfFile(sharedFileMapping, gGlobalPlatformInfo = (WindowsSharedInfo*)MapViewOfFile(sharedFileMapping,
FILE_MAP_READ | FILE_MAP_WRITE, FILE_MAP_READ | FILE_MAP_WRITE,
0, 0,
0, 0,
sizeof(WindowsSharedInfo)); sizeof(WindowsSharedInfo));
if (created) if (created)
{ {
@ -289,7 +289,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
#endif #endif
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL); ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
::SetThreadAffinityMask(::GetCurrentThread(), (int64)1<<(windowsSharedInfo->mThreadAcc % processorCount)); ::SetThreadAffinityMask(::GetCurrentThread(), (int64)1 << (windowsSharedInfo->mThreadAcc % processorCount));
uint64 deltaMicro = 0; uint64 deltaMicro = 0;
@ -300,7 +300,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
uint32 prevQPFMicro = 0; uint32 prevQPFMicro = 0;
LARGE_INTEGER frequency = {0, 1}; LARGE_INTEGER frequency = { 0, 1 };
QueryPerformanceFrequency(&frequency); QueryPerformanceFrequency(&frequency);
uint64 startMicro = __rdtsc(); uint64 startMicro = __rdtsc();
@ -312,18 +312,18 @@ uint64 Beefy::BFGetTickCountMicroFast()
LARGE_INTEGER timeNow; LARGE_INTEGER timeNow;
QueryPerformanceCounter(&timeNow); QueryPerformanceCounter(&timeNow);
qPFMicro = (uint32) ((timeNow.QuadPart * 100000000) / frequency.QuadPart); qPFMicro = (uint32)((timeNow.QuadPart * 100000000) / frequency.QuadPart);
} while (qPFMicro - prevQPFMicro < 100000); } while (qPFMicro - prevQPFMicro < 100000);
prevQPFMicro = qPFMicro; prevQPFMicro = qPFMicro;
int64 curMicro = __rdtsc(); int64 curMicro = __rdtsc();
int aDivisor = (int) (curMicro - startMicro); int aDivisor = (int)(curMicro - startMicro);
startMicro = curMicro; startMicro = curMicro;
timingSet[i] = aDivisor; timingSet[i] = aDivisor;
} }
qsort(timingSet, BF_ARRAY_COUNT(timingSet), sizeof(timingSet[0]), IntCompare); qsort(timingSet, BF_ARRAY_COUNT(timingSet), sizeof(timingSet[0]), IntCompare);
gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet)/3]; gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet) / 3];
//gTimerDivisor = *gTimingSet.rbegin(); //gTimerDivisor = *gTimingSet.rbegin();
OutputDebugStrF("BFGetTickCountMicro divisor: %d\n", gTimerDivisor); OutputDebugStrF("BFGetTickCountMicro divisor: %d\n", gTimerDivisor);
@ -364,7 +364,7 @@ uint64 Beefy::BFGetTickCountMicro()
QueryPerformanceCounter(&value); QueryPerformanceCounter(&value);
curTime = value.QuadPart; curTime = value.QuadPart;
return (int64) ((curTime - startTime) * (double)1000000 / freq.QuadPart); return (int64)((curTime - startTime) * (double)1000000 / freq.QuadPart);
} }
static uint64 WinConvertFILETIME(const FILETIME& ft) static uint64 WinConvertFILETIME(const FILETIME& ft)
@ -410,7 +410,7 @@ bool Beefy::DirectoryExists(const StringImpl& path, String* outActualName)
void Beefy::BFFatalError(const StringImpl& message, const StringImpl& file, int line) void Beefy::BFFatalError(const StringImpl& message, const StringImpl& file, int line)
{ {
#ifndef BF_NO_BFAPP #ifndef BF_NO_BFAPP
if (gBFApp != NULL) if (gBFApp != NULL)
gBFApp->mSysDialogCnt++; gBFApp->mSysDialogCnt++;
#endif #endif
@ -876,13 +876,13 @@ public:
static IOCPManager* Get() static IOCPManager* Get()
{ {
AutoCrit autoCrit(gBfpCritSect); AutoCrit autoCrit(gBfpCritSect);
if (gIOCPManager == NULL) if (gIOCPManager == NULL)
{ {
gIOCPManager = new IOCPManager(); gIOCPManager = new IOCPManager();
BfpRecordManager(gIOCPManager); BfpRecordManager(gIOCPManager);
} }
return gIOCPManager; return gIOCPManager;
} }
}; };
@ -892,7 +892,7 @@ static void __cdecl HandlePureVirtualFunctionCall()
BfpSystem_FatalError("Pure virtual function call", NULL); BfpSystem_FatalError("Pure virtual function call", NULL);
} }
static void __cdecl HandleInvalidParameter(const wchar_t *, const wchar_t *, const wchar_t *, unsigned int, uintptr_t) static void __cdecl HandleInvalidParameter(const wchar_t*, const wchar_t*, const wchar_t*, unsigned int, uintptr_t)
{ {
BfpSystem_FatalError("Invalid parameter", NULL); BfpSystem_FatalError("Invalid parameter", NULL);
} }
@ -1304,7 +1304,7 @@ BFP_EXPORT void BFP_CALLTYPE BfpProcess_Enumerate(const char* machineName, BfpPr
memcpy(process->mInfo, curProcessInfo, dataSize); memcpy(process->mInfo, curProcessInfo, dataSize);
UTF16String utf16; UTF16String utf16;
utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length/2); utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length / 2);
process->mImageName = UTF8Encode(utf16); process->mImageName = UTF8Encode(utf16);
outProcesses[count++] = process; outProcesses[count++] = process;
@ -1989,20 +1989,20 @@ BFP_EXPORT void BFP_CALLTYPE BfpSpawn_Kill(BfpSpawn* spawn, int exitCode, BfpKil
return; return;
} }
// BOOL hadConsole = ::FreeConsole(); // BOOL hadConsole = ::FreeConsole();
// ::AttachConsole(spawn->mProcessId); // ::AttachConsole(spawn->mProcessId);
// ::SetConsoleCtrlHandler(NULL, true); // ::SetConsoleCtrlHandler(NULL, true);
// ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0); // ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
// //::Sleep(2000); // //::Sleep(2000);
// ::FreeConsole(); // ::FreeConsole();
// ::SetConsoleCtrlHandler(NULL, false); // ::SetConsoleCtrlHandler(NULL, false);
// if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode)) // if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode))
// { // {
// int lastError = ::GetLastError(); // int lastError = ::GetLastError();
// OUTRESULT(BfpSpawnResult_UnknownError); // OUTRESULT(BfpSpawnResult_UnknownError);
// return; // return;
// } // }
OUTRESULT(BfpSpawnResult_Ok); OUTRESULT(BfpSpawnResult_Ok);
} }
@ -2101,13 +2101,13 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_SetName(BfpThread* thread, const char* na
if (hThread == NULL) if (hThread == NULL)
hThread = ::GetCurrentThread(); hThread = ::GetCurrentThread();
if (gSetThreadDescription != NULL) if (gSetThreadDescription != NULL)
{ {
gSetThreadDescription(hThread, UTF8Decode(name).c_str()); gSetThreadDescription(hThread, UTF8Decode(name).c_str());
OUTRESULT(BfpThreadResult_Ok); OUTRESULT(BfpThreadResult_Ok);
return; return;
} }
SetThreadName(::GetThreadId(hThread), name); SetThreadName(::GetThreadId(hThread), name);
@ -2193,32 +2193,165 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_Resume(BfpThread* thread, BfpThreadResult
OUTRESULT(BfpThreadResult_Ok); OUTRESULT(BfpThreadResult_Ok);
} }
// Windows 7 SP1 is the first version of Windows to support the AVX API.
// The value for CONTEXT_XSTATE has changed between Windows 7 and
// Windows 7 SP1 and greater.
// While the value will be correct for future SDK headers, we need to set
// this value manually when building with a Windows 7 SDK for running on
// Windows 7 SP1 OS bits.
#undef CONTEXT_XSTATE
#if defined(_M_X64)
#define CONTEXT_XSTATE (0x00100040)
#else
#define CONTEXT_XSTATE (0x00010040)
#endif
// Since the AVX API is not declared in the Windows 7 SDK headers and
// since we don't have the proper libs to work with, we will declare
// the API as function pointers and get them with GetProcAddress calls
// from kernel32.dll. We also need to set some #defines.
#define XSTATE_AVX (XSTATE_GSSE)
#define XSTATE_MASK_AVX (XSTATE_MASK_GSSE)
// Entry points resolved lazily by CaptureRegistersEx. All start out NULL.
// pfnGetEnabledXStateFeatures doubles as the "lookup state" flag: NULL means
// "not resolved yet" and (PGETENABLEDXSTATEFEATURES)-1 means "resolution
// failed, do not retry".
typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
static PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
typedef BOOL(WINAPI* PINITIALIZECONTEXT)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength);
static PINITIALIZECONTEXT pfnInitializeContext = NULL;
typedef BOOL(WINAPI* PGETXSTATEFEATURESMASK)(PCONTEXT Context, PDWORD64 FeatureMask);
static PGETXSTATEFEATURESMASK pfnGetXStateFeaturesMask = NULL;
typedef PVOID(WINAPI* LOCATEXSTATEFEATURE)(PCONTEXT Context, DWORD FeatureId, PDWORD Length);
static LOCATEXSTATEFEATURE pfnLocateXStateFeature = NULL;
typedef BOOL(WINAPI* SETXSTATEFEATURESMASK)(PCONTEXT Context, DWORD64 FeatureMask);
static SETXSTATEFEATURESMASK pfnSetXStateFeaturesMask = NULL;
// Backing storage handed to InitializeContext by CaptureRegistersEx; the
// CONTEXT* that function returns points into this buffer.
// NOTE(review): a single static buffer means concurrent callers would share
// (and overwrite) the same context - confirm callers are serialized.
static uint8 ContextBuffer[4096];
// Captures the context of hThread with the AVX extended state enabled and
// copies the AVX feature area (upper YMM halves) to curPtr.
//
// Parameters:
//   hThread - thread whose context is queried via GetThreadContext.
//   curPtr  - in/out write cursor into the caller's register buffer. When the
//             AVX area is copied, the cursor is advanced past it (in units of
//             intptr). It is left untouched when nothing is copied. May be
//             NULL, in which case no copy is attempted.
//
// Returns:
//   A CONTEXT* pointing into the static ContextBuffer when the thread context
//   was captured successfully (whether or not the AVX area could be copied),
//   or NULL when the extended capture is unavailable or failed. On NULL the
//   caller is expected to fall back to a plain GetThreadContext, which also
//   lets it report a genuine capture failure.
//
// NOTE(review): the returned pointer aliases a single static buffer, so the
// result is only valid until the next call - confirm callers are serialized.
static CONTEXT* CaptureRegistersEx(HANDLE hThread, intptr*& curPtr)
{
	PCONTEXT Context = NULL;
	DWORD ContextSize = 0;
	DWORD64 FeatureMask = 0;
	DWORD FeatureLength = 0;

	// A previous call already determined the XState API is unavailable.
	if (pfnGetEnabledXStateFeatures == (PGETENABLEDXSTATEFEATURES)-1)
		return NULL;

	if (pfnGetEnabledXStateFeatures == NULL)
	{
		// First call: resolve the XState entry points from kernel32.dll.
		HMODULE hm = GetModuleHandleA("kernel32.dll");
		if (hm == NULL)
		{
			pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)-1;
			return NULL;
		}

		pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hm, "GetEnabledXStateFeatures");
		pfnInitializeContext = (PINITIALIZECONTEXT)GetProcAddress(hm, "InitializeContext");
		pfnGetXStateFeaturesMask = (PGETXSTATEFEATURESMASK)GetProcAddress(hm, "GetXStateFeaturesMask");
		pfnLocateXStateFeature = (LOCATEXSTATEFEATURE)GetProcAddress(hm, "LocateXStateFeature");
		pfnSetXStateFeaturesMask = (SETXSTATEFEATURESMASK)GetProcAddress(hm, "SetXStateFeaturesMask");

		if (pfnGetEnabledXStateFeatures == NULL
			|| pfnInitializeContext == NULL
			|| pfnGetXStateFeaturesMask == NULL
			|| pfnLocateXStateFeature == NULL
			|| pfnSetXStateFeaturesMask == NULL)
		{
			// Remember the failure so later calls bail out immediately.
			pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)-1;
			return NULL;
		}
	}

	// Nothing extra to capture if AVX state is not enabled on this system.
	FeatureMask = pfnGetEnabledXStateFeatures();
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return NULL;

	const DWORD contextFlags = CONTEXT_ALL | CONTEXT_XSTATE | CONTEXT_EXCEPTION_REQUEST;

	// Size query: this call is only used to obtain the required length, so
	// its return value is intentionally not inspected.
	pfnInitializeContext(NULL, contextFlags, NULL, &ContextSize);
	if ((ContextSize == 0) || (ContextSize > sizeof(ContextBuffer)))
		return NULL;

	if (!pfnInitializeContext(ContextBuffer, contextFlags, &Context, &ContextSize))
		return NULL;

	// If the context could not be prepared or captured, its registers are not
	// valid (they may even be left over from an earlier call, since the buffer
	// is static). Report failure so the caller does not consume them.
	if (!pfnSetXStateFeaturesMask(Context, XSTATE_MASK_AVX))
		return NULL;
	if (!GetThreadContext(hThread, Context))
		return NULL;

	// From here on the base registers are valid; failing to locate the AVX
	// area only means there is nothing extra to copy.
	if (!pfnGetXStateFeaturesMask(Context, &FeatureMask))
		return Context;
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return Context;

	// The legacy SSE lookup is used only for the length it reports; the bytes
	// that get copied come from the AVX area.
	// NOTE(review): presumably both areas have the same length - TODO confirm.
	PVOID sseArea = pfnLocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLength);
	PVOID avxArea = pfnLocateXStateFeature(Context, XSTATE_AVX, NULL);

	// The length is undefined when the lookup fails, and the caller may pass a
	// NULL output buffer, so only copy when every pointer is usable.
	if ((sseArea == NULL) || (avxArea == NULL) || (curPtr == NULL))
		return Context;

	memcpy(curPtr, avxArea, FeatureLength);
	curPtr += FeatureLength / sizeof(intptr);
	return Context;
}
BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr* outStackPtr, intptr* outIntRegs, int* inOutIntRegCount, BfpThreadResult* outResult) BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr* outStackPtr, intptr* outIntRegs, int* inOutIntRegCount, BfpThreadResult* outResult)
{ {
CONTEXT ctx; CONTEXT ctx;
memset(&ctx, 0, sizeof(CONTEXT)); intptr* curPtr = outIntRegs;
ctx.ContextFlags = CONTEXT_INTEGER | CONTEXT_CONTROL; CONTEXT* ctxPtr = NULL;
BOOL success = ::GetThreadContext((HANDLE)thread, &ctx);
if (!success) if (*inOutIntRegCount > 48)
ctxPtr = CaptureRegistersEx((HANDLE)thread, curPtr);
if (ctxPtr == NULL)
{ {
int error = GetLastError(); memset(&ctx, 0, sizeof(CONTEXT));
OUTRESULT(BfpThreadResult_UnknownError); ctx.ContextFlags = CONTEXT_ALL;
return; BOOL success = ::GetThreadContext((HANDLE)thread, (CONTEXT*)&ctx);
if (!success)
{
int error = GetLastError();
OUTRESULT(BfpThreadResult_UnknownError);
return;
}
ctxPtr = &ctx;
DWORD lastError = GetLastError();
BF_ASSERT(success);
} }
DWORD lastError = GetLastError();
BF_ASSERT(success);
#ifdef BF32 #ifdef BF32
*outStackPtr = (intptr)ctx.Esp; * outStackPtr = (intptr)ctxPtr->Esp;
if (*inOutIntRegCount < 7) if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 7)
{ {
OUTRESULT(BfpThreadResult_InsufficientBuffer); OUTRESULT(BfpThreadResult_InsufficientBuffer);
return; return;
} }
#else #else
*outStackPtr = (intptr)ctx.Rsp; * outStackPtr = (intptr)ctxPtr->Rsp;
if (*inOutIntRegCount < 15) if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 48)
{ {
OUTRESULT(BfpThreadResult_InsufficientBuffer); OUTRESULT(BfpThreadResult_InsufficientBuffer);
return; return;
@ -2230,36 +2363,36 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr
if (outIntRegs == NULL) if (outIntRegs == NULL)
return; return;
intptr* curPtr = outIntRegs;
#ifdef BF32 #ifdef BF32
*(curPtr++) = (intptr)ctx.Eax; * (curPtr++) = (intptr)ctxPtr->Eax;
*(curPtr++) = (intptr)ctx.Ebx; *(curPtr++) = (intptr)ctxPtr->Ebx;
*(curPtr++) = (intptr)ctx.Ecx; *(curPtr++) = (intptr)ctxPtr->Ecx;
*(curPtr++) = (intptr)ctx.Edx; *(curPtr++) = (intptr)ctxPtr->Edx;
*(curPtr++) = (intptr)ctx.Esi; *(curPtr++) = (intptr)ctxPtr->Esi;
*(curPtr++) = (intptr)ctx.Edi; *(curPtr++) = (intptr)ctxPtr->Edi;
*(curPtr++) = (intptr)ctx.Ebp; *(curPtr++) = (intptr)ctxPtr->Ebp;
#else #else
*(curPtr++) = (intptr)ctx.SegFs; // Testing * (curPtr++) = (intptr)ctxPtr->SegFs; // Testing
*(curPtr++) = (intptr)ctx.Rax; *(curPtr++) = (intptr)ctxPtr->Rax;
*(curPtr++) = (intptr)ctx.Rbx; *(curPtr++) = (intptr)ctxPtr->Rbx;
*(curPtr++) = (intptr)ctx.Rcx; *(curPtr++) = (intptr)ctxPtr->Rcx;
*(curPtr++) = (intptr)ctx.Rdx; *(curPtr++) = (intptr)ctxPtr->Rdx;
*(curPtr++) = (intptr)ctx.Rsi; *(curPtr++) = (intptr)ctxPtr->Rsi;
*(curPtr++) = (intptr)ctx.Rdi; *(curPtr++) = (intptr)ctxPtr->Rdi;
*(curPtr++) = (intptr)ctx.Rbp; *(curPtr++) = (intptr)ctxPtr->Rbp;
*(curPtr++) = (intptr)ctx.R8; *(curPtr++) = (intptr)ctxPtr->R8;
*(curPtr++) = (intptr)ctx.R9; *(curPtr++) = (intptr)ctxPtr->R9;
*(curPtr++) = (intptr)ctx.R10; *(curPtr++) = (intptr)ctxPtr->R10;
*(curPtr++) = (intptr)ctx.R11; *(curPtr++) = (intptr)ctxPtr->R11;
*(curPtr++) = (intptr)ctx.R12; *(curPtr++) = (intptr)ctxPtr->R12;
*(curPtr++) = (intptr)ctx.R13; *(curPtr++) = (intptr)ctxPtr->R13;
*(curPtr++) = (intptr)ctx.R14; *(curPtr++) = (intptr)ctxPtr->R14;
*(curPtr++) = (intptr)ctx.R15; *(curPtr++) = (intptr)ctxPtr->R15;
memcpy(curPtr, &ctxPtr->Xmm0, 16 * 16);
curPtr += (16 * 16) / sizeof(intptr);
#endif #endif
*inOutIntRegCount = (int)(curPtr - outIntRegs); * inOutIntRegCount = (int)(curPtr - outIntRegs);
} }
struct BfpCritSect struct BfpCritSect