mirror of
https://github.com/beefytech/Beef.git
synced 2025-06-10 04:22:20 +02:00
Capture xmm/ymm registers
This commit is contained in:
parent
f1c38c792d
commit
14e1ffa5ac
1 changed files with 403 additions and 270 deletions
|
@ -150,7 +150,7 @@ static void BfpRecordManager(BfpManager* manager)
|
|||
gManagerTail = manager;
|
||||
}
|
||||
|
||||
typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
|
||||
typedef NTSTATUS(NTAPI* NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG);
|
||||
static NtQuerySystemInformation_t gNtQuerySystemInformation = NULL;
|
||||
static HMODULE gNTDll = NULL;
|
||||
|
||||
|
@ -227,7 +227,7 @@ WindowsSharedInfo* GetSharedInfo()
|
|||
|
||||
BF_ASSERT(sharedFileMapping != NULL);
|
||||
|
||||
gGlobalPlatformInfo = (WindowsSharedInfo*) MapViewOfFile(sharedFileMapping,
|
||||
gGlobalPlatformInfo = (WindowsSharedInfo*)MapViewOfFile(sharedFileMapping,
|
||||
FILE_MAP_READ | FILE_MAP_WRITE,
|
||||
0,
|
||||
0,
|
||||
|
@ -289,7 +289,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
|
|||
#endif
|
||||
|
||||
::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL);
|
||||
::SetThreadAffinityMask(::GetCurrentThread(), (int64)1<<(windowsSharedInfo->mThreadAcc % processorCount));
|
||||
::SetThreadAffinityMask(::GetCurrentThread(), (int64)1 << (windowsSharedInfo->mThreadAcc % processorCount));
|
||||
|
||||
uint64 deltaMicro = 0;
|
||||
|
||||
|
@ -300,7 +300,7 @@ uint64 Beefy::BFGetTickCountMicroFast()
|
|||
|
||||
uint32 prevQPFMicro = 0;
|
||||
|
||||
LARGE_INTEGER frequency = {0, 1};
|
||||
LARGE_INTEGER frequency = { 0, 1 };
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
uint64 startMicro = __rdtsc();
|
||||
|
||||
|
@ -312,18 +312,18 @@ uint64 Beefy::BFGetTickCountMicroFast()
|
|||
LARGE_INTEGER timeNow;
|
||||
QueryPerformanceCounter(&timeNow);
|
||||
|
||||
qPFMicro = (uint32) ((timeNow.QuadPart * 100000000) / frequency.QuadPart);
|
||||
qPFMicro = (uint32)((timeNow.QuadPart * 100000000) / frequency.QuadPart);
|
||||
} while (qPFMicro - prevQPFMicro < 100000);
|
||||
prevQPFMicro = qPFMicro;
|
||||
|
||||
int64 curMicro = __rdtsc();
|
||||
int aDivisor = (int) (curMicro - startMicro);
|
||||
int aDivisor = (int)(curMicro - startMicro);
|
||||
startMicro = curMicro;
|
||||
timingSet[i] = aDivisor;
|
||||
}
|
||||
|
||||
qsort(timingSet, BF_ARRAY_COUNT(timingSet), sizeof(timingSet[0]), IntCompare);
|
||||
gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet)/3];
|
||||
gTimerDivisor = timingSet[BF_ARRAY_COUNT(timingSet) / 3];
|
||||
|
||||
//gTimerDivisor = *gTimingSet.rbegin();
|
||||
OutputDebugStrF("BFGetTickCountMicro divisor: %d\n", gTimerDivisor);
|
||||
|
@ -364,7 +364,7 @@ uint64 Beefy::BFGetTickCountMicro()
|
|||
QueryPerformanceCounter(&value);
|
||||
curTime = value.QuadPart;
|
||||
|
||||
return (int64) ((curTime - startTime) * (double)1000000 / freq.QuadPart);
|
||||
return (int64)((curTime - startTime) * (double)1000000 / freq.QuadPart);
|
||||
}
|
||||
|
||||
static uint64 WinConvertFILETIME(const FILETIME& ft)
|
||||
|
@ -892,7 +892,7 @@ static void __cdecl HandlePureVirtualFunctionCall()
|
|||
BfpSystem_FatalError("Pure virtual function call", NULL);
|
||||
}
|
||||
|
||||
static void __cdecl HandleInvalidParameter(const wchar_t *, const wchar_t *, const wchar_t *, unsigned int, uintptr_t)
|
||||
static void __cdecl HandleInvalidParameter(const wchar_t*, const wchar_t*, const wchar_t*, unsigned int, uintptr_t)
|
||||
{
|
||||
BfpSystem_FatalError("Invalid parameter", NULL);
|
||||
}
|
||||
|
@ -1304,7 +1304,7 @@ BFP_EXPORT void BFP_CALLTYPE BfpProcess_Enumerate(const char* machineName, BfpPr
|
|||
memcpy(process->mInfo, curProcessInfo, dataSize);
|
||||
|
||||
UTF16String utf16;
|
||||
utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length/2);
|
||||
utf16.Set(curProcessInfo->ImageName.Buffer, curProcessInfo->ImageName.Length / 2);
|
||||
process->mImageName = UTF8Encode(utf16);
|
||||
|
||||
outProcesses[count++] = process;
|
||||
|
@ -1989,20 +1989,20 @@ BFP_EXPORT void BFP_CALLTYPE BfpSpawn_Kill(BfpSpawn* spawn, int exitCode, BfpKil
|
|||
return;
|
||||
}
|
||||
|
||||
// BOOL hadConsole = ::FreeConsole();
|
||||
// ::AttachConsole(spawn->mProcessId);
|
||||
// ::SetConsoleCtrlHandler(NULL, true);
|
||||
// ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
|
||||
// //::Sleep(2000);
|
||||
// ::FreeConsole();
|
||||
// ::SetConsoleCtrlHandler(NULL, false);
|
||||
// BOOL hadConsole = ::FreeConsole();
|
||||
// ::AttachConsole(spawn->mProcessId);
|
||||
// ::SetConsoleCtrlHandler(NULL, true);
|
||||
// ::GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
|
||||
// //::Sleep(2000);
|
||||
// ::FreeConsole();
|
||||
// ::SetConsoleCtrlHandler(NULL, false);
|
||||
|
||||
// if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode))
|
||||
// {
|
||||
// int lastError = ::GetLastError();
|
||||
// OUTRESULT(BfpSpawnResult_UnknownError);
|
||||
// return;
|
||||
// }
|
||||
// if (!::TerminateProcess(spawn->mHProcess, (UINT)exitCode))
|
||||
// {
|
||||
// int lastError = ::GetLastError();
|
||||
// OUTRESULT(BfpSpawnResult_UnknownError);
|
||||
// return;
|
||||
// }
|
||||
OUTRESULT(BfpSpawnResult_Ok);
|
||||
}
|
||||
|
||||
|
@ -2193,32 +2193,165 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_Resume(BfpThread* thread, BfpThreadResult
|
|||
OUTRESULT(BfpThreadResult_Ok);
|
||||
}
|
||||
|
||||
|
||||
// Windows 7 SP1 is the first version of Windows to support the AVX API.
|
||||
|
||||
// The value for CONTEXT_XSTATE has changed between Windows 7 and
|
||||
// Windows 7 SP1 and greater.
|
||||
// While the value will be correct for future SDK headers, we need to set
|
||||
// this value manually when building with a Windows 7 SDK for running on
|
||||
// Windows 7 SP1 OS bits.
|
||||
|
||||
#undef CONTEXT_XSTATE
|
||||
|
||||
#if defined(_M_X64)
|
||||
#define CONTEXT_XSTATE (0x00100040)
|
||||
#else
|
||||
#define CONTEXT_XSTATE (0x00010040)
|
||||
#endif
|
||||
|
||||
// Since the AVX API is not declared in the Windows 7 SDK headers and
|
||||
// since we don't have the proper libs to work with, we will declare
|
||||
// the API as function pointers and get them with GetProcAddress calls
|
||||
// from kernel32.dll. We also need to set some #defines.
|
||||
|
||||
#define XSTATE_AVX (XSTATE_GSSE)
|
||||
#define XSTATE_MASK_AVX (XSTATE_MASK_GSSE)
|
||||
|
||||
typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)();
|
||||
static PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL;
|
||||
|
||||
typedef BOOL(WINAPI* PINITIALIZECONTEXT)(PVOID Buffer, DWORD ContextFlags, PCONTEXT* Context, PDWORD ContextLength);
|
||||
static PINITIALIZECONTEXT pfnInitializeContext = NULL;
|
||||
|
||||
typedef BOOL(WINAPI* PGETXSTATEFEATURESMASK)(PCONTEXT Context, PDWORD64 FeatureMask);
|
||||
static PGETXSTATEFEATURESMASK pfnGetXStateFeaturesMask = NULL;
|
||||
|
||||
typedef PVOID(WINAPI* LOCATEXSTATEFEATURE)(PCONTEXT Context, DWORD FeatureId, PDWORD Length);
|
||||
static LOCATEXSTATEFEATURE pfnLocateXStateFeature = NULL;
|
||||
|
||||
typedef BOOL(WINAPI* SETXSTATEFEATURESMASK)(PCONTEXT Context, DWORD64 FeatureMask);
|
||||
static SETXSTATEFEATURESMASK pfnSetXStateFeaturesMask = NULL;
|
||||
|
||||
static uint8 ContextBuffer[4096];
|
||||
// Captures the register state of hThread through the XState-aware context API so
// that the upper halves of the YMM registers can be retrieved along with the
// regular CONTEXT.
//
// Parameters:
//   hThread - handle of the thread whose context is captured.
//   curPtr  - in/out write cursor into the caller's register buffer. When AVX state
//             is available, the YMM upper-half area is copied to curPtr and curPtr
//             is advanced past it; otherwise curPtr is left untouched.
//             NOTE(review): this function has no way of knowing how much room is
//             left behind curPtr - the caller must guarantee enough space for the
//             AVX area before calling.
//
// Returns:
//   A CONTEXT that was successfully filled by GetThreadContext, or NULL when the
//   XState API is unavailable, AVX is not enabled, or the capture failed. On NULL
//   the caller is expected to fall back to a plain GetThreadContext.
//   The returned CONTEXT lives inside the static ContextBuffer, so it is only valid
//   until the next call to this function.
static CONTEXT* CaptureRegistersEx(HANDLE hThread, intptr*& curPtr)
{
	// (PGETENABLEDXSTATEFEATURES)-1 marks "lookup already attempted and failed" so
	// the GetProcAddress probing is not repeated on every call.
	const PGETENABLEDXSTATEFEATURES unavailable = (PGETENABLEDXSTATEFEATURES)-1;

	if (pfnGetEnabledXStateFeatures == unavailable)
		return NULL;

	if (pfnGetEnabledXStateFeatures == NULL)
	{
		HMODULE hm = GetModuleHandleA("kernel32.dll");
		if (hm == NULL)
		{
			pfnGetEnabledXStateFeatures = unavailable;
			return NULL;
		}

		pfnGetEnabledXStateFeatures = (PGETENABLEDXSTATEFEATURES)GetProcAddress(hm, "GetEnabledXStateFeatures");
		pfnInitializeContext = (PINITIALIZECONTEXT)GetProcAddress(hm, "InitializeContext");
		pfnGetXStateFeaturesMask = (PGETXSTATEFEATURESMASK)GetProcAddress(hm, "GetXStateFeaturesMask");
		pfnLocateXStateFeature = (LOCATEXSTATEFEATURE)GetProcAddress(hm, "LocateXStateFeature");
		pfnSetXStateFeaturesMask = (SETXSTATEFEATURESMASK)GetProcAddress(hm, "SetXStateFeaturesMask");

		if (pfnGetEnabledXStateFeatures == NULL
			|| pfnInitializeContext == NULL
			|| pfnGetXStateFeaturesMask == NULL
			|| pfnLocateXStateFeature == NULL
			|| pfnSetXStateFeaturesMask == NULL)
		{
			pfnGetEnabledXStateFeatures = unavailable;
			return NULL;
		}
	}

	DWORD64 FeatureMask = pfnGetEnabledXStateFeatures();
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return NULL;

	const DWORD contextFlags = CONTEXT_ALL | CONTEXT_XSTATE | CONTEXT_EXCEPTION_REQUEST;

	// The first call passes no buffer; it is only used to learn the required size,
	// so its return value is intentionally ignored.
	DWORD ContextSize = 0;
	pfnInitializeContext(NULL, contextFlags, NULL, &ContextSize);
	if ((ContextSize == 0) || (ContextSize > sizeof(ContextBuffer)))
		return NULL;

	PCONTEXT Context = NULL;
	if (!pfnInitializeContext(ContextBuffer, contextFlags, &Context, &ContextSize))
		return NULL;

	// If either of these fails the context has NOT been filled with this thread's
	// registers (ContextBuffer may even hold a previous capture). Report failure so
	// the caller takes its fallback path instead of reading stale data.
	if (!pfnSetXStateFeaturesMask(Context, XSTATE_MASK_AVX))
		return NULL;
	if (!GetThreadContext(hThread, Context))
		return NULL;

	// From here on the general-purpose registers are valid; the AVX area is
	// optional, so every early-out below still returns the captured context.
	if (!pfnGetXStateFeaturesMask(Context, &FeatureMask))
		return Context;
	if ((FeatureMask & XSTATE_MASK_AVX) == 0)
		return Context;

	// The legacy SSE lookup supplies the byte length of the XMM register block; the
	// AVX area holds one 16-byte upper half per XMM register, so the same length is
	// used for the copy.
	DWORD FeatureLength = 0;
	PM128A Xmm = (PM128A)pfnLocateXStateFeature(Context, XSTATE_LEGACY_SSE, &FeatureLength);
	PM128A Ymm = (PM128A)pfnLocateXStateFeature(Context, XSTATE_AVX, NULL);
	if ((Xmm == NULL) || (Ymm == NULL) || (curPtr == NULL))
		return Context;

	memcpy(curPtr, Ymm, FeatureLength);
	curPtr += FeatureLength / sizeof(intptr);
	return Context;
}
|
||||
|
||||
BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr* outStackPtr, intptr* outIntRegs, int* inOutIntRegCount, BfpThreadResult* outResult)
|
||||
{
|
||||
CONTEXT ctx;
|
||||
intptr* curPtr = outIntRegs;
|
||||
CONTEXT* ctxPtr = NULL;
|
||||
|
||||
if (*inOutIntRegCount > 48)
|
||||
ctxPtr = CaptureRegistersEx((HANDLE)thread, curPtr);
|
||||
|
||||
if (ctxPtr == NULL)
|
||||
{
|
||||
memset(&ctx, 0, sizeof(CONTEXT));
|
||||
ctx.ContextFlags = CONTEXT_INTEGER | CONTEXT_CONTROL;
|
||||
BOOL success = ::GetThreadContext((HANDLE)thread, &ctx);
|
||||
ctx.ContextFlags = CONTEXT_ALL;
|
||||
BOOL success = ::GetThreadContext((HANDLE)thread, (CONTEXT*)&ctx);
|
||||
if (!success)
|
||||
{
|
||||
int error = GetLastError();
|
||||
OUTRESULT(BfpThreadResult_UnknownError);
|
||||
return;
|
||||
}
|
||||
ctxPtr = &ctx;
|
||||
|
||||
DWORD lastError = GetLastError();
|
||||
BF_ASSERT(success);
|
||||
}
|
||||
|
||||
#ifdef BF32
|
||||
*outStackPtr = (intptr)ctx.Esp;
|
||||
if (*inOutIntRegCount < 7)
|
||||
* outStackPtr = (intptr)ctxPtr->Esp;
|
||||
if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 7)
|
||||
{
|
||||
OUTRESULT(BfpThreadResult_InsufficientBuffer);
|
||||
return;
|
||||
}
|
||||
#else
|
||||
*outStackPtr = (intptr)ctx.Rsp;
|
||||
if (*inOutIntRegCount < 15)
|
||||
* outStackPtr = (intptr)ctxPtr->Rsp;
|
||||
if (*inOutIntRegCount < (int)(curPtr - outIntRegs) + 48)
|
||||
{
|
||||
OUTRESULT(BfpThreadResult_InsufficientBuffer);
|
||||
return;
|
||||
|
@ -2230,36 +2363,36 @@ BFP_EXPORT void BFP_CALLTYPE BfpThread_GetIntRegisters(BfpThread* thread, intptr
|
|||
if (outIntRegs == NULL)
|
||||
return;
|
||||
|
||||
intptr* curPtr = outIntRegs;
|
||||
|
||||
#ifdef BF32
|
||||
*(curPtr++) = (intptr)ctx.Eax;
|
||||
*(curPtr++) = (intptr)ctx.Ebx;
|
||||
*(curPtr++) = (intptr)ctx.Ecx;
|
||||
*(curPtr++) = (intptr)ctx.Edx;
|
||||
*(curPtr++) = (intptr)ctx.Esi;
|
||||
*(curPtr++) = (intptr)ctx.Edi;
|
||||
*(curPtr++) = (intptr)ctx.Ebp;
|
||||
* (curPtr++) = (intptr)ctxPtr->Eax;
|
||||
*(curPtr++) = (intptr)ctxPtr->Ebx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Ecx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Edx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Esi;
|
||||
*(curPtr++) = (intptr)ctxPtr->Edi;
|
||||
*(curPtr++) = (intptr)ctxPtr->Ebp;
|
||||
#else
|
||||
*(curPtr++) = (intptr)ctx.SegFs; // Testing
|
||||
*(curPtr++) = (intptr)ctx.Rax;
|
||||
*(curPtr++) = (intptr)ctx.Rbx;
|
||||
*(curPtr++) = (intptr)ctx.Rcx;
|
||||
*(curPtr++) = (intptr)ctx.Rdx;
|
||||
*(curPtr++) = (intptr)ctx.Rsi;
|
||||
*(curPtr++) = (intptr)ctx.Rdi;
|
||||
*(curPtr++) = (intptr)ctx.Rbp;
|
||||
*(curPtr++) = (intptr)ctx.R8;
|
||||
*(curPtr++) = (intptr)ctx.R9;
|
||||
*(curPtr++) = (intptr)ctx.R10;
|
||||
*(curPtr++) = (intptr)ctx.R11;
|
||||
*(curPtr++) = (intptr)ctx.R12;
|
||||
*(curPtr++) = (intptr)ctx.R13;
|
||||
*(curPtr++) = (intptr)ctx.R14;
|
||||
*(curPtr++) = (intptr)ctx.R15;
|
||||
* (curPtr++) = (intptr)ctxPtr->SegFs; // Testing
|
||||
*(curPtr++) = (intptr)ctxPtr->Rax;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rbx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rcx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rdx;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rsi;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rdi;
|
||||
*(curPtr++) = (intptr)ctxPtr->Rbp;
|
||||
*(curPtr++) = (intptr)ctxPtr->R8;
|
||||
*(curPtr++) = (intptr)ctxPtr->R9;
|
||||
*(curPtr++) = (intptr)ctxPtr->R10;
|
||||
*(curPtr++) = (intptr)ctxPtr->R11;
|
||||
*(curPtr++) = (intptr)ctxPtr->R12;
|
||||
*(curPtr++) = (intptr)ctxPtr->R13;
|
||||
*(curPtr++) = (intptr)ctxPtr->R14;
|
||||
*(curPtr++) = (intptr)ctxPtr->R15;
|
||||
memcpy(curPtr, &ctxPtr->Xmm0, 16 * 16);
|
||||
curPtr += (16 * 16) / sizeof(intptr);
|
||||
#endif
|
||||
|
||||
*inOutIntRegCount = (int)(curPtr - outIntRegs);
|
||||
* inOutIntRegCount = (int)(curPtr - outIntRegs);
|
||||
}
|
||||
|
||||
struct BfpCritSect
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue