Okay, I saw where the bug is. It is not in the patching but rather in my synchronization mechanism. Here is some code:
Code: Select all DWORD isLocked, numLocked;
KIRQL irqLevel;
PKDPC dpArr;
//LOCK
irqLevel = RaiseIRQL();
dpArr = AcquireLock(&isLocked, &numLocked);
//INSTALL INLINE PATCH
if( NT_SUCCESS(inlineHookInstall()) ) {
DbgPrint("Successfully patched the function\n");
}
LowerIRQL(irqLevel);
ReleaseLock(dpArr, &isLocked, &numLocked);
And my routines:
Code: Select all/* raises the IRQL of current processor to DISPATCH_LEVEL */
KIRQL RaiseIRQL() {
KIRQL curr, prev;
curr = KeGetCurrentIrql();
prev = curr;
if(curr < DISPATCH_LEVEL)
prev = KeRaiseIrqlToDpcLevel();
return prev;
}
void LowerIRQL(KIRQL prev) {
KeLowerIrql(prev);
}
/* This routine will basically spin until we are ready to unlock */
void lockRoutine(PKDPC dpc, PVOID context, PVOID lock, PVOID lockedCount) {
PDWORD isLocked;
PDWORD lockedCPUsCount;
isLocked = (PDWORD) lock;
lockedCPUsCount = (PDWORD) lockedCount;
DbgPrint("[Locked routine] locked CPU [%u]\n", KeGetCurrentProcessorNumber());
InterlockedIncrement((LONG *)lockedCPUsCount);
//spin until lock flag is set
while(InterlockedCompareExchange((LONG *)isLocked, 1, 1)) {
__asm {
nop;
}
}
//we have finished with shared access, start unlocking
InterlockedDecrement((LONG *)lockedCount);
DbgPrint("[Locked routine] unlocked CPU [%u]\n", KeGetCurrentProcessorNumber());
return;
}
/* This routine will schedule DPCs for all active CPUs. This DPC will spin until we are ready to unlock */
PKDPC AcquireLock(PDWORD lock, PDWORD numLockedCPUs) {
PKDPC dpcArray;
DWORD cpuID;
DWORD i;
DWORD iOtherCPUs;
ULONG CpuCount;
CpuCount = KeQueryActiveProcessorCount(NULL);
if(KeGetCurrentIrql() != DISPATCH_LEVEL)
return NULL;
//init vars to 0
InterlockedAnd((LONG *)lock, 0);
InterlockedAnd((LONG *)numLockedCPUs, 0);
dpcArray = (PKDPC)ExAllocatePoolWithTag(NonPagedPool, CpuCount * sizeof(KDPC), '2BUB');
if(dpcArray == NULL)
return NULL;
cpuID = KeGetCurrentProcessorNumber();
for(i = 0; i < CpuCount; i++) {
PKDPC dpcEntry = &(dpcArray[i]);
if(i != cpuID) {
KeInitializeDpc(dpcEntry, lockRoutine, NULL);
KeSetTargetProcessorDpc(dpcEntry, i);
KeInsertQueueDpc(dpcEntry, lock, numLockedCPUs);
}
}
//cpus left for hooking
iOtherCPUs = KeQueryActiveProcessorCount(NULL) - 1;
InterlockedCompareExchange((LONG *)numLockedCPUs, iOtherCPUs, iOtherCPUs);
//spin while all dpc are executed
while(*numLockedCPUs != iOtherCPUs) {
__asm {
nop;
}
InterlockedCompareExchange((LONG *)numLockedCPUs, iOtherCPUs, iOtherCPUs);
}
return dpcArray;
}
/* This routine unlocks the exclusive access to the CPU */
NTSTATUS ReleaseLock(PVOID dpcPtr, PDWORD lock, PDWORD lockedCount) {
//all scheduled DPCS (if any) will exit
InterlockedIncrement((LONG *)lock);
InterlockedCompareExchange((LONG *)lockedCount, 0, 0);
while(lockedCount != 0) {
__asm {
nop;
}
InterlockedCompareExchange((LONG *)lockedCount, 0, 0);
}
if(dpcPtr != NULL) {
ExFreePoolWithTag(dpcPtr, '2BUB');
}
}
It turns out that in the ReleaseLock even when lockedCount ==0 the loop will still keep looping and that's why I'm getting this sluggishness - because it is constantly looping. But I see no reason why this should be the case the condition is simple enough that once lockedCount is 0 it should terminate o_O. Can someone test this and see if he sees the same thing?