diff options
author | ionescu007 <aionescu+git@gmail.com> | 2016-03-17 08:53:06 +0300 |
---|---|---|
committer | ionescu007 <aionescu+git@gmail.com> | 2016-03-17 08:53:11 +0300 |
commit | 77b45ab829ec2bcad02440f5abfdb570e5582658 (patch) | |
tree | dedd3a0e9e018691c8758af05881be6179f4aeef | |
parent | cae69b02e1d3fe7e569a7c898ff2e1ed95941a4f (diff) |
Final round of cleanups
-rw-r--r-- | ntint.h | 2 | ||||
-rw-r--r-- | shv.c | 4 | ||||
-rw-r--r-- | shv.h | 2 | ||||
-rw-r--r-- | shv.vcxproj | 2 | ||||
-rw-r--r-- | shvutil.c | 4 | ||||
-rw-r--r-- | shvvmx.c | 16 | ||||
-rw-r--r-- | shvvmxhv.c | 149 | ||||
-rw-r--r-- | shvvp.c | 142 | ||||
-rw-r--r-- | shvx64.asm | 64 | ||||
-rw-r--r-- | vmx.h | 2 | ||||
-rw-r--r-- | x64.asm | 26 |
11 files changed, 354 insertions, 59 deletions
@@ -12,7 +12,7 @@ Abstract: Author: - Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: @@ -2,7 +2,7 @@ Copyright (c) Alex Ionescu. All rights reserved. -Header Name: +Module Name: shv.c @@ -12,7 +12,7 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: @@ -12,7 +12,7 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: diff --git a/shv.vcxproj b/shv.vcxproj index c36b29d..2cf5bf1 100644 --- a/shv.vcxproj +++ b/shv.vcxproj @@ -63,7 +63,7 @@ <ClInclude Include="vmx.h" /> </ItemGroup> <ItemGroup> - <MASM Include="x64.asm" /> + <MASM Include="shvx64.asm" /> </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <ImportGroup Label="ExtensionTargets"> @@ -2,7 +2,7 @@ Copyright (c) Alex Ionescu. All rights reserved. -Header Name: +Module Name: shvutil.c @@ -12,7 +12,7 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: @@ -2,7 +2,7 @@ Copyright (c) Alex Ionescu. All rights reserved. -Header Name: +Module Name: shvvmx.c @@ -12,11 +12,11 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: - Kernel mode only. + Kernel mode only, IRQL DISPATCH_LEVEL. --*/ @@ -365,6 +365,16 @@ ShvVmxLaunchOnVp ( _In_ PSHV_VP_DATA VpData ) { + ULONG i; + + // + // Initialize all the VMX-related MSRs by reading their value + // + for (i = 0; i < RTL_NUMBER_OF(VpData->MsrData); i++) + { + VpData->MsrData[i].QuadPart = __readmsr(MSR_IA32_VMX_BASIC + i); + } + // // Attempt to enter VMX root mode on this processor. // @@ -2,7 +2,7 @@ Copyright (c) Alex Ionescu. All rights reserved. 
-Header Name: +Module Name: shvvmxhv.c @@ -12,11 +12,11 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: - Hypervisor mode only. + Hypervisor mode only, IRQL DIRQL_MAX --*/ @@ -28,18 +28,29 @@ ShvVmxResume ( VOID ) { + // + // Issue a VMXRESUME. The reason that we've defined an entire function for + // this sole instruction is both so that we can use it as the target of the + // VMCS when re-entering the VM after a VM-Exit, as well as so that we can + // decorate it with the DECLSPEC_NORETURN marker, which is not set on the + // intrinsic (as it can fail in case of an error). + // __vmx_vmresume(); } ULONG_PTR +FORCEINLINE ShvVmxRead ( _In_ ULONG VmcsFieldId ) { SIZE_T FieldData; + // + // Because VMXREAD returns an error code, and not the data, it is painful + // to use in most circumstances. This simple function simplifies its use. + // __vmx_vmread(VmcsFieldId, &FieldData); - return FieldData; } @@ -48,6 +59,13 @@ ShvVmxHandleInvd ( VOID ) { + // + // This is the handler for the INVD instruction. Technically it may be more + // correct to use __invd instead of __wbinvd, but that intrinsic doesn't + // actually exist. Additionally, the Windows kernel (or HAL) don't contain + // any example of INVD actually ever being used. Finally, Hyper-V itself + // handles INVD by issuing WBINVD as well, so we'll just do that here too. + // __wbinvd(); } @@ -58,6 +76,12 @@ ShvVmxHandleCpuid ( { INT cpu_info[4]; + // + // Check for the magic CPUID sequence, and check that it is coming from + // Ring 0. Technically we could also check the RIP and see if this falls + // in the expected function, but we may want to allow a separate "unload" + // driver or code at some point. 
+ // if ((VpState->VpRegs->Rax == 0x41414141) && (VpState->VpRegs->Rcx == 0x42424242) && ((ShvVmxRead(GUEST_CS_SELECTOR) & RPL_MASK) == DPL_SYSTEM)) @@ -66,13 +90,27 @@ ShvVmxHandleCpuid ( return; } + // + // Otherwise, issue the CPUID to the logical processor based on the indexes + // on the VP's GPRs. + // __cpuidex(cpu_info, (INT)VpState->VpRegs->Rax, (INT)VpState->VpRegs->Rcx); + // + // Check if this was CPUID 1h, which is the features request. + // if (VpState->VpRegs->Rax == 1) { + // + // Set the Hypervisor Present-bit in RCX, which Intel and AMD have both + // reserved for this indication. + // cpu_info[2] |= 0x80000000; } + // + // Copy the values from the logical processor registers into the VP GPRs. + // VpState->VpRegs->Rax = cpu_info[0]; VpState->VpRegs->Rbx = cpu_info[1]; VpState->VpRegs->Rcx = cpu_info[2]; @@ -84,6 +122,9 @@ ShvVmxHandleXsetbv ( _In_ PSHV_VP_STATE VpState ) { + // + // Simply issue the XSETBV instruction on the native logical processor. + // _xsetbv((ULONG)VpState->VpRegs->Rcx, VpState->VpRegs->Rdx << 32 | VpState->VpRegs->Rax); @@ -94,7 +135,14 @@ ShvVmxHandleVmx ( _In_ PSHV_VP_STATE VpState ) { + // + // Set the CF flag, which is how VMX instructions indicate failure + // VpState->GuestEFlags |= 0x1; // VM_FAIL_INVALID + + // + // RFLAGs is actually restored from the VMCS, so update it here + // __vmx_vmwrite(GUEST_RFLAGS, VpState->GuestEFlags); } @@ -103,6 +151,13 @@ ShvVmxHandleExit ( _In_ PSHV_VP_STATE VpState ) { + // + // This is the generic VM-Exit handler. Decode the reason for the exit and + // call the appropriate handler. As per Intel specifications, given that we + // have requested no optional exits whatsoever, we should only see CPUID, + // INVD, XSETBV and other VMX instructions. GETSEC cannot happen as we do + // not run in SMX context. 
+ // switch (VpState->ExitReason) { case EXIT_REASON_CPUID: @@ -131,6 +186,11 @@ ShvVmxHandleExit ( break; } + // + // Move the instruction pointer to the next instruction after the one that + // caused the exit. Since we are not doing any special handling or changing + // of execution, this can be done for any exit reason. + // VpState->GuestRip += ShvVmxRead(VM_EXIT_INSTRUCTION_LEN); __vmx_vmwrite(GUEST_RIP, VpState->GuestRip); } @@ -145,39 +205,118 @@ ShvVmxEntryHandler ( SHV_VP_STATE guestContext; PSHV_VP_DATA vpData; + // + // For performance and sanity reasons, do not allow any hardware interrupts + // to come in while we are inside of the hypervisor context. We still want + // the clock and IPIs to occur, though. Obviously don't allow any thread + // scheduling, DPCs, timers or APCs to interrupt us either. This means that + // we should spend very little time in the hypervisor (always a good thing) + // KeRaiseIrql(CLOCK_LEVEL - 1, &guestContext.GuestIrql); + // + // Because we had to use RCX when calling RtlCaptureContext, its true value + // was actually pushed on the stack right before the call. Go dig into the + // stack to find it, and overwrite the bogus value that's there now. + // Context->Rcx = *(PULONG64)((ULONG_PTR)Context - sizeof(Context->Rcx)); - Context->Rsp += sizeof(Context->Rcx); + // + // Get the per-VP data for this processor. + // vpData = &ShvGlobalData->VpData[KeGetCurrentProcessorNumberEx(NULL)]; + // + // Build a little stack context to make it easier to keep track of certain + // guest state, such as the RIP/RSP/RFLAGS, and the exit reason. The rest + // of the general purpose registers come from the context structure that we + // captured on our own with RtlCaptureContext in the assembly entrypoint. 
+ // guestContext.GuestEFlags = ShvVmxRead(GUEST_RFLAGS); guestContext.GuestRip = ShvVmxRead(GUEST_RIP); guestContext.GuestRsp = ShvVmxRead(GUEST_RSP); guestContext.ExitReason = ShvVmxRead(VM_EXIT_REASON) & 0xFFFF; guestContext.VpRegs = Context; guestContext.ExitVm = FALSE; + + // + // Call the generic handler + // ShvVmxHandleExit(&guestContext); + // + // Did we hit the magic exit sequence, or should we resume back to the VM + // context? + // if (guestContext.ExitVm) { + // + // When running in VMX root mode, the processor will set limits of the + // GDT and IDT to 0xFFFF (notice that there are no Host VMCS fields to + // set these values). This causes problems with PatchGuard, which will + // believe that the GDTR and IDTR have been modified by malware, and + // eventually crash the system. Since we know what the original state + // of the GDTR and IDTR was, simply restore it now. + // __lgdt(&vpData->HostState.SpecialRegisters.Gdtr.Limit); __lidt(&vpData->HostState.SpecialRegisters.Idtr.Limit); + + // + // Our DPC routine may have interrupted an arbitrary user process, and + // not an idle or system thread as usually happens on an idle system. + // Therefore if we return back to the original caller after turning off + // VMX, it will keep our current "host" CR3 value which we set on entry + // to the PML4 of the SYSTEM process. We want to return back with the + // correct value of the "guest" CR3, so that the currently executing + // process continues to run with its expected address space mappings. + // __writecr3(ShvVmxRead(GUEST_CR3)); + // + // Finally, set the stack and instruction pointer to whatever location + // had the instruction causing our VM-Exit, such as ShvVpUninitialize. + // This will effectively act as a longjmp back to that location. + // Context->Rsp = guestContext.GuestRsp; Context->Rip = (ULONG64)guestContext.GuestRip; + // + // Turn off VMX root mode on this logical processor. We're done here. 
+ // __vmx_off(); } else { + // + // Because we won't be returning back into assembly code, nothing will + // ever know about the "pop rcx" that must technically be done (or more + // accurately "add rsp, 4" as rcx will already be correct thanks to the + // fixup earlier. In order to keep the stack sane, do that adjustment + // here. + // + Context->Rsp += sizeof(Context->Rcx); + + // + // Return into a VMXRESUME intrinsic, which we broke out as its own + // function, in order to allow this to work. No assembly code will be + // needed as RtlRestoreContext will fix all the GPRs, and what we just + // did to RSP will take care of the rest. + // Context->Rip = (ULONG64)ShvVmxResume; } + // + // Restore the IRQL back to the original level + // KeLowerIrql(guestContext.GuestIrql); + // + // Restore the context to either ShvVmxResume, in which case the CPU's VMX + // facility will do the "true" return back to the VM (but without restoring + // GPRs, which is why we must do it here), or to the original guest's RIP, + // which we use in case an exit was requested. In this case VMX must now be + // off, and this will look like a longjmp to the original stack and RIP. + // RtlRestoreContext(Context, NULL); } @@ -2,7 +2,7 @@ Copyright (c) Alex Ionescu. All rights reserved. -Header Name: +Module Name: shvvp.c @@ -12,11 +12,11 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: - Kernel mode only. + Kernel mode only, IRQL DISPATCH_LEVEL. --*/ @@ -24,20 +24,78 @@ Environment: VOID ShvVpInitialize ( - _In_ PSHV_VP_DATA Data + _In_ PSHV_VP_DATA Data, + _In_ ULONG64 SystemDirectoryTableBase ) { + // + // Store the hibernation state of the processor, which contains all the + // special registers and MSRs which are what the VMCS will need as part + // of its setup. This avoids using assembly sequences and manually reading + // this data. 
+ // KeSaveStateForHibernate(&Data->HostState); + + // + // Then, capture the entire register state. We will need this, as once we + // launch the VM, it will begin execution at the defined guest instruction + // pointer, which is being captured as part of this call. In other words, + // we will return right where we were, but with all our registers corrupted + // by the VMCS/VMX initialization code (as guest state does not include + // register state). By saving the context here, which includes all general + // purpose registers, we guarantee that we return with all of our starting + // register values as well! + // RtlCaptureContext(&Data->HostState.ContextFrame); + // + // As per the above, we might be here because the VM has actually launched. + // We can check this by verifying the value of the VmxEnabled field, which + // is set to 1 right before VMXLAUNCH is performed. We do not use the Data + // parameter or any other local register in this function, and in fact have + // defined VmxEnabled as volatile, because as per the above, our register + // state is currently dirty due to the VMCALL itself. By using the global + // variable combined with an API call, we also make sure that the compiler + // will not optimize this access in any way, even on LTGC/Ox builds. + // if (ShvGlobalData->VpData[KeGetCurrentProcessorNumberEx(NULL)].VmxEnabled == 1) { + // + // We now indicate that the VM has launched, and that we are about to + // restore the GPRs back to their original values. This will have the + // effect of putting us yet *AGAIN* at the previous line of code, but + // this time the value of VmxEnabled will be two, bypassing the if and + // else if checks. + // ShvGlobalData->VpData[KeGetCurrentProcessorNumberEx(NULL)].VmxEnabled = 2; + // + // And finally, restore the context, so that all register and stack + // state is finally restored. 
Note that by continuing to reference the + // per-VP data this way, the compiler will continue to generate non- + // optimized accesses, guaranteeing that no previous register state + // will be used. + // RtlRestoreContext(&ShvGlobalData->VpData[KeGetCurrentProcessorNumberEx(NULL)].HostState.ContextFrame, NULL); } + // + // If we are in this branch comparison, it means that we have not yet + // attempted to launch the VM, nor that we have launched it. In other + // words, this is the first time in ShvVpInitialize. Because of this, + // we are free to use all register state, as it is ours to use. + // else if (Data->VmxEnabled == 0) { + // + // First, capture the value of the PML4 for the SYSTEM process, so that + // all virtual processors, regardless of which process the current LP + // has interrupted, can share the correct kernel address space. + // + Data->SystemDirectoryTableBase = SystemDirectoryTableBase; + + // + // Then, attempt to initialize VMX on this processor + // ShvVmxLaunchOnVp(Data); } } @@ -50,7 +108,28 @@ ShvVpUninitialize ( INT dummy[4]; UNREFERENCED_PARAMETER(VpData); + // + // Send the magic shutdown instruction sequence + // __cpuidex(dummy, 0x41414141, 0x42424242); + + // + // The processor will return here after the hypervisor issues a VMXOFF + // instruction and restores the CPU context to this location. Unfortunately + // because this is done with RtlRestoreContext which returns using "iretq", + // this causes the processor to remove the RPL bits off the segments. As + // the x64 kernel does not expect kernel-mode code to change the value of + // any segments, this results in the DS and ES segments being stuck at 0x20, + // and the FS segment being stuck at 0x50, until the next context switch. + // + // If the DPC happened to have interrupted either the idle thread or system + // thread, that's perfectly fine (albeit unusual). If the DPC interrupted a + // 64-bit long-mode thread, that's also fine. 
However if the DPC interrupts + // a thread in compatibility-mode, running as part of WoW64, it will hit a + // GPF instantaneously and crash. + // + // Thus, set the segments to their correct value, one more time, as a fix. + // ShvVmxCleanup(KGDT64_R3_DATA | RPL_MASK, KGDT64_R3_CMTEB | RPL_MASK); } @@ -63,30 +142,39 @@ ShvVpCallbackDpc ( ) { PSHV_VP_DATA vpData; - ULONG i; UNREFERENCED_PARAMETER(Dpc); + // + // Get the per-VP data for this logical processor + // vpData = &ShvGlobalData->VpData[KeGetCurrentProcessorNumberEx(NULL)]; - if (Context) + // + // Check if we are loading, or unloading + // + if (ARGUMENT_PRESENT(Context)) { - vpData->VpIndex = KeGetCurrentProcessorNumberEx(NULL); - vpData->SystemDirectoryTableBase = (ULONG64)Context; - __stosq((PULONGLONG)vpData->ShvStackLimit, 0xCC, KERNEL_STACK_SIZE / sizeof(ULONGLONG)); - - for (i = 0; i < RTL_NUMBER_OF(vpData->MsrData); i++) - { - vpData->MsrData[i].QuadPart = __readmsr(MSR_IA32_VMX_BASIC + i); - } - - ShvVpInitialize(vpData); + // + // Initialize the virtual processor + // + ShvVpInitialize(vpData, (ULONG64)Context); } else { + // + // Tear down the virtual processor + // ShvVpUninitialize(vpData); } + // + // Wait for all DPCs to synchronize at this point + // KeSignalCallDpcSynchronize(SystemArgument2); + + // + // Mark the DPC as being complete + // KeSignalCallDpcDone(SystemArgument1); } @@ -99,13 +187,27 @@ ShvVpAllocateGlobalData ( PSHV_GLOBAL_DATA data; ULONG cpuCount, size; + // + // The entire address range is OK for this allocation + // lowest.QuadPart = 0; - highest.QuadPart = -1; + highest.QuadPart = lowest.QuadPart - 1; + // + // Query the number of logical processors, including those potentially in + // groups other than 0. This allows us to support >64 processors. + // cpuCount = KeQueryActiveProcessorCountEx(ALL_PROCESSOR_GROUPS); + // + // Each processor will receive its own slice of per-virtual processor data. 
+ // size = FIELD_OFFSET(SHV_GLOBAL_DATA, VpData) + cpuCount * sizeof(SHV_VP_DATA); + // + // Allocate a contiguous chunk of RAM to back this allocation and make sure + // that it is RW only, instead of RWX, by using the new Windows 8 API. + // data = (PSHV_GLOBAL_DATA)MmAllocateContiguousNodeMemory(size, lowest, highest, @@ -114,9 +216,15 @@ ShvVpAllocateGlobalData ( MM_ANY_NODE_OK); if (data != NULL) { + // + // Zero out the entire data region + // __stosq((PULONGLONG)data, 0, size / sizeof(ULONGLONG)); } + // + // Return what is hopefully a valid pointer, otherwise NULL. + // return data; } diff --git a/shvx64.asm b/shvx64.asm new file mode 100644 index 0000000..66fdd7a --- /dev/null +++ b/shvx64.asm @@ -0,0 +1,64 @@ +;++ +; +; Copyright (c) Alex Ionescu. All rights reserved. +; +; Module: +; +; shvx64.asm +; +; Abstract: +; +; This module implements AMD64-specific routines for the Simple Hyper Visor. +; +; Author: +; +; Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version +; +; Environment: +; +; Kernel mode only. +; +;-- + +include ksamd64.inc + + extern ShvVmxEntryHandler:proc + extern RtlCaptureContext:proc + + NESTED_ENTRY ShvVmxEntry, _TEXT$00 + + push_reg rcx ; save RCX, as we will need to override it + END_PROLOGUE ; done messing with the stack + + lea rcx, [rsp+8h] ; store the context in the stack, bias for + ; the return address and the push we just did. + call RtlCaptureContext ; save the current register state. + ; note that this is a specially written function + ; which has the following key characteristics: + ; 1) it does not taint the value of RCX + ; 2) it does not spill any registers, nor + ; expect home space to be allocated for it + + jmp ShvVmxEntryHandler ; jump to the C code handler. we assume that it + ; compiled with optimizations and does not use + ; home space, which is true of release builds. 
+ + NESTED_END ShvVmxEntry, _TEXT$00 + + LEAF_ENTRY ShvVmxCleanup, _TEXT$00 + + mov ds, cx ; set DS to parameter 1 + mov es, cx ; set ES to parameter 1 + mov fs, dx ; set FS to parameter 2 + ret ; return + + LEAF_END ShvVmxCleanup, _TEXT$00 + + LEAF_ENTRY __lgdt, _TEXT$00 + + lgdt fword ptr [rcx] ; load the GDTR with the value in parameter 1 + ret ; return + + LEAF_END __lgdt, _TEXT$00 + + end @@ -12,7 +12,7 @@ Abstract: Author: - Alex Ionescu (alex.ionescu@reactos.com) 16-Mar-2016 + Alex Ionescu (@aionescu) 16-Mar-2016 - Initial version Environment: diff --git a/x64.asm b/x64.asm deleted file mode 100644 index ecb536e..0000000 --- a/x64.asm +++ /dev/null @@ -1,26 +0,0 @@ - -EXTERN ShvVmxEntryHandler : PROC -EXTERN RtlCaptureContext : PROC - -.CODE - -ShvVmxEntry PROC - push rcx - lea rcx, [rsp+8h] - call RtlCaptureContext - jmp ShvVmxEntryHandler -ShvVmxEntry ENDP - -ShvVmxCleanup PROC - mov ds, cx - mov es, cx - mov fs, dx - ret -ShvVmxCleanup ENDP - -__lgdt PROC - lgdt fword ptr [rcx] - ret -__lgdt ENDP - -END |