Virtualbox源码分析15 IEM: Instruction Decoding and Emulation manager


IEM - Instruction Decoding and Emulation manager

IEM : 模拟执行管理模块负责模拟执行小段连续guest代码避免太多退出/虚拟化陷阱导致性能下降,同时也提供单条指令的模拟。

15.1 模拟执行单条指令

IEM定义了很多指令的模拟执行代码

g_apfnOneByteMap(IEMAllInstructionsOneByte.cpp.h) :opcode instruction只有1字节的指令

g_apfnTwoByteMap (IEMAllInstructionsTwoByte0f.cpp.h): opcode instruction只有2字节的指令

g_apfnThreeByte0f38 (IEMAllInstructionsThree0f38.cpp.h): 以0F 38开头的指令,这些指令基本都没有实现

g_apfnThreeByte0f3a (IEMAllInstructionsThree0f3a.cpp.h): 0F 3A开头的指令,这些指令基本都没有实现

VEX前缀的指令: 这些指令大部分都没有实现

g_apfnVexMap1(IEMAllInstructionsVexMap1.cpp.h) g_apfnTwoByteMap里对应VEX前缀指令

g_apfnVexMap2(IEMAllInstructionsVexMap2.cpp.h) g_apfnThreeByte0f38里对应VEX前缀指令

g_apfnVexMap3(IEMAllInstructionsVexMap3.cpp.h) g_apfnThreeByte0f3a里对应VEX前缀指令

// Opcode handler emulating "mov CL, imm8".
// It only records the mnemonic and forwards to the shared "mov r8, imm8" worker,
// passing the CX register index OR'ed with the current REX.B state (uRexB).
FNIEMOP_DEF(iemOp_CL_Ib)
{
  IEMOP_MNEMONIC(mov_CL_Ib, "mov CL,Ib");
  return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xCX | pVCpu->iem.s.uRexB);
}
// Shared worker for "mov r8, imm8": stores the immediate byte that follows the
// opcode into the 8-bit general register selected by iReg.
FNIEMOP_DEF_1(iemOpCommonMov_r8_Ib, uint8_t, iReg)
{
  	// Fetch the imm8 operand, i.e. the next opcode byte.
    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_U8(&u8Imm);
    IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
    IEM_MC_BEGIN(0, 1);
    IEM_MC_LOCAL_CONST(uint8_t, u8Value,/*=*/ u8Imm);
  	// Write the immediate into the destination 8-bit register, then step past the instruction.
    IEM_MC_STORE_GREG_U8(iReg, u8Value);
    IEM_MC_ADVANCE_RIP();
    IEM_MC_END();
    return VINF_SUCCESS;
}
...
FNIEMOP_DEF(iemOp_eAX_Iv)
...

其中0F是双字节opcode的转义字节: g_apfnOneByteMap里0F对应的一项是iemOp_2byteEscape,它读取第二个字节后再到g_apfnTwoByteMap里查找对应的处理函数

// One-byte-map handler for the two-byte opcode escape: on target CPUs >= 286 it
// reads the second opcode byte and dispatches through g_apfnTwoByteMap.
FNIEMOP_DEF(iemOp_2byteEscape)
{
    if (RT_LIKELY(IEM_GET_TARGET_CPU(pVCpu) >= IEMTARGETCPU_286))
    {
      	// Fetch the second byte of the instruction.
        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
      	// The table holds 4 entries per opcode byte; idxPrefix selects which of the 4 to call.
        return FNIEMOP_CALL(g_apfnTwoByteMap[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix]);
    }
    ...
}

同样,0F 3A开头的三字节opcode通过g_apfnTwoByteMap里的一项(iemOp_3byte_Esc_0f_3a)进入: 它读取第三个字节后再到g_apfnThreeByte0f3a里查找对应的处理函数

// Escape handler for the three-byte opcode map: reads the third opcode byte and
// dispatches through g_apfnThreeByte0f3a.
FNIEMOP_DEF(iemOp_3byte_Esc_0f_3a)
{
  	// Fetch the third byte of the instruction.
    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
    // The table holds 4 entries per opcode byte; idxPrefix selects which of the 4 to call.
    return FNIEMOP_CALL(g_apfnThreeByte0f3a[(uintptr_t)b * 4 + pVCpu->iem.s.idxPrefix]);
}

这些.h文件里的指令实现会调用下面5个文件里的函数完成指令模拟

IEMAllCImpl.cpp.h:

IEMAllCImplStrInstr.cpp.h :string相关指令模拟,比如rep movs等

IEMAllCImplSvmInstr.cpp.h: svm指令模拟,用于嵌套SVM支持

IEMAllCImplVmxInstr.cpp.h: vmx指令模拟,用于嵌套VMX支持

IEMAllAImpl.asm : 部分指令的汇编实现

15.2 IEM模式的执行

IEM提供了很多对外的API给其他的Manager调用来模拟执行单条或者多条指令

IEMExecOne: 执行一条指令

// Emulates a single guest instruction at the current guest PC.
//
// Returns the status of the opcode prefetch if that failed, otherwise the status
// produced by iemExecOneInner.
VMMDECL(VBOXSTRICTRC) IEMExecOne(PVMCPUCC pVCpu)
{
  // Set up the decoder and prefetch the opcode bytes.
  VBOXSTRICTRC rcStrict = iemInitDecoderAndPrefetchOpcodes(pVCpu, false);
  if (rcStrict == VINF_SUCCESS)
  {
    // Decode and execute the instruction.
    rcStrict = iemExecOneInner(pVCpu, true, "IEMExecOne");
  }
  else if (pVCpu->iem.s.cActiveMappings > 0)
  {
    // The prefetch failed while memory mappings were still active: undo them.
    iemMemRollback(pVCpu);
  }
  // The function is declared to return a status; hand the result back to the caller.
  return rcStrict;
}

// Prefetches the opcode bytes of the instruction at the current guest PC into
// pVCpu->iem.s.abOpcode and records how many were read in pVCpu->iem.s.cbOpcode.
// The read never crosses the end of the current page and never exceeds
// sizeof(abOpcode).
//
// Returns VINF_SUCCESS when the buffer has been filled; otherwise the status of the
// raised #GP / selector-bounds fault / #PF, or the failing physical read status.
//
// NOTE(review): fBypassHandlers is not referenced directly below; the code reads
// pVCpu->iem.s.fBypassHandlers, which is presumably set from this parameter by
// decoder initialisation left out of this excerpt - confirm against the full source.
IEM_STATIC VBOXSTRICTRC iemInitDecoderAndPrefetchOpcodes(PVMCPUCC pVCpu, bool fBypassHandlers)
{
  uint32_t cbToTryRead;   // how many opcode bytes we will try to read
  RTGCPTR  GCPtrPC;       // guest-virtual address of the first opcode byte

  // Work out the fetch address and an upper bound on the readable size.
  if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
  {
    // 64-bit mode: RIP is used as-is and must be canonical, otherwise #GP(0).
    cbToTryRead = PAGE_SIZE;
    GCPtrPC     = pVCpu->cpum.GstCtx.rip;
    if (IEM_IS_CANONICAL(GCPtrPC))
      cbToTryRead = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);   // bytes from RIP to the end of the page
    else
      return iemRaiseGeneralProtectionFault0(pVCpu);
  }
  else
  {
    // 16/32-bit modes: EIP must lie within the CS limit.
    uint32_t GCPtrPC32 = pVCpu->cpum.GstCtx.eip;
    if (GCPtrPC32 <= pVCpu->cpum.GstCtx.cs.u32Limit)
      cbToTryRead = pVCpu->cpum.GstCtx.cs.u32Limit - GCPtrPC32 + 1;
    else
      return iemRaiseSelectorBounds(pVCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
    if (cbToTryRead) { /* likely */ }
    else /* the "+ 1" wrapped to zero */
    {
      cbToTryRead = UINT32_MAX;
    }
    // The fetch address is the CS base plus EIP.
    GCPtrPC = (uint32_t)pVCpu->cpum.GstCtx.cs.u64Base + GCPtrPC32;
  }

  // Translate the guest-virtual address to a guest-physical one; failure raises #PF.
  RTGCPHYS    GCPhys;
  uint64_t    fFlags;
  int rc = PGMGstGetPage(pVCpu, GCPtrPC, &fFlags, &GCPhys);
  if (RT_SUCCESS(rc)) { /* probable */ }
  else
  {
    return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, rc);
  }
  // Code running at CPL 3 may only fetch from pages that have the user (U/S) bit set.
  if ((fFlags & X86_PTE_US) || pVCpu->iem.s.uCpl != 3) { /* likely */ }
  else
  {
    return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
  }
  // The page must be executable: NX set in the PTE only matters when EFER.NXE is enabled.
  if (!(fFlags & X86_PTE_PAE_NX) || !(pVCpu->cpum.GstCtx.msrEFER & MSR_K6_EFER_NXE)) { /* likely */ }
  else
  {
    return iemRaisePageFault(pVCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
  }
  GCPhys |= GCPtrPC & PAGE_OFFSET_MASK;

  // Clamp the read size to the rest of the page and to the opcode buffer size.
  uint32_t cbLeftOnPage = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);
  if (cbToTryRead > cbLeftOnPage)
    cbToTryRead = cbLeftOnPage;
  if (cbToTryRead > sizeof(pVCpu->iem.s.abOpcode))
    cbToTryRead = sizeof(pVCpu->iem.s.abOpcode);

  if (!pVCpu->iem.s.fBypassHandlers)
  {
    // Normal path: read through PGMPhysRead, tagged with the IEM access origin.
    VBOXSTRICTRC rcStrict = PGMPhysRead(pVCpu->CTX_SUFF(pVM), GCPhys, pVCpu->iem.s.abOpcode, cbToTryRead, PGMACCESSORIGIN_IEM);
    if (RT_LIKELY(rcStrict == VINF_SUCCESS))
    { /* likely */ }
    else if (PGM_PHYS_RW_IS_SUCCESS(rcStrict))
    {
      // Informational success status: remember it so it can be passed up later.
      rcStrict = iemSetPassUpStatus(pVCpu, rcStrict);
    }
    else
    {
      return rcStrict;
    }
  }
  else
  {
    // Bypass path: plain guest-physical read.
    rc = PGMPhysSimpleReadGCPhys(pVCpu->CTX_SUFF(pVM), pVCpu->iem.s.abOpcode, GCPhys, cbToTryRead);
    if (RT_SUCCESS(rc))
    { /* likely */ }
    else
    {
      return rc;
    }
  }
  pVCpu->iem.s.cbOpcode = cbToTryRead;
  return VINF_SUCCESS;
}

获取一条指令,virtualbox支持两个版本的opcodeGet,一个支持CODE_TLB(code缓存),一个不支持CODE_TLB,

但支持CODE_TLB的版本正在开发中,暂时不看

// Stores the next opcode byte in *a_pu8; expects a variable named pVCpu in scope.
# define IEM_OPCODE_GET_NEXT_U8(a_pu8) (*(a_pu8) = iemOpcodeGetNextU8Jmp(pVCpu))
// Returns the next opcode byte and advances the fetch position.
// The fast path serves the byte from an already filled buffer; otherwise the
// out-of-line slow path is used.
DECLINLINE(uint8_t) iemOpcodeGetNextU8Jmp(PVMCPUCC pVCpu)
{
 // Code-TLB variant (per the article: not finished yet): serve the byte from the
 // instruction buffer when one is set and the offset is still inside it.
# ifdef IEM_WITH_CODE_TLB 
    uintptr_t       offBuf = pVCpu->iem.s.offInstrNextByte;
    uint8_t const  *pbBuf  = pVCpu->iem.s.pbInstrBuf;
    if (RT_LIKELY(   pbBuf != NULL
                  && offBuf < pVCpu->iem.s.cbInstrBuf))
    {
        pVCpu->iem.s.offInstrNextByte = (uint32_t)offBuf + 1;
        return pbBuf[offBuf];
    }
# else
  	// Non-TLB variant: if the byte is already in abOpcode, return it directly.
    uintptr_t offOpcode = pVCpu->iem.s.offOpcode;
    if (RT_LIKELY((uint8_t)offOpcode < pVCpu->iem.s.cbOpcode))
    {
        pVCpu->iem.s.offOpcode = (uint8_t)offOpcode + 1;
        return pVCpu->iem.s.abOpcode[offOpcode];
    }
# endif
    return iemOpcodeGetNextU8SlowJmp(pVCpu);
}
// Slow path of the one-byte opcode fetch: the byte is not buffered yet and has to be
// read from guest memory first.
DECL_NO_INLINE(IEM_STATIC, uint8_t) iemOpcodeGetNextU8SlowJmp(PVMCPUCC pVCpu)
{
# ifdef IEM_WITH_CODE_TLB 
  	 // Code-TLB variant: fetch one byte via iemOpcodeFetchBytesJmp.
    uint8_t u8;
    iemOpcodeFetchBytesJmp(pVCpu, sizeof(u8), &u8);
    return u8;
# else
   // Non-TLB variant: ask for at least one more opcode byte in abOpcode.
    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pVCpu, 1);
    if (rcStrict == VINF_SUCCESS)
        return pVCpu->iem.s.abOpcode[pVCpu->iem.s.offOpcode++];
    // The fetch failed: longjmp to the registered jump buffer, carrying the status code.
    longjmp(*pVCpu->iem.s.CTX_SUFF(pJmpBuf), VBOXSTRICTRC_VAL(rcStrict));
# endif
}

// Appends more opcode bytes to pVCpu->iem.s.abOpcode, continuing right after the
// cbOpcode bytes already buffered. Most of the logic mirrors
// iemInitDecoderAndPrefetchOpcodes.
//
// NOTE(review): this is an abridged excerpt - cbToTryRead, GCPtrNext, cbLeft, fFlags
// and GCPhys are declared in elided code, and the checks of the read results as well
// as the final return are not shown here.
IEM_STATIC VBOXSTRICTRC iemOpcodeFetchMoreBytes(PVMCPUCC pVCpu, size_t cbMin)
{
  // Compute the address of the first byte that has not been fetched yet.
  if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
  {
    // 64-bit mode: RIP plus the bytes already fetched; must be canonical, else #GP(0).
    cbToTryRead = PAGE_SIZE;
    GCPtrNext   = pVCpu->cpum.GstCtx.rip + pVCpu->iem.s.cbOpcode;
    if (!IEM_IS_CANONICAL(GCPtrNext))
      return iemRaiseGeneralProtectionFault0(pVCpu);
  }
  else
  {
    // 16/32-bit modes: check against the CS limit, then add the CS base.
    uint32_t GCPtrNext32 = pVCpu->cpum.GstCtx.eip;
    GCPtrNext32 += pVCpu->iem.s.cbOpcode;
    if (GCPtrNext32 > pVCpu->cpum.GstCtx.cs.u32Limit)
      return iemRaiseSelectorBounds(pVCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
    cbToTryRead = pVCpu->cpum.GstCtx.cs.u32Limit - GCPtrNext32 + 1;
    // A result of zero means the "+ 1" wrapped around.
    if (!cbToTryRead) 
    {
      cbToTryRead = UINT32_MAX;
    }
    // Not enough bytes left inside the segment to satisfy the request.
    if (cbToTryRead < cbMin - cbLeft)
      return iemRaiseSelectorBounds(pVCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
    GCPtrNext = (uint32_t)pVCpu->cpum.GstCtx.cs.u64Base + GCPtrNext32;
  }
  
  // Read at most up to the end of the page, and at most what still fits into abOpcode.
  uint32_t cbLeftOnPage = PAGE_SIZE - (GCPtrNext & PAGE_OFFSET_MASK);
  if (cbToTryRead > cbLeftOnPage)
    cbToTryRead = cbLeftOnPage;
  if (cbToTryRead > sizeof(pVCpu->iem.s.abOpcode) - pVCpu->iem.s.cbOpcode)
    cbToTryRead = sizeof(pVCpu->iem.s.abOpcode) - pVCpu->iem.s.cbOpcode;
  // Translate the guest-virtual address to a guest-physical one.
  int rc = PGMGstGetPage(pVCpu, GCPtrNext, &fFlags, &GCPhys);
  ...
  // Read the bytes into abOpcode right behind the ones already buffered.
  if (!pVCpu->iem.s.fBypassHandlers)
  {
    VBOXSTRICTRC rcStrict = PGMPhysRead(pVCpu->CTX_SUFF(pVM), GCPhys, &pVCpu->iem.s.abOpcode[pVCpu->iem.s.cbOpcode],
                                        cbToTryRead, PGMACCESSORIGIN_IEM);
  }
  else
  {
    rc = PGMPhysSimpleReadGCPhys(pVCpu->CTX_SUFF(pVM), &pVCpu->iem.s.abOpcode[pVCpu->iem.s.cbOpcode], GCPhys, cbToTryRead);
	}
  // Account for the newly buffered bytes.
  pVCpu->iem.s.cbOpcode += cbToTryRead;
}

iemExecOneInner

// Decodes and executes one instruction whose opcode bytes were already prefetched.
//
// fExecuteInhibit: when true and the instruction leaves interrupts inhibited
// (VMCPU_FF_INHIBIT_INTERRUPTS set and EMIsInhibitInterruptsActive), one more
// instruction is executed right away. On x86 this "interrupt shadow" follows STI,
// MOV SS and POP SS: interrupts and debug exceptions are held off until the next
// instruction completes, because after an SS load the next instruction normally
// loads the stack pointer and the pair must not be split.
//
// pszFunction: name of the calling API; not referenced in this excerpt.
//
// Returns the strict status of the last instruction executed.
// NOTE(review): IEMExecLots passes its status through iemExecStatusCodeFiddling before
// returning; the elided tail of this function may do the same - confirm.
DECLINLINE(VBOXSTRICTRC) iemExecOneInner(PVMCPUCC pVCpu, bool fExecuteInhibit, const char *pszFunction)
{
#ifdef IEM_WITH_SETJMP
    // Install a local jump buffer so opcode fetch failures can longjmp back here.
    VBOXSTRICTRC rcStrict;
    jmp_buf      JmpBuf;
    jmp_buf     *pSavedJmpBuf  = pVCpu->iem.s.CTX_SUFF(pJmpBuf);
    pVCpu->iem.s.CTX_SUFF(pJmpBuf) = &JmpBuf;
    if ((rcStrict = setjmp(JmpBuf)) == 0)
    {
        // Fetch the first opcode byte and dispatch through the one-byte map
        // (which also leads to the two- and three-byte maps via escape handlers).
        uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
        rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
    }
    else
        pVCpu->iem.s.cLongJumps++;
    pVCpu->iem.s.CTX_SUFF(pJmpBuf) = pSavedJmpBuf;
#else
    uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
    VBOXSTRICTRC rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
#endif
    // Count successfully executed instructions.
    if (rcStrict == VINF_SUCCESS)
        pVCpu->iem.s.cInstructions++;
    // Undo any memory mappings the instruction left active.
    if (pVCpu->iem.s.cActiveMappings > 0)
    {
        iemMemRollback(pVCpu);
    }

    // Interrupt shadow: execute exactly one follow-up instruction if requested.
    if (   fExecuteInhibit
        && rcStrict == VINF_SUCCESS
        && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
        && EMIsInhibitInterruptsActive(pVCpu))
    {
        // Prefetch the next instruction.
        rcStrict = iemInitDecoderAndPrefetchOpcodes(pVCpu, pVCpu->iem.s.fBypassHandlers);
        if (rcStrict == VINF_SUCCESS)
        {
            // Execute it, same procedure as above.
#ifdef IEM_WITH_SETJMP
            pVCpu->iem.s.CTX_SUFF(pJmpBuf) = &JmpBuf;
            if ((rcStrict = setjmp(JmpBuf)) == 0)
            {
                uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
                rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
            }
            else
                pVCpu->iem.s.cLongJumps++;
            pVCpu->iem.s.CTX_SUFF(pJmpBuf) = pSavedJmpBuf;
#else
            IEM_OPCODE_GET_NEXT_U8(&b);
            rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
#endif
            if (rcStrict == VINF_SUCCESS)
                pVCpu->iem.s.cInstructions++;
            if (pVCpu->iem.s.cActiveMappings > 0)
            {
                iemMemRollback(pVCpu);
            }
        }
        else if (pVCpu->iem.s.cActiveMappings > 0)
            iemMemRollback(pVCpu);
        // The shadow covers a single instruction only, so drop the inhibit flag now.
        VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
    }

    // The function is declared to return a status; hand the result back to the caller.
    return rcStrict;
}

IEMExecLots:

模拟执行多条指令

// Emulates a run of guest instructions.
//
// cMaxInstructions: instruction budget for this call.
// cPollRate:        used as a bit mask on the remaining budget; timers are polled only
//                   when (remaining & cPollRate) == 0.
// pcInstructions:   optional; receives the number of instructions executed by this call.
//
// Returns the status of the last executed instruction after it has been passed through
// iemExecStatusCodeFiddling, or the prefetch status if the initial prefetch failed.
//
// NOTE(review): fIntrEnabled is computed in code elided from this excerpt.
VMMDECL(VBOXSTRICTRC) IEMExecLots(PVMCPUCC pVCpu, uint32_t cMaxInstructions, uint32_t cPollRate, uint32_t *pcInstructions)
{
  // Snapshot the instruction counter so the number executed here can be reported.
  uint32_t const cInstructionsAtStart = pVCpu->iem.s.cInstructions;

  // If an interrupt is pending in TRPM and may be delivered, inject it before emulating.
  if (   fIntrEnabled
      && TRPMHasTrap(pVCpu)
      && EMGetInhibitInterruptsPC(pVCpu) != pVCpu->cpum.GstCtx.rip)
  {
    uint8_t     u8TrapNo;
    TRPMEVENT   enmType;
    uint32_t    uErrCode;
    RTGCPTR     uCr2;
    int rc2 = TRPMQueryTrapAll(pVCpu, &u8TrapNo, &enmType, &uErrCode, &uCr2, NULL /* pu8InstLen */, NULL /* fIcebp */);
    AssertRC(rc2);
    Assert(enmType == TRPM_HARDWARE_INT);
    // Per the article this ends up in iemRaiseXcptOrInt, switching the guest to its handler.
    VBOXSTRICTRC rcStrict = IEMInjectTrap(pVCpu, u8TrapNo, enmType, (uint16_t)uErrCode, uCr2, 0 /* cbInstr */);
    // The event has been handed over; clear it in TRPM.
    TRPMResetTrap(pVCpu);
  }

  // Set up the decoder and prefetch the first instruction; only run if that worked.
  VBOXSTRICTRC rcStrict = iemInitDecoderAndPrefetchOpcodes(pVCpu, false);
  if (rcStrict == VINF_SUCCESS)
  {
    uint32_t cMaxInstructionsGccStupidity = cMaxInstructions;   // remaining instruction budget
    PVMCC pVM = pVCpu->CTX_SUFF(pVM);
    for (;;)
    {
      // Fetch the first opcode byte and execute one instruction.
      uint8_t b; IEM_OPCODE_GET_NEXT_U8(&b);
      rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
      if (RT_LIKELY(rcStrict == VINF_SUCCESS))
      {
        pVCpu->iem.s.cInstructions++;
        if (RT_LIKELY(pVCpu->iem.s.rcPassUp == VINF_SUCCESS))
        {
          // Pending per-CPU forced actions, ignoring the ones masked out here
          // (CR3 sync, TLB flush, interrupt inhibition, NMI blocking, unhalt).
          uint64_t fCpu = pVCpu->fLocalForcedActions
            & ( VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
                                      | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                                      | VMCPU_FF_TLB_FLUSH
                                      | VMCPU_FF_INHIBIT_INTERRUPTS
                                      | VMCPU_FF_BLOCK_NMIS
                                      | VMCPU_FF_UNHALT ));
          // Keep going when nothing relevant is pending - or only APIC/PIC interrupts
          // while the guest has IF clear - and no VM-wide forced action is set.
          if (RT_LIKELY(   (   !fCpu
                            || (   !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
                                && !pVCpu->cpum.GstCtx.rflags.Bits.u1IF) )
                        && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
          {
            if (cMaxInstructionsGccStupidity-- > 0)
            {
              // Poll the timers only at the configured rate; continue unless the poll
              // reports something to service.
              if (   (cMaxInstructionsGccStupidity & cPollRate) != 0
                  || !TMTimerPollBool(pVM, pVCpu))
              {
                // Reset the decoder state for the next instruction.
                iemReInitDecoder(pVCpu);
                continue;
              }
            }
          }
        }
      }
      else if (pVCpu->iem.s.cActiveMappings > 0)
      {
        // The instruction failed with mappings still active: undo them
        // (same handling as in iemExecOneInner).
        iemMemRollback(pVCpu);
      }
      // Reached on failure or when one of the continue conditions did not hold:
      // normalise the status and leave the loop.
      rcStrict = iemExecStatusCodeFiddling(pVCpu, rcStrict);
      break;
    }
  }
  else if (pVCpu->iem.s.cActiveMappings > 0)
  {
    // Prefetch failed (same handling as in IEMExecOne).
    iemMemRollback(pVCpu);
  }

  // Report how many instructions were emulated by this call.
  if (pcInstructions)
    *pcInstructions = pVCpu->iem.s.cInstructions - cInstructionsAtStart;
  return rcStrict;
}

iemInitExec:

初始化VCPU->iem.s结构体,每次需要模拟执行指令之前,需要调用这个函数

// Initialises the per-VCPU IEM state (pVCpu->iem.s) for execution; per the article
// this has to be called before instructions are emulated.
// NOTE(review): abridged excerpt - further field initialisations are elided ("...").
DECLINLINE(void) iemInitExec(PVMCPUCC pVCpu, bool fBypassHandlers)
{
	  // Current privilege level (CPL) of the guest.
    pVCpu->iem.s.uCpl               = CPUMGetGuestCPL(pVCpu);
    // Current CPU mode (the decoder code elsewhere distinguishes IEMMODE_64BIT from the rest).
    pVCpu->iem.s.enmCpuMode         = iemCalcCpuMode(pVCpu);
    // 0xfe is not derived from guest state here - presumably a "not decoded yet" marker; confirm.
    pVCpu->iem.s.enmDefAddrMode     = (IEMMODE)0xfe;
    pVCpu->iem.s.enmEffAddrMode     = (IEMMODE)0xfe;
    ...
    // No memory mappings active, no status to pass up yet.
    pVCpu->iem.s.cActiveMappings    = 0;
    pVCpu->iem.s.iNextMapping       = 0;
    pVCpu->iem.s.rcPassUp           = VINF_SUCCESS;
    pVCpu->iem.s.fBypassHandlers    = fBypassHandlers;
    ....
}

异常处理类函数:支持模拟过程中抛异常

iemRaiseXcptOrInt

// Raises an exception or interrupt in the guest and dispatches it to the guest handler.
//
// cbInstr:  length of the raising instruction (used for software interrupts).
// u8Vector: vector to deliver.
// fFlags:   IEM_XCPT_FLAGS_* describing the event.
// uErr:     error code, meaningful when IEM_XCPT_FLAGS_ERR is set.
// uCr2:     CR2 value, meaningful when IEM_XCPT_FLAGS_CR2 is set.
//
// If another event is already being delivered (cXcptRecursions != 0), the pair is
// evaluated first and may be turned into #DF, a CPU shutdown (triple fault) or a
// CPU-hang error. Returns the status of the mode-specific delivery worker, or the
// status of one of those early exits.
DECL_NO_INLINE(IEM_STATIC, VBOXSTRICTRC)
iemRaiseXcptOrInt(PVMCPUCC    pVCpu,
                  uint8_t     cbInstr,
                  uint8_t     u8Vector,
                  uint32_t    fFlags,
                  uint16_t    uErr,
                  uint64_t    uCr2)
{
  // Remember the event that was being delivered so it can be restored on the way out.
  uint8_t const  uPrevXcpt = pVCpu->iem.s.uCurXcpt;
  uint32_t const fPrevXcpt = pVCpu->iem.s.fCurXcpt;
  if (pVCpu->iem.s.cXcptRecursions != 0)
  {
    // Nested delivery. Give up beyond 4 levels so recursion cannot run away
    // (the macro name indicates it returns from this function).
    if (pVCpu->iem.s.cXcptRecursions >= 4)
    {
      IEM_RETURN_ASPECT_NOT_IMPLEMENTED_LOG(("Too many fault nestings.\n"));
    }

    // Decide what to raise based on the previous and the current event.
    IEMXCPTRAISE enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fPrevXcpt, uPrevXcpt, fFlags, u8Vector,
                                                     NULL /* pXcptRaiseInfo */);
    if (enmRaise == IEMXCPTRAISE_CURRENT_XCPT)
    { /* likely: deliver the current event unchanged */ }
    else if (enmRaise == IEMXCPTRAISE_DOUBLE_FAULT)
    {
      // Replace the event by #DF with a zero error code.
      fFlags   = IEM_XCPT_FLAGS_T_CPU_XCPT | IEM_XCPT_FLAGS_ERR;
      u8Vector = X86_XCPT_DF;
      uErr     = 0;
    }
    else if (enmRaise == IEMXCPTRAISE_TRIPLE_FAULT)
    {
      // Triple fault: cannot be delivered, shut the virtual CPU down.
      return iemInitiateCpuShutdown(pVCpu);
    }
    else if (enmRaise == IEMXCPTRAISE_CPU_HANG)
    {
      return VERR_EM_GUEST_CPU_HANG;
    }
    else
    {
      // Unexpected verdict: internal processing error.
      return VERR_IEM_IPE_9;
    }
  }

  // Record the event now being delivered.
  pVCpu->iem.s.cXcptRecursions++;
  pVCpu->iem.s.uCurXcpt    = u8Vector;
  pVCpu->iem.s.fCurXcpt    = fFlags;
  pVCpu->iem.s.uCurXcptErr = uErr;
  pVCpu->iem.s.uCurXcptCr2 = uCr2;

  // Pick the delivery worker from the guest mode: real mode when CR0.PE is clear,
  // long mode when EFER.LMA is set, protected mode otherwise.
  VBOXSTRICTRC rcStrict;
  if (!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_PE))
    rcStrict = iemRaiseXcptOrIntInRealMode(pVCpu, cbInstr, u8Vector, fFlags, uErr, uCr2);
  else if (pVCpu->cpum.GstCtx.msrEFER & MSR_K6_EFER_LMA)
    rcStrict = iemRaiseXcptOrIntInLongMode(pVCpu, cbInstr, u8Vector, fFlags, uErr, uCr2);
  else
    rcStrict = iemRaiseXcptOrIntInProtMode(pVCpu, cbInstr, u8Vector, fFlags, uErr, uCr2);

  // Delivery finished: unwind the recursion bookkeeping.
  pVCpu->iem.s.cXcptRecursions--;
  pVCpu->iem.s.uCurXcpt = uPrevXcpt;
  pVCpu->iem.s.fCurXcpt = fPrevXcpt;
  return rcStrict;
}
// Classifies an exception vector as contributory, page-fault, double-fault or benign.
// The class is what IEMEvaluateRecursiveXcpt uses to decide how two events combine.
//
// uVector: exception vector; asserted to be <= X86_XCPT_LAST.
// Returns the class; every vector not listed below is IEMXCPTCLASS_BENIGN.
IEM_STATIC IEMXCPTCLASS iemGetXcptClass(uint8_t uVector)
{
    Assert(uVector <= X86_XCPT_LAST);
    switch (uVector)
    {
        case X86_XCPT_DE:   /* Divide Error */
        case X86_XCPT_TS:   /* Invalid TSS */
        case X86_XCPT_NP:   /* Segment Not Present */
        case X86_XCPT_SS:   /* Stack Fault */
        case X86_XCPT_GP:   /* General Protection */
        case X86_XCPT_SX:   /* AMD only */
            return IEMXCPTCLASS_CONTRIBUTORY;
        case X86_XCPT_PF:   /* Page Fault */
        case X86_XCPT_VE:   /* Intel only */
            return IEMXCPTCLASS_PAGE_FAULT;
        case X86_XCPT_DF:   /* Double Fault */
            return IEMXCPTCLASS_DOUBLE_FAULT;
    }
    return IEMXCPTCLASS_BENIGN;
}
// Decides what to raise when a new event (uCurVector) occurs while a previous one
// (uPrevVector, typed by fPrevFlags) is still being delivered.
//
// Returns IEMXCPTRAISE_CURRENT_XCPT unless the pair escalates to a double fault,
// a triple fault or a CPU hang. When pfXcptRaiseInfo is non-NULL it receives extra
// IEMXCPTRAISEINFO_* details about the combination.
VMM_INT_DECL(IEMXCPTRAISE) IEMEvaluateRecursiveXcpt(PVMCPUCC pVCpu, uint32_t fPrevFlags, uint8_t uPrevVector, uint32_t fCurFlags,
                                                    uint8_t uCurVector, PIEMXCPTRAISEINFO pfXcptRaiseInfo)
{
  IEMXCPTRAISE     enmVerdict = IEMXCPTRAISE_CURRENT_XCPT;
  IEMXCPTRAISEINFO fInfo      = IEMXCPTRAISEINFO_NONE;

  if (!(fPrevFlags & IEM_XCPT_FLAGS_T_CPU_XCPT))
  {
    if (fPrevFlags & IEM_XCPT_FLAGS_T_EXT_INT)
    {
      // Previous event was an external interrupt; note a #PF raised during its delivery.
      fInfo = IEMXCPTRAISEINFO_EXT_INT_XCPT;
      if (uCurVector == X86_XCPT_PF)
        fInfo |= IEMXCPTRAISEINFO_EXT_INT_PF;
    }
    else
    {
      // Neither a CPU exception nor an external interrupt: software interrupt.
      fInfo = IEMXCPTRAISEINFO_SOFT_INT_XCPT;
    }
  }
  else
  {
    // Previous event was a CPU exception.
    IEMXCPTCLASS const enmPrevClass = iemGetXcptClass(uPrevVector);
    if (enmPrevClass == IEMXCPTCLASS_BENIGN)
    {
      if (uPrevVector == X86_XCPT_NMI)
      {
        // Event during NMI delivery; note a #PF specifically.
        fInfo = IEMXCPTRAISEINFO_NMI_XCPT;
        if (uCurVector == X86_XCPT_PF)
          fInfo |= IEMXCPTRAISEINFO_NMI_PF;
      }
      else if (   uPrevVector == X86_XCPT_AC
               && uCurVector  == X86_XCPT_AC)
      {
        // #AC while delivering #AC: the CPU would hang.
        enmVerdict = IEMXCPTRAISE_CPU_HANG;
        fInfo      = IEMXCPTRAISEINFO_AC_AC;
      }
    }
    else
    {
      // The class of the current vector is only looked up for a non-benign predecessor.
      IEMXCPTCLASS const enmCurClass   = iemGetXcptClass(uCurVector);
      bool const         fCurIsPf      = enmCurClass == IEMXCPTCLASS_PAGE_FAULT;
      bool const         fCurIsContrib = enmCurClass == IEMXCPTCLASS_CONTRIBUTORY;
      switch (enmPrevClass)
      {
        case IEMXCPTCLASS_PAGE_FAULT:
          // #PF followed by #PF or a contributory exception -> double fault.
          if (fCurIsPf || fCurIsContrib)
          {
            enmVerdict = IEMXCPTRAISE_DOUBLE_FAULT;
            if (fCurIsPf)
              fInfo = IEMXCPTRAISEINFO_PF_PF;
            else
              fInfo = IEMXCPTRAISEINFO_PF_CONTRIBUTORY_XCPT;
          }
          break;

        case IEMXCPTCLASS_CONTRIBUTORY:
          // Contributory followed by contributory -> double fault.
          if (fCurIsContrib)
            enmVerdict = IEMXCPTRAISE_DOUBLE_FAULT;
          break;

        case IEMXCPTCLASS_DOUBLE_FAULT:
          // #DF followed by a contributory exception or #PF -> triple fault.
          if (fCurIsContrib || fCurIsPf)
            enmVerdict = IEMXCPTRAISE_TRIPLE_FAULT;
          break;

        default:
          break;
      }
    }
  }

  if (pfXcptRaiseInfo)
    *pfXcptRaiseInfo = fInfo;
  return enmVerdict;
}
// Delivers an exception or interrupt in real mode: reads the 4-byte vector table
// entry, pushes a 3-word frame (IP, CS, FLAGS) and loads CS:IP from the entry.
//
// Returns VINF_IEM_RAISED_XCPT for CPU exceptions (IEM_XCPT_FLAGS_T_CPU_XCPT),
// VINF_SUCCESS for other events, or the status of a failing stack push.
// uErr and uCr2 are not used in this excerpt.
IEM_STATIC VBOXSTRICTRC
iemRaiseXcptOrIntInRealMode(PVMCPUCC      pVCpu,
                            uint8_t     cbInstr,
                            uint8_t     u8Vector,
                            uint32_t    fFlags,
                            uint16_t    uErr,
                            uint64_t    uCr2)
{
 		// Read the 4-byte table entry at idtr.pIdt + 4 * vector (used below as .off / .sel).
 		// NOTE(review): the status of this fetch is overwritten below without being
 		// checked in this excerpt.
  	RTFAR16 Idte;
    VBOXSTRICTRC rcStrict = iemMemFetchDataU32(pVCpu, (uint32_t *)&Idte, UINT8_MAX, pVCpu->cpum.GstCtx.idtr.pIdt + UINT32_C(4) * u8Vector);
  
  	// Build the exception frame: 6 bytes, i.e. three 16-bit words.
  	uint16_t *pu16Frame;
    uint64_t  uNewRsp;
    rcStrict = iemMemStackPushBeginSpecial(pVCpu, 6, (void **)&pu16Frame, &uNewRsp);
    if (rcStrict != VINF_SUCCESS)
        return rcStrict;
  	// Word 2: FLAGS (low 16 bits of EFLAGS).
    uint32_t fEfl = IEMMISC_GET_EFL(pVCpu);
    pu16Frame[2] = (uint16_t)fEfl;
  	// Word 1: CS selector.
    pu16Frame[1] = (uint16_t)pVCpu->cpum.GstCtx.cs.Sel;
  	// Word 0: return IP - the address after the instruction for software interrupts,
  	// the current IP otherwise.
    pu16Frame[0] = (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT) ? pVCpu->cpum.GstCtx.ip + cbInstr : pVCpu->cpum.GstCtx.ip;
  	// Commit the frame; uNewRsp is the stack pointer value computed by the push-begin call.
    rcStrict = iemMemStackPushCommitSpecial(pVCpu, pu16Frame, uNewRsp);
    if (RT_UNLIKELY(rcStrict != VINF_SUCCESS))
        return rcStrict;
  	
  	// Point CS:IP at the handler; the real-mode segment base is selector << 4.
    pVCpu->cpum.GstCtx.cs.Sel           = Idte.sel;
    pVCpu->cpum.GstCtx.cs.ValidSel      = Idte.sel;
    pVCpu->cpum.GstCtx.cs.fFlags        = CPUMSELREG_FLAGS_VALID;
    pVCpu->cpum.GstCtx.cs.u64Base       = (uint32_t)Idte.sel << 4;
    pVCpu->cpum.GstCtx.rip              = Idte.off;
  	// Clear IF, TF and AC in EFLAGS for the handler.
    fEfl &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_AC);
    IEMMISC_SET_EFL(pVCpu, fEfl);
   
   	return fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT ? VINF_IEM_RAISED_XCPT : VINF_SUCCESS;
}


iemRaiseXcptOrIntInLongMode: 64位抛异常的函数, 下图是64位的异常处理栈

在这里插入图片描述

// Delivers an exception or interrupt in long mode: validates the 16-byte IDT gate and
// the target code segment, selects the stack (current RSP, or one loaded from the
// 64-bit TSS on a CPL change or when the gate names an IST slot), writes the 64-bit
// frame and switches CS:RIP, RSP and RFLAGS to the handler.
//
// Returns VINF_IEM_RAISED_XCPT for CPU exceptions, VINF_SUCCESS for other events, or
// the status of whichever fault/check/memory operation failed on the way.
//
// NOTE(review): abridged excerpt - Idte and offIdt are declared in elided code.
IEM_STATIC VBOXSTRICTRC
iemRaiseXcptOrIntInLongMode(PVMCPUCC      pVCpu,
                            uint8_t     cbInstr,
                            uint8_t     u8Vector,
                            uint32_t    fFlags,
                            uint16_t    uErr,
                            uint64_t    uCr2)
{
  // Read the IDT entry at idtr.pIdt + offIdt. Each fetch reads 8 bytes, so the
  // 16-byte entry takes two reads.
  VBOXSTRICTRC rcStrict = iemMemFetchSysU64(pVCpu, &Idte.au64[0], UINT8_MAX, pVCpu->cpum.GstCtx.idtr.pIdt + offIdt);
    if (RT_LIKELY(rcStrict == VINF_SUCCESS))
        rcStrict = iemMemFetchSysU64(pVCpu, &Idte.au64[1], UINT8_MAX, pVCpu->cpum.GstCtx.idtr.pIdt + offIdt + 8);
  // Validate the gate: an entry with the descriptor-type bit set is rejected with
  // #GP (error code: IDT flag + vector).
  if (Idte.Gate.u1DescType)
  {
    return iemRaiseGeneralProtectionFault(pVCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Vector << X86_TRAP_ERR_SEL_SHIFT));
  }
  // EFLAGS bits to clear before entering the handler (the article points to
  // Intel SDM section 6.12.1.3). Interrupt gates additionally clear IF; trap gates do not.
  uint32_t fEflToClear = X86_EFL_TF | X86_EFL_NT | X86_EFL_RF | X86_EFL_VM;
  switch (Idte.Gate.u4Type)
  {
    case AMD64_SEL_TYPE_SYS_INT_GATE:
      fEflToClear |= X86_EFL_IF;
      break;
    case AMD64_SEL_TYPE_SYS_TRAP_GATE:
      break;
  }
  // Software interrupts (not flagged as ICEBP) require CPL <= gate DPL, otherwise #GP.
  if ((fFlags & (IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_ICEBP_INSTR)) == IEM_XCPT_FLAGS_T_SOFT_INT)
  {
    if (pVCpu->iem.s.uCpl > Idte.Gate.u2Dpl)
    {
      return iemRaiseGeneralProtectionFault(pVCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Vector << X86_TRAP_ERR_SEL_SHIFT));
    }
  }
  // The gate must be present, otherwise a not-present fault is raised.
  if (!Idte.Gate.u1Present)
  {
    return iemRaiseSelectorNotPresentWithErr(pVCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Vector << X86_TRAP_ERR_SEL_SHIFT));
  }
  // Take the handler's code segment selector from the gate; a null selector is #GP(0).
  RTSEL NewCS = Idte.Gate.u16Sel;
  if (!(NewCS & X86_SEL_MASK_OFF_RPL))
  {
    return iemRaiseGeneralProtectionFault0(pVCpu);
  }
  // Fetch the descriptor of that code segment.
  IEMSELDESC DescCS;
  rcStrict = iemMemFetchSelDesc(pVCpu, &DescCS, NewCS, X86_XCPT_GP);
  if (rcStrict != VINF_SUCCESS)
  {
    return rcStrict;
  }
  // The descriptor-type bit must be set here (the opposite of the gate check above).
  if (!DescCS.Long.Gen.u1DescType)
  {
    return iemRaiseGeneralProtectionFault(pVCpu, NewCS & X86_SEL_MASK_OFF_RPL);
  }
  // It must be a 64-bit code segment: L set, D/B clear, code type.
  if (   !DescCS.Long.Gen.u1Long
      || DescCS.Long.Gen.u1DefBig
      || !(DescCS.Long.Gen.u4Type & X86_SEL_TYPE_CODE) )
  {
    return iemRaiseGeneralProtectionFault(pVCpu, NewCS & X86_SEL_MASK_OFF_RPL);
  }
  // ... and it must be present.
  if (!DescCS.Legacy.Gen.u1Present)
  {
    return iemRaiseSelectorNotPresentBySelector(pVCpu, NewCS);
  }
  
  // Assemble the 64-bit handler entry point from the three offset fields of the gate;
  // it must be canonical.
  uint64_t const uNewRip = Idte.Gate.u16OffsetLow
                           | ((uint32_t)Idte.Gate.u16OffsetHigh << 16)
                           | ((uint64_t)Idte.Gate.u32OffsetTop  << 32);
 	if (!IEM_IS_CANONICAL(uNewRip))
  {
    return iemRaiseGeneralProtectionFault0(pVCpu);
  }
  
  // New CPL: unchanged for a conforming code segment, otherwise the DPL of the target CS.
  uint64_t        uNewRsp;
  uint8_t const   uNewCpl = DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_CONF
    ? pVCpu->iem.s.uCpl : DescCS.Legacy.Gen.u2Dpl;
  if (   uNewCpl != pVCpu->iem.s.uCpl
      || Idte.Gate.u3IST != 0)
  {
    // CPL changes or the gate selects an IST slot: load the new RSP from the 64-bit TSS.
    rcStrict = iemRaiseLoadStackFromTss64(pVCpu, uNewCpl, Idte.Gate.u3IST, &uNewRsp);
    if (rcStrict != VINF_SUCCESS)
      return rcStrict;
  }
  else
    // Same CPL and no IST: keep using the current stack.
    uNewRsp = pVCpu->cpum.GstCtx.rsp;
  // Clear the low 4 bits, i.e. round the new RSP down to a 16-byte boundary before the
  // frame is laid out (in 64-bit mode the CPU aligns the stack like this when it
  // delivers an interrupt - see the Intel SDM on the 64-bit mode stack frame).
  uNewRsp &= ~(uint64_t)0xf;
  
  // RFLAGS image saved in the frame: RF is cleared for DRx instruction breakpoints and
  // software interrupts, and set for everything else.
  uint32_t        fEfl    = IEMMISC_GET_EFL(pVCpu);
  if (fFlags & (IEM_XCPT_FLAGS_DRx_INSTR_BP | IEM_XCPT_FLAGS_T_SOFT_INT))
    fEfl &= ~X86_EFL_RF;
  else
    fEfl |= X86_EFL_RF; 
  
  // Switch to the new privilege level (done before the stack is mapped).
  uint8_t const uOldCpl = pVCpu->iem.s.uCpl;
  pVCpu->iem.s.uCpl = uNewCpl;
  
  // Build the frame: 5 qwords (RIP, CS, RFLAGS, RSP, SS), plus one for the error code
  // when IEM_XCPT_FLAGS_ERR is set.
  uint32_t   cbStackFrame = sizeof(uint64_t) * (5 + !!(fFlags & IEM_XCPT_FLAGS_ERR));
  RTPTRUNION uStackFrame;
  rcStrict = iemMemMap(pVCpu, &uStackFrame.pv, cbStackFrame, UINT8_MAX,
                       uNewRsp - cbStackFrame, IEM_ACCESS_STACK_W | IEM_ACCESS_WHAT_SYS); 
  if (rcStrict != VINF_SUCCESS)
    return rcStrict;
  void * const pvStackFrame = uStackFrame.pv;
  // Error code goes first (lowest address) when present.
  if (fFlags & IEM_XCPT_FLAGS_ERR)
    *uStackFrame.pu64++ = uErr;
  // Return RIP: the address after the instruction for software interrupts, else the current RIP.
  uStackFrame.pu64[0] = fFlags & IEM_XCPT_FLAGS_T_SOFT_INT ? pVCpu->cpum.GstCtx.rip + cbInstr : pVCpu->cpum.GstCtx.rip;
  // Old CS, with its RPL bits forced to the old CPL.
  uStackFrame.pu64[1] = (pVCpu->cpum.GstCtx.cs.Sel & ~X86_SEL_RPL) | uOldCpl; /* CPL paranoia */
  // RFLAGS image computed above.
  uStackFrame.pu64[2] = fEfl;
  // Old RSP.
  uStackFrame.pu64[3] = pVCpu->cpum.GstCtx.rsp;
  // Old SS.
  uStackFrame.pu64[4] = pVCpu->cpum.GstCtx.ss.Sel;
  // Commit the frame to the guest stack.
  rcStrict = iemMemCommitAndUnmap(pVCpu, pvStackFrame, IEM_ACCESS_STACK_W | IEM_ACCESS_WHAT_SYS);
  if (rcStrict != VINF_SUCCESS)
    return rcStrict;
  
  // Update the registers so execution continues in the handler.
  // On a CPL change SS is loaded with a null selector carrying the new CPL as RPL,
  // with the new DPL and the "unusable" attribute.
  if (uNewCpl != uOldCpl)
  {
    pVCpu->cpum.GstCtx.ss.Sel        = 0 | uNewCpl;
    pVCpu->cpum.GstCtx.ss.ValidSel   = 0 | uNewCpl;
    pVCpu->cpum.GstCtx.ss.fFlags     = CPUMSELREG_FLAGS_VALID;
    pVCpu->cpum.GstCtx.ss.u32Limit   = UINT32_MAX;
    pVCpu->cpum.GstCtx.ss.u64Base    = 0;
    pVCpu->cpum.GstCtx.ss.Attr.u     = (uNewCpl << X86DESCATTR_DPL_SHIFT) | X86DESCATTR_UNUSABLE;
  }
  // In all cases: RSP points at the new frame, CS (RPL = new CPL) and RIP point at the handler.
  pVCpu->cpum.GstCtx.rsp           = uNewRsp - cbStackFrame;
  pVCpu->cpum.GstCtx.cs.Sel        = (NewCS & ~X86_SEL_RPL) | uNewCpl;
  pVCpu->cpum.GstCtx.cs.ValidSel   = (NewCS & ~X86_SEL_RPL) | uNewCpl;
  pVCpu->cpum.GstCtx.cs.fFlags     = CPUMSELREG_FLAGS_VALID;
  pVCpu->cpum.GstCtx.cs.u32Limit   = X86DESC_LIMIT_G(&DescCS.Legacy);
  pVCpu->cpum.GstCtx.cs.u64Base    = X86DESC_BASE(&DescCS.Legacy);
  pVCpu->cpum.GstCtx.cs.Attr.u     = X86DESC_GET_HID_ATTR(&DescCS.Legacy);
  pVCpu->cpum.GstCtx.rip           = uNewRip;

  // Apply the EFLAGS clearing selected by the gate type.
  fEfl &= ~fEflToClear;
  IEMMISC_SET_EFL(pVCpu, fEfl);
  
  // Events flagged with IEM_XCPT_FLAGS_CR2 (e.g. #PF, where CR2 is the faulting address)
  // also update CR2.
  if (fFlags & IEM_XCPT_FLAGS_CR2)
    pVCpu->cpum.GstCtx.cr2 = uCr2;
  // CPU exceptions get vector-specific state adjustments from iemRaiseXcptAdjustState
  // (the article gives #DB updating DR7 as an example; not visible in this excerpt).
  if (fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT)
        iemRaiseXcptAdjustState(pVCpu, u8Vector);
  return fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT ? VINF_IEM_RAISED_XCPT : VINF_SUCCESS;
}

iemRaiseXcptOrIntInProtMode: 16/32位保护模式异常处理

保护模式下权限是否切换对应的stack是不同的,见下图

IEM_STATIC VBOXSTRICTRC
iemRaiseXcptOrIntInProtMode(PVMCPUCC      pVCpu,
                            uint8_t     cbInstr,
                            uint8_t     u8Vector,
                            uint32_t    fFlags,
                            uint16_t    uErr,
                            uint64_t    uCr2)
{
  //获取IDT表,idt表有64位
  if (pVCpu->cpum.GstCtx.idtr.cbIdt < UINT32_C(8) * u8Vector + 7)
  {
    return iemRaiseGeneralProtectionFault(pVCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Vector << X86_TRAP_ERR_SEL_SHIFT));
  }
  X86DESC Idte;
  VBOXSTRICTRC rcStrict = iemMemFetchSysU64(pVCpu, &Idte.u, UINT8_MAX,
                                            pVCpu->cpum.GstCtx.idtr.pIdt + UINT32_C(8) * u8Vector);
  if (RT_UNLIKELY(rcStrict != VINF_SUCCESS))
  {
    return rcStrict;
  }
  //保护模式的IDT分成3种类型: Task Gate,Interrupt Gate和Trap Gate
  //Idte.Gate.u4Type标记是具体什么类型
  bool     fTaskGate   = false;
  uint8_t  f32BitGate  = true;
  uint32_t fEflToClear = X86_EFL_TF | X86_EFL_NT | X86_EFL_RF | X86_EFL_VM;
  switch (Idte.Gate.u4Type)
  {
    case X86_SEL_TYPE_SYS_UNDEFINED:
    case X86_SEL_TYPE_SYS_286_TSS_AVAIL:
    case X86_SEL_TYPE_SYS_LDT:
    case X86_SEL_TYPE_SYS_286_TSS_BUSY:
    case X86_SEL_TYPE_SYS_286_CALL_GATE:
    case X86_SEL_TYPE_SYS_UNDEFINED2:
    case X86_SEL_TYPE_SYS_386_TSS_AVAIL:
    case X86_SEL_TYPE_SYS_UNDEFINED3:
    case X86_SEL_TYPE_SYS_386_TSS_BUSY:
    case X86_SEL_TYPE_SYS_386_CALL_GATE:
    case X86_SEL_TYPE_SYS_UNDEFINED4:
      {
        //错误的type
        return iemRaiseGeneralProtectionFault(pVCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Vector << X86_TRAP_ERR_SEL_SHIFT));
      }
		//Interrupt Gate
    case X86_SEL_TYPE_SYS_286_INT_GATE:
      f32BitGate = false;
    case X86_SEL_TYPE_SYS_386_INT_GATE:
      fEflToClear |= X86_EFL_IF;
      break;
		//task gate
    case X86_SEL_TYPE_SYS_TASK_GATE:
      fTaskGate = true;
      break;
    //trap Gate
    case X86_SEL_TYPE_SYS_286_TRAP_GATE:
      f32BitGate = false;
    case X86_SEL_TYPE_SYS_386_TRAP_GATE:
      break;
  }
  
  //如果是task gate,需要做task switch
  if (fTaskGate)
  {
    uint16_t const uExt     = (    (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT)
                                   && !(fFlags & IEM_XCPT_FLAGS_ICEBP_INSTR)) ? 0 : 1;
    uint16_t const uSelMask = X86_SEL_MASK_OFF_RPL;
    RTSEL          SelTSS   = Idte.Gate.u16Sel;
    //获取TSS段信息
    IEMSELDESC DescTSS;
    rcStrict = iemMemFetchSelDescWithErr(pVCpu, &DescTSS, SelTSS, X86_XCPT_GP, (SelTSS & uSelMask) | uExt);
		//校验TSS段
    if (   DescTSS.Legacy.Gen.u1DescType
        || (   DescTSS.Legacy.Gen.u4Type != X86_SEL_TYPE_SYS_286_TSS_AVAIL
            && DescTSS.Legacy.Gen.u4Type != X86_SEL_TYPE_SYS_386_TSS_AVAIL))
    {
      return iemRaiseGeneralProtectionFault(pVCpu, (SelTSS & uSelMask) | uExt);
    }
    if (!DescTSS.Legacy.Gen.u1Present)
    {
      return iemRaiseSelectorNotPresentWithErr(pVCpu, (SelTSS & uSelMask) | uExt);
    }
    //call iemTaskSwitch切换TSS
    return iemTaskSwitch(pVCpu, IEMTASKSWITCH_INT_XCPT,
                         (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT) ? pVCpu->cpum.GstCtx.eip + cbInstr : pVCpu->cpum.GstCtx.eip,
                         fFlags, uErr, uCr2, SelTSS, &DescTSS);
	}  
  //获取CS段,并校验CS段,同longmode里的处理
  RTSEL NewCS = Idte.Gate.u16Sel;
  if (!(NewCS & X86_SEL_MASK_OFF_RPL))
  {
    return iemRaiseGeneralProtectionFault0(pVCpu);
  }
  IEMSELDESC DescCS;
  rcStrict = iemMemFetchSelDesc(pVCpu, &DescCS, NewCS, X86_XCPT_GP); 
  ...
  //获取异常处理入口地址
  uint32_t const uNewEip =    Idte.Gate.u4Type == X86_SEL_TYPE_SYS_286_INT_GATE
                             || Idte.Gate.u4Type == X86_SEL_TYPE_SYS_286_TRAP_GATE
                           ? Idte.Gate.u16OffsetLow
                           : Idte.Gate.u16OffsetLow | ((uint32_t)Idte.Gate.u16OffsetHigh << 16);
  //新的eip不能超过CS段里的限制
  uint32_t cbLimitCS = X86DESC_LIMIT_G(&DescCS.Legacy);
  if (uNewEip > cbLimitCS)
  {
    return iemRaiseGeneralProtectionFault(pVCpu, 0);
  }
  
  //权限改变
  if (uNewCpl != pVCpu->iem.s.uCpl)
  {
    //获取新的ss段和esp,并校验是否正确
    RTSEL    NewSS;
    uint32_t uNewEsp;
    rcStrict = iemRaiseLoadStackFromTss32Or16(pVCpu, uNewCpl, &NewSS, &uNewEsp);
    IEMSELDESC DescSS;
    rcStrict = iemMiscValidateNewSS(pVCpu, NewSS, uNewCpl, &DescSS);
    if (!DescSS.Legacy.Gen.u1DefBig)
    {
      uNewEsp = (uint16_t)uNewEsp;
    }
    uint32_t cbLimitSS = X86DESC_LIMIT_G(&DescSS.Legacy);
    uint8_t const cbStackFrame = !(fEfl & X86_EFL_VM)
      ? (fFlags & IEM_XCPT_FLAGS_ERR ? 12 : 10) << f32BitGate
      : (fFlags & IEM_XCPT_FLAGS_ERR ? 20 : 18) << f32BitGate;
    if (!(DescSS.Legacy.Gen.u4Type & X86_SEL_TYPE_DOWN))
    {
      if (   uNewEsp - 1 > cbLimitSS
          || uNewEsp < cbStackFrame)
      {
        return iemRaiseSelectorBoundsBySelector(pVCpu, NewSS);
      }
    }
    else
    {
      if (   uNewEsp - 1 > (DescSS.Legacy.Gen.u1DefBig ? UINT32_MAX : UINT16_MAX)
          || uNewEsp - cbStackFrame < cbLimitSS + UINT32_C(1))
      {
        return iemRaiseSelectorBoundsBySelector(pVCpu, NewSS);
      }
    }
    //切换权限
    uint8_t const uOldCpl = pVCpu->iem.s.uCpl;
    pVCpu->iem.s.uCpl = uNewCpl;

    //准备异常调用栈
    RTPTRUNION uStackFrame;
    rcStrict = iemMemMap(pVCpu, &uStackFrame.pv, cbStackFrame, UINT8_MAX,
                         uNewEsp - cbStackFrame + X86DESC_BASE(&DescSS.Legacy), IEM_ACCESS_STACK_W | IEM_ACCESS_WHAT_SYS); 
    void * const pvStackFrame = uStackFrame.pv;
    if (f32BitGate)
    {
      if (fFlags & IEM_XCPT_FLAGS_ERR)
        *uStackFrame.pu32++ = uErr;
      uStackFrame.pu32[0] = (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT) ? pVCpu->cpum.GstCtx.eip + cbInstr : pVCpu->cpum.GstCtx.eip;
      uStackFrame.pu32[1] = (pVCpu->cpum.GstCtx.cs.Sel & ~X86_SEL_RPL) | uOldCpl;
      uStackFrame.pu32[2] = fEfl;
      uStackFrame.pu32[3] = pVCpu->cpum.GstCtx.esp;
      uStackFrame.pu32[4] = pVCpu->cpum.GstCtx.ss.Sel;
      //虚拟8086模式多保存4个段寄存器
      if (fEfl & X86_EFL_VM)
      {
        uStackFrame.pu32[1] = pVCpu->cpum.GstCtx.cs.Sel;
        uStackFrame.pu32[5] = pVCpu->cpum.GstCtx.es.Sel;
        uStackFrame.pu32[6] = pVCpu->cpum.GstCtx.ds.Sel;
        uStackFrame.pu32[7] = pVCpu->cpum.GstCtx.fs.Sel;
        uStackFrame.pu32[8] = pVCpu->cpum.GstCtx.gs.Sel;
      }
    }
    else
    {
      if (fFlags & IEM_XCPT_FLAGS_ERR)
        *uStackFrame.pu16++ = uErr;
      uStackFrame.pu16[0] = (fFlags & IEM_XCPT_FLAGS_T_SOFT_INT) ? pVCpu->cpum.GstCtx.ip + cbInstr : pVCpu->cpum.GstCtx.ip;
      uStackFrame.pu16[1] = (pVCpu->cpum.GstCtx.cs.Sel & ~X86_SEL_RPL) | uOldCpl;
      uStackFrame.pu16[2] = fEfl;
      uStackFrame.pu16[3] = pVCpu->cpum.GstCtx.sp;
      uStackFrame.pu16[4] = pVCpu->cpum.GstCtx.ss.Sel;
      if (fEfl & X86_EFL_VM)
      {
        uStackFrame.pu16[1] = pVCpu->cpum.GstCtx.cs.Sel;
        uStackFrame.pu16[5] = pVCpu->cpum.GstCtx.es.Sel;
        uStackFrame.pu16[6] = pVCpu->cpum.GstCtx.ds.Sel;
        uStackFrame.pu16[7] = pVCpu->cpum.GstCtx.fs.Sel;
        uStackFrame.pu16[8] = pVCpu->cpum.GstCtx.gs.Sel;
      }
    }
    rcStrict = iemMemCommitAndUnmap(pVCpu, pvStackFrame, IEM_ACCESS_STACK_W | IEM_ACCESS_WHAT_SYS);
    //切换到新的ss段和esp
    pVCpu->cpum.GstCtx.ss.Sel            = NewSS;
    pVCpu->cpum.GstCtx.ss.ValidSel       = NewSS;
    pVCpu->cpum.GstCtx.ss.fFlags         = CPUMSELREG_FLAGS_VALID;
    pVCpu->cpum.GstCtx.ss.u32Limit       = cbLimitSS;
    pVCpu->cpum.GstCtx.ss.u64Base        = X86DESC_BASE(&DescSS.Legacy);
    pVCpu->cpum.GstCtx.ss.Attr.u         = X86DESC_GET_HID_ATTR(&DescSS.Legacy);
    if (!pVCpu->cpum.GstCtx.ss.Attr.n.u1DefBig)
      pVCpu->cpum.GstCtx.sp            = (uint16_t)(uNewEsp - cbStackFrame);
    else
      pVCpu->cpum.GstCtx.rsp           = uNewEsp - cbStackFrame;
    //虚拟8086模式需要加载4个空的段寄存器
    if (fEfl & X86_EFL_VM)
    {
      iemHlpLoadNullDataSelectorOnV86Xcpt(pVCpu, &pVCpu->cpum.GstCtx.gs);
      iemHlpLoadNullDataSelectorOnV86Xcpt(pVCpu, &pVCpu->cpum.GstCtx.fs);
      iemHlpLoadNullDataSelectorOnV86Xcpt(pVCpu, &pVCpu->cpum.GstCtx.es);
      iemHlpLoadNullDataSelectorOnV86Xcpt(pVCpu, &pVCpu->cpum.GstCtx.ds);
    }
	}
  else
  {
    //权限没有改变,先获取新的栈
    uint64_t        uNewRsp;
    RTPTRUNION      uStackFrame;
    uint8_t const   cbStackFrame = (fFlags & IEM_XCPT_FLAGS_ERR ? 8 : 6) << f32BitGate;
    rcStrict = iemMemStackPushBeginSpecial(pVCpu, cbStackFrame, &uStackFrame.pv, &uNewRsp);
    if (rcStrict != VINF_SUCCESS)
      return rcStrict;
    void * const pvStackFrame = uStackFrame.pv;
    //只需要保存errorcode,cs,eip,eflag
    if (f32BitGate)
    {
      if (fFlags & IEM_XCPT_FLAGS_ERR)
        *uStackFrame.pu32++ = uErr;
      uStackFrame.pu32[0] = fFlags & IEM_XCPT_FLAGS_T_SOFT_INT ? pVCpu->cpum.GstCtx.eip + cbInstr : pVCpu->cpum.GstCtx.eip;
      uStackFrame.pu32[1] = (pVCpu->cpum.GstCtx.cs.Sel & ~X86_SEL_RPL) | pVCpu->iem.s.uCpl;
      uStackFrame.pu32[2] = fEfl;
    }
    else
    {
      if (fFlags & IEM_XCPT_FLAGS_ERR)
        *uStackFrame.pu16++ = uErr;
      uStackFrame.pu16[0] = fFlags & IEM_XCPT_FLAGS_T_SOFT_INT ? pVCpu->cpum.GstCtx.eip + cbInstr : pVCpu->cpum.GstCtx.eip;
      uStackFrame.pu16[1] = (pVCpu->cpum.GstCtx.cs.Sel & ~X86_SEL_RPL) | pVCpu->iem.s.uCpl;
      uStackFrame.pu16[2] = fEfl;
    }
    //提交到GuestOSrsp内存里
    rcStrict = iemMemCommitAndUnmap(pVCpu, pvStackFrame, IEM_ACCESS_STACK_W); 

    /* Mark the CS selector as 'accessed'. */
    if (!(DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
    {
      rcStrict = iemMemMarkSelDescAccessed(pVCpu, NewCS);
      DescCS.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
    }
    //切换到新的esp
    pVCpu->cpum.GstCtx.rsp = uNewRsp;
	}
  //准备新的运行上下文: cs和新的eip
  pVCpu->cpum.GstCtx.cs.Sel            = (NewCS & ~X86_SEL_RPL) | uNewCpl;
  pVCpu->cpum.GstCtx.cs.ValidSel       = (NewCS & ~X86_SEL_RPL) | uNewCpl;
  pVCpu->cpum.GstCtx.cs.fFlags         = CPUMSELREG_FLAGS_VALID;
  pVCpu->cpum.GstCtx.cs.u32Limit       = cbLimitCS;
  pVCpu->cpum.GstCtx.cs.u64Base        = X86DESC_BASE(&DescCS.Legacy);
  pVCpu->cpum.GstCtx.cs.Attr.u         = X86DESC_GET_HID_ATTR(&DescCS.Legacy);
  pVCpu->cpum.GstCtx.rip               = uNewEip;  
  fEfl &= ~fEflToClear;
  IEMMISC_SET_EFL(pVCpu, fEfl);
	//设置cr2
  if (fFlags & IEM_XCPT_FLAGS_CR2)
    pVCpu->cpum.GstCtx.cr2 = uCr2;
  if (fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT)
    iemRaiseXcptAdjustState(pVCpu, u8Vector);
  return fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT ? VINF_IEM_RAISED_XCPT : VINF_SUCCESS;
}

15.3 提供VMExit中模拟发生VMExit指令的APIs

VMX里设置了GuestOS里执行部分特殊指令的时候触发VMExit,需要在root模式下模拟执行这条指令,IEM提供一系列API。

IEMExecDecodedOut 模拟执行OUT指令

/**
 * Emulates an already-decoded OUT instruction on behalf of a VM-exit handler.
 *
 * @returns Strict status code: the result of iemCImpl_out after it has been
 *          passed through iemUninitExecAndFiddleStatusAndMaybeReenter.
 * @param   pVCpu    The virtual CPU being emulated.
 * @param   cbInstr  Instruction length in bytes (not referenced directly here;
 *                   presumably consumed by IEM_CIMPL_CALL_3 - confirm).
 * @param   u16Port  The I/O port number.
 * @param   fImm     Forwarded unchanged to iemCImpl_out.
 * @param   cbReg    Operand size in bytes, forwarded to iemCImpl_out.
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedOut(PVMCPUCC pVCpu, uint8_t cbInstr, uint16_t u16Port, bool fImm, uint8_t cbReg)
{
    /* Set up the IEM execution state; handlers are not bypassed. */
    iemInitExec(pVCpu, false /*fBypassHandlers*/);

    /* Run the OUT worker with its three arguments and hand the status to the
       common uninit/status-fiddling exit path. */
    VBOXSTRICTRC rcStrictOut = IEM_CIMPL_CALL_3(iemCImpl_out, u16Port, fImm, cbReg);

    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrictOut);
}

/**
 * Worker for the OUT instruction: writes AL, AX or EAX to an I/O port.
 *
 * Excerpt: the lines "..." and "...." stand for code elided from this listing
 * (rcStrict is not declared in the visible part).
 *
 * @param   u16Port  The I/O port number handed to IOMIOPortWrite.
 * @param   fImm     Not referenced in the visible part of this excerpt.
 * @param   cbReg    Operand size in bytes; 1, 2 and 4 are accepted, any other
 *                   value ends in AssertFailedReturn(VERR_IEM_IPE_4).
 * @returns The status assigned from IOMIOPortWrite (the elided code may adjust
 *          it before it is returned).
 */
IEM_CIMPL_DEF_3(iemCImpl_out, uint16_t, u16Port, bool, fImm, uint8_t, cbReg)
{
    ...
		/* Fetch the value to output from AL/AX/EAX according to the operand size. */
    uint32_t u32Value;
    switch (cbReg)
    {
        case 1: u32Value = pVCpu->cpum.GstCtx.al;  break;
        case 2: u32Value = pVCpu->cpum.GstCtx.ax;  break;
        case 4: u32Value = pVCpu->cpum.GstCtx.eax; break;
        default: AssertFailedReturn(VERR_IEM_IPE_4);
    }
  	/* Perform the actual port write through IOMIOPortWrite. */
    rcStrict = IOMIOPortWrite(pVCpu->CTX_SUFF(pVM), pVCpu, u16Port, u32Value, cbReg);
    ....
    return rcStrict;
}

IEMExecDecodedIn 模拟执行IN指令

IEMExecDecodedMovCRxWrite

/**
 * Emulates an already-decoded "MOV CRx, GReg" (control register write) on
 * behalf of a VM-exit handler.
 *
 * @returns Strict status code: the result of iemCImpl_mov_Cd_Rd after it has
 *          been passed through iemUninitExecAndFiddleStatusAndMaybeReenter.
 * @param   pVCpu    The virtual CPU being emulated.
 * @param   cbInstr  Instruction length in bytes (not referenced directly here;
 *                   presumably consumed by IEM_CIMPL_CALL_2 - confirm).
 * @param   iCrReg   Index of the destination control register.
 * @param   iGReg    Index of the source general-purpose register.
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedMovCRxWrite(PVMCPUCC pVCpu, uint8_t cbInstr, uint8_t iCrReg, uint8_t iGReg)
{
    /* Set up the IEM execution state; handlers are not bypassed. */
    iemInitExec(pVCpu, false /*fBypassHandlers*/);
    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_2(iemCImpl_mov_Cd_Rd, iCrReg, iGReg);
    /* The function is declared to return VBOXSTRICTRC, so hand the worker's
       status to the caller through the same exit path IEMExecDecodedOut uses. */
    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
}

IEM_CIMPL_DEF_2(iemCImpl_mov_Cd_Rd, uint8_t, iCrReg, uint8_t, iGReg)
{
    //读取src值
    uint64_t uNewCrX;
    if (pVCpu->iem.s.enmCpuMode == IEMMODE_64BIT)
        uNewCrX = iemGRegFetchU64(pVCpu, iGReg);
    else
        uNewCrX = iemGRegFetchU32(pVCpu, iGReg);
  	//调用iemCImpl_load_CrX吧上面读取的值写入CrX寄存器
    return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, iCrReg, uNewCrX, IEMACCESSCRX_MOV_CRX, iGReg);
}

/**
 * Common worker that loads a new value into a guest control register.
 *
 * Excerpt: "..." marks code elided from this listing. The elided parts include
 * the declaration of uOldCrX, the validation of the new value, the remaining
 * control registers and the return statement.
 *
 * @param   iCrReg        Control register index; selects the case below.
 * @param   uNewCrX       The value to load.
 * @param   enmAccessCrX  Kind of access (callers in this file pass
 *                        IEMACCESSCRX_MOV_CRX, IEMACCESSCRX_CLTS or
 *                        IEMACCESSCRX_LMSW); not referenced in the visible part.
 * @param   iGReg         Source general register index (UINT8_MAX from the
 *                        CLTS/LMSW callers); not referenced in the visible part.
 */
IEM_CIMPL_DEF_4(iemCImpl_load_CrX, uint8_t, iCrReg, uint64_t, uNewCrX, IEMACCESSCRX, enmAccessCrX, uint8_t, iGReg)
{
    /* Each control register needs its own handling when it is written. */
    switch (iCrReg)
    {
        /* CR0 */
        case 0:
        {
            /* (Elided in this excerpt: validation of the new CR0 value.) */
            /* Store the new CR0 value in the guest CPU context. */
            CPUMSetGuestCR0(pVCpu, uNewCrX);
            /* (Elided in this excerpt: updating dependent VCPU state.) */
            /* When CR0.PG changes while EFER.LME is set, EFER.LMA is set or
               cleared to match, i.e. long mode is activated or deactivated. */
            if (   (uNewCrX & X86_CR0_PG) != (uOldCrX & X86_CR0_PG)
                && (pVCpu->cpum.GstCtx.msrEFER & MSR_K6_EFER_LME) )
            {
                uint64_t NewEFER = pVCpu->cpum.GstCtx.msrEFER;
                if (uNewCrX & X86_CR0_PG)
                    NewEFER |= MSR_K6_EFER_LMA;
                else
                    NewEFER &= ~MSR_K6_EFER_LMA;
                CPUMSetGuestEFER(pVCpu, NewEFER);
            }
            ...
            break;
        }
        case 2:
        {
            /* CR2 has no side effects here: the value is stored as-is. */
            pVCpu->cpum.GstCtx.cr2 = uNewCrX;
            /* Do not fall through into the handling of the next register. */
            break;
        }
        ...
   }
}

IEMExecDecodedMovCRxRead

/**
 * Emulates an already-decoded "MOV GReg, CRx" (control register read) on
 * behalf of a VM-exit handler.
 *
 * @returns Strict status code: the result of iemCImpl_mov_Rd_Cd after it has
 *          been passed through iemUninitExecAndFiddleStatusAndMaybeReenter.
 * @param   pVCpu    The virtual CPU being emulated.
 * @param   cbInstr  Instruction length in bytes (not referenced directly here;
 *                   presumably consumed by IEM_CIMPL_CALL_2 - confirm).
 * @param   iGReg    Index of the destination general-purpose register.
 * @param   iCrReg   Index of the source control register.
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedMovCRxRead(PVMCPUCC pVCpu, uint8_t cbInstr, uint8_t iGReg, uint8_t iCrReg)
{
    /* Set up the IEM execution state; handlers are not bypassed. */
    iemInitExec(pVCpu, false /*fBypassHandlers*/);
    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_2(iemCImpl_mov_Rd_Cd, iGReg, iCrReg);
    /* The function is declared to return VBOXSTRICTRC, so hand the worker's
       status to the caller through the same exit path IEMExecDecodedOut uses. */
    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
}

/**
 * Worker for "MOV GReg, CRx": reads a control register and stores the value
 * in a general-purpose register.
 *
 * Excerpt: "..." marks code elided from this listing (the declaration of crX,
 * the cases for the other control registers and the return statement are not
 * shown).
 *
 * @param   iGReg   Index of the destination general-purpose register.
 * @param   iCrReg  Index of the control register to read.
 */
IEM_CIMPL_DEF_2(iemCImpl_mov_Rd_Cd, uint8_t, iGReg, uint8_t, iCrReg)
{
  switch (iCrReg)
  {
    /* CR0 is read straight from the guest context. */
    case 0:
      crX = pVCpu->cpum.GstCtx.cr0;
      break;
    ...
    /* CR8 is derived from the APIC task-priority register, so the APIC API
       is queried and the TPR value is shifted right by four bits. */
    case 8:
    {
      /* The braces give the declarations below their own scope; a declaration
         may not directly follow a case label. */
      uint8_t uTpr;
      int rc = APICGetTpr(pVCpu, &uTpr, NULL, NULL);
      if (RT_SUCCESS(rc))
        crX = uTpr >> 4;
      break;
    }
  }
  /* Write the value into the guest's general-purpose register. */
  *(uint64_t *)iemGRegRef(pVCpu, iGReg) = crX;
}

IEMExecDecodedClts

这个函数其实是MovCRx类VMExit里模拟执行CLTS(Clear Task-Switched Flag)指令的操作

/**
 * Emulates an already-decoded CLTS instruction on behalf of a VM-exit handler.
 *
 * @returns Strict status code: the result of iemCImpl_clts after it has been
 *          passed through iemUninitExecAndFiddleStatusAndMaybeReenter.
 * @param   pVCpu    The virtual CPU being emulated.
 * @param   cbInstr  Instruction length in bytes (not referenced directly here;
 *                   presumably consumed by IEM_CIMPL_CALL_0 - confirm).
 */
VMM_INT_DECL(VBOXSTRICTRC) IEMExecDecodedClts(PVMCPUCC pVCpu, uint8_t cbInstr)
{
    /* Set up the IEM execution state; handlers are not bypassed. */
    iemInitExec(pVCpu, false /*fBypassHandlers*/);
    VBOXSTRICTRC rcStrict = IEM_CIMPL_CALL_0(iemCImpl_clts);
    /* The function is declared to return VBOXSTRICTRC, so hand the worker's
       status to the caller through the same exit path IEMExecDecodedOut uses. */
    return iemUninitExecAndFiddleStatusAndMaybeReenter(pVCpu, rcStrict);
}
/**
 * Worker for CLTS: clears the TS bit of the guest CR0 and loads the result
 * through the common CRx loader.
 *
 * @returns Whatever iemCImpl_load_CrX returns.
 */
IEM_CIMPL_DEF_0(iemCImpl_clts)
{
    /* Start from the current guest CR0 and drop the TS bit. */
    uint64_t uCr0NoTs = pVCpu->cpum.GstCtx.cr0;
    uCr0NoTs &= ~X86_CR0_TS;

    /* No general register is involved, hence UINT8_MAX for iGReg. */
    return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, /*cr*/ 0, uCr0NoTs, IEMACCESSCRX_CLTS, UINT8_MAX /* iGReg */);
}

IEMExecDecodedLmsw

CR0寄存器里PE/MP/EM/TS位修改, 最终调到iemCImpl_lmsw的函数里

IEM_CIMPL_DEF_2(iemCImpl_lmsw, uint16_t, u16NewMsw, RTGCPTR, GCPtrEffDst)
{
	uint64_t uNewCr0 = pVCpu->cpum.GstCtx.cr0  & ~(X86_CR0_MP | X86_CR0_EM | X86_CR0_TS);
  uNewCr0 |= u16NewMsw & (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS);
  return IEM_CIMPL_CALL_4(iemCImpl_load_CrX, /*cr*/ 0, uNewCr0, IEMACCESSCRX_LMSW, UINT8_MAX /* iGReg */);
}

IEMExecDecodedXsetbv

xsetbv指令触发的VMExit的模拟,最终调到iemCImpl_xsetbv里

/**
 * Worker for XSETBV: writes the value taken from EAX and EDX into the extended
 * control register selected by ECX.
 *
 * Only register index 0 (XCR0) is handled; every other index raises #GP(0).
 *
 * NOTE(review): this excerpt shows no return statement for the paths where
 * CR4.OSXSAVE is clear or the CPL is not 0 - the handling of those cases is
 * not visible here and is presumably elided.
 *
 * @returns VINF_SUCCESS after XCR0 was updated and RIP advanced, otherwise the
 *          status of iemRaiseGeneralProtectionFault0.
 */
IEM_CIMPL_DEF_0(iemCImpl_xsetbv)
{
    if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
    {
      	/* The instruction is only carried out at CPL 0. */
        if (pVCpu->iem.s.uCpl == 0)
        {
          	/* Operands: ECX selects the register, EAX and EDX are combined
          	   into the 64-bit value to write. */
            uint32_t uEcx = pVCpu->cpum.GstCtx.ecx;
            uint64_t uNewValue = RT_MAKE_U64(pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.edx);
            switch (uEcx)
            {
                case 0:
                {
                  	/* Set the guest's XCR0 register. */
                    int rc = CPUMSetGuestXcr0(pVCpu, uNewValue);
                    if (rc == VINF_SUCCESS)
                        break;
                  	/* CPUM rejected the value: raise #GP(0) in the guest. */
                    return iemRaiseGeneralProtectionFault0(pVCpu);
                }
                case 1:
                default:
                		/* Any other ECX value currently raises #GP(0). */
                    return iemRaiseGeneralProtectionFault0(pVCpu);
            }
            /* Success: step RIP over the instruction. */
            iemRegAddToRipAndClearRF(pVCpu, cbInstr);
            return VINF_SUCCESS;
        }
    }
}

IEMExecDecodedWbinvd: cache强制写回,这条指令现在什么都没做,但这样会导致GuestOS里出现数据不同步问题,理论上其实应该调用真机的wbinvd指令。

IEMExecDecodedInvd:让缓存失效,这条指令执行之后cache里内容失效,没有写回的cache内容将丢失 ,这条指令现在也什么都没做。

IEMExecDecodedInvlpg: 让某个地址的TLB缓存失效

/**
 * Worker for INVLPG: invalidates the TLB entry for the page at GCPtrPage.
 *
 * NOTE(review): in this excerpt rc is never examined and no value is returned;
 * the status handling is presumably elided.
 *
 * @param   GCPtrPage  Guest virtual address of the page to invalidate.
 */
IEM_CIMPL_DEF_1(iemCImpl_invlpg, RTGCPTR, GCPtrPage)
{
	/* Ask PGM to invalidate the page; presumably PGM keeps a software view of
	   the guest-virtual to guest-physical translations and drops the entry
	   for this address - confirm against PGMInvalidatePage. */
	int rc = PGMInvalidatePage(pVCpu, GCPtrPage);
  /* Step RIP over the instruction. */
  iemRegAddToRipAndClearRF(pVCpu, cbInstr);
}

IEMExecDecodedInvpcid:进程上下文识别(PCID)失效指令。

这里需要先介绍一下PCID(Process Context IDentifiers),针对每个进程分配专用的ID标识,用于区分TLB中不同进程对应的entry

  1. PCID是一个12位的标识符,位于CR3寄存器的最低12位。
  2. CR4寄存器中的bit 17(PCIDE Flag)控制是否开启PCID
  3. 当开启PCID之后,CPU将所有的TLB entry通过PCID进行分类,不同PCID对应不同的进程,那么不同的进程就可以使用自己的独立的TLB entry,进程间互不影响和干扰。

引入PCID之后,会大大减少进程切换导致的TLB Miss

没有PCID支持的环境下,当CPU切换进程的时候,会使当前CPU上的所有TLB无效

在开启PCID之后的环境下,当CPU切换进程的时候,CPU会根据PCID只无效部分TLB项

Invpcid第一个参数是InvpcidType,第二个参数是虚拟地址和PCID的输入

IEM_CIMPL_DEF_3(iemCImpl_invpcid, uint8_t, iEffSeg, RTGCPTR, GCPtrInvpcidDesc, uint64_t, uInvpcidType)
{
	//获取参数
	VBOXSTRICTRC rcStrict = iemMemFetchDataU128(pVCpu, &uDesc, iEffSeg, GCPtrInvpcidDesc);
  //65到128位是输入的虚拟地址
  RTGCUINTPTR64 const GCPtrInvAddr = uDesc.s.Hi;
  //0到11位是PCID
  uint8_t       const uPcid        = uDesc.s.Lo & UINT64_C(0xfff);
  uint32_t      const uCr4         = pVCpu->cpum.GstCtx.cr4;
  uint64_t      const uCr3         = pVCpu->cpum.GstCtx.cr3;
  //根据输入到pcid type不同,有不同的处理方法
  switch (uInvpcidType)
  {
    //Pcid进程的GCPtrInvAddr的TLB缓存无效
    case X86_INVPCID_TYPE_INDV_ADDR:
      {
        //cr4里没有开启PCID,抛GP异常
        if (  !(uCr4 & X86_CR4_PCIDE)
            && uPcid != 0)
        {
          return iemRaiseGeneralProtectionFault0(pVCpu);
        }
        //其实Virtualbox并没有实现PCID对应的功能,所以这边只是刷新所有的TLB
        PGMFlushTLB(pVCpu, uCr3, false /* fGlobal */);
        break;
      }
	  //Pcid进程的所有的TLB缓存无效
    case X86_INVPCID_TYPE_SINGLE_CONTEXT:
      {
        PGMFlushTLB(pVCpu, uCr3, false /* fGlobal */);
        break;
      }
		//所有进程的TLB无效包括全局页表
    case X86_INVPCID_TYPE_ALL_CONTEXT_INCL_GLOBAL:
      {
        PGMFlushTLB(pVCpu, uCr3, true /* fGlobal */);
        break;
      }
    //所有进程的TLB无效但不包括全局页表
    case X86_INVPCID_TYPE_ALL_CONTEXT_EXCL_GLOBAL:
      {
        PGMFlushTLB(pVCpu, uCr3, false /* fGlobal */);
        break;
      }
  }
}

IEMExecDecodedCpuid: CPUID指令模拟,之前的章节里介绍过

IEMExecDecodedRdpmc: rdpmc指令模拟,读取Performance-Monitoring Counters(只有AMD有这个VMExit,Intel CPU怎么处理?直接执行获取真机的返回值?)

/**
 * Worker for RDPMC: reports a counter value of zero in RAX and RDX.
 *
 * No performance counter is actually read, so the guest cannot obtain real
 * PMC values through this path.
 *
 * NOTE(review): no return statement is shown in this excerpt; it is presumably
 * elided.
 */
IEM_CIMPL_DEF_0(iemCImpl_rdpmc)
{
    /* Return zero in both result registers (the context pointer is pVCpu). */
    pVCpu->cpum.GstCtx.rax = 0;
    pVCpu->cpum.GstCtx.rdx = 0;
    /* Clear the RAX/RDX bits in fExtrn now that both registers were written. */
    pVCpu->cpum.GstCtx.fExtrn &= ~(CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX);
}

IEMExecDecodedRdtsc:rdtsc指令模拟

IEM_CIMPL_DEF_0(iemCImpl_rdtsc)
{
	 //调用TM里的函数获取虚拟机时间tick
	 uint64_t uTicks = TMCpuTickGet(pVCpu);
	 pVCpu->cpum.GstCtx.rax = RT_LO_U32(uTicks);
   pVCpu->cpum.GstCtx.rdx = RT_HI_U32(uTicks);
   pVCpu->cpum.GstCtx.fExtrn &= ~(CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX); /* For 	IEMExecDecodedRdtsc. */
   iemRegAddToRipAndClearRF(pVCpu, cbInstr);
   return VINF_SUCCESS;
}

IEMExecDecodedRdtscp: rdtscp指令模拟

/**
 * Worker for RDTSCP: returns the virtual CPU's tick count in RAX/RDX and the
 * low 32 bits of the TSC_AUX MSR in RCX.
 *
 * @returns The status of the MSR_K8_TSC_AUX query: VINF_SUCCESS when the
 *          registers were updated and RIP advanced, otherwise the status
 *          reported by CPUMQueryGuestMsr with the guest state left as
 *          CPUMQueryGuestMsr left it.
 */
IEM_CIMPL_DEF_0(iemCImpl_rdtscp)
{
    /* Read the MSR_K8_TSC_AUX MSR directly into RCX. */
    VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pVCpu->cpum.GstCtx.rcx);
    if (rcStrict == VINF_SUCCESS)
    {
        /* Keep only the low 32 bits of RCX. */
        pVCpu->cpum.GstCtx.rcx &= UINT32_C(0xffffffff);
        /* TM supplies the tick count for this virtual CPU. */
        uint64_t uTicks = TMCpuTickGet(pVCpu);
        pVCpu->cpum.GstCtx.rax = RT_LO_U32(uTicks);
        pVCpu->cpum.GstCtx.rdx = RT_HI_U32(uTicks);
        pVCpu->cpum.GstCtx.fExtrn &= ~(CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX); /* For IEMExecDecodedRdtscp. */
        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
    }
    /* Always report the status so that no path falls off the end. */
    return rcStrict;
}

IEMExecDecodedRdmsr: rdmsr指令模拟

/**
 * Worker for RDMSR: reads the MSR selected by ECX and returns its value in
 * RAX (low part) and RDX (high part).
 *
 * NOTE(review): the declaration of uValue and the handling of a failed MSR
 * query are not shown in this excerpt; they are presumably elided.
 *
 * @returns VINF_SUCCESS when the MSR was read and RIP advanced.
 */
IEM_CIMPL_DEF_0(iemCImpl_rdmsr)
{
		/* Query the MSR value through the CPUM API; ECX holds the MSR index. */
		VBOXSTRICTRC rcStrict = CPUMQueryGuestMsr(pVCpu, pVCpu->cpum.GstCtx.ecx, &uValue.u);
    if (rcStrict == VINF_SUCCESS)
    {
        pVCpu->cpum.GstCtx.rax = uValue.s.Lo;
        pVCpu->cpum.GstCtx.rdx = uValue.s.Hi;
        /* Clear the RAX/RDX bits in fExtrn now that both registers were written. */
        pVCpu->cpum.GstCtx.fExtrn &= ~(CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX);
        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
        return VINF_SUCCESS;
    }
}

IEMExecDecodedWrmsr: wrmsr指令模拟

/**
 * Worker for WRMSR: writes a value to a guest MSR through CPUM.
 *
 * NOTE(review): the set-up of idMsr and uValue and the handling of a failed
 * MSR write are not shown in this excerpt; they are presumably elided.
 *
 * @returns VINF_SUCCESS when the MSR was written and RIP advanced.
 */
IEM_CIMPL_DEF_0(iemCImpl_wrmsr)
{
		/* Perform the MSR write through the CPUM API. */
		VBOXSTRICTRC rcStrict = CPUMSetGuestMsr(pVCpu, idMsr, uValue.u);
    if (rcStrict == VINF_SUCCESS)
    {
        /* Success: step RIP over the instruction. */
        iemRegAddToRipAndClearRF(pVCpu, cbInstr);
        return VINF_SUCCESS;
    }
}

IEMExecDecodedMonitor: monitor指令模拟,EM章节里介绍过

IEMExecDecodedMwait: mwait指令模拟,EM章节里介绍过

IEMExecDecodedHlt: hlt指令模拟

/**
 * Worker for HLT.
 *
 * Nothing is emulated here beyond stepping over the instruction; the halt
 * itself is left to the caller, which receives VINF_EM_HALT.
 *
 * @returns VINF_EM_HALT.
 */
IEM_CIMPL_DEF_0(iemCImpl_hlt)
{
    /* Point RIP at the next instruction. */
    iemRegAddToRipAndClearRF(pVCpu, cbInstr);

    /* Let the upper layers deal with the halt. */
    return VINF_EM_HALT;
}

Virtualbox为了支持嵌套VMX,还需要模拟执行嵌套Guest的VMX指令和异常退出VMExit等,嵌套VMX在后面的章节中单独介绍。

IEMExecDecodedVmxon

IEMExecDecodedVmwrite

...

IEMExecDecodedInvvpid

IEMExecVmxVmexitExtInt

IEMExecVmxVmexit

参考资料

http://happyseeker.github.io/kernel/2018/05/04/pti-and-pcid.html

https://blog.csdn.net/omnispace/article/details/61415935

Intel手册

发布了26 篇原创文章 · 获赞 10 · 访问量 1237

猜你喜欢

转载自blog.csdn.net/qq_29684547/article/details/104159547
今日推荐