Userspace starts guest execution by issuing the KVM_RUN ioctl on the vCPU file descriptor.
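The sketch below is a minimal, hypothetical userspace loop (error handling trimmed; kvm_fd is the /dev/kvm handle, and vcpu_fd is assumed to come from the usual KVM_CREATE_VM / KVM_CREATE_VCPU calls with guest memory and registers already set up). It only shows how KVM_RUN is issued on the vCPU fd and how the mmap'ed kvm_run area reports each exit:

#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int run_vcpu(int kvm_fd, int vcpu_fd)
{
    /* The shared kvm_run area is mmap'ed from the vCPU fd. */
    int run_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
                               MAP_SHARED, vcpu_fd, 0);
    if (run == MAP_FAILED)
        return -1;

    for (;;) {
        /* arg must be 0; the kernel handler below rejects anything else. */
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
            return -1;

        switch (run->exit_reason) {
        case KVM_EXIT_HLT:
            return 0;                   /* guest executed HLT */
        case KVM_EXIT_IO:
        case KVM_EXIT_MMIO:
            /* emulate the access here, then loop to re-enter the guest */
            break;
        default:
            fprintf(stderr, "unhandled exit %d\n", run->exit_reason);
            return -1;
        }
    }
}

In the kernel, the ioctl lands in kvm_vcpu_ioctl() in virt/kvm/kvm_main.c: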

static long kvm_vcpu_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
{
    struct kvm_vcpu *vcpu = filp->private_data;
    void __user *argp = (void __user *)arg;
    int r;
    struct kvm_fpu *fpu = NULL;
    struct kvm_sregs *kvm_sregs = NULL;

    if (vcpu->kvm->mm != current->mm)
        return -EIO;

    if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
        return -EINVAL;

#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
    /*
     * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
     * so vcpu_load() would break it.
     */
    if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
        return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
#endif

    r = vcpu_load(vcpu);
    if (r)
        return r;
    switch (ioctl) {
    case KVM_RUN:
        r = -EINVAL;
        if (arg)
            goto out;
        if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
            /* The thread running this VCPU changed. */
            struct pid *oldpid = vcpu->pid;
            struct pid *newpid = get_task_pid(current, PIDTYPE_PID);

            rcu_assign_pointer(vcpu->pid, newpid);
            if (oldpid)
                synchronize_rcu();
            put_pid(oldpid);
        }
        r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
        trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
        break;
    case KVM_GET_REGS: {
        struct kvm_regs *kvm_regs;

        r = -ENOMEM;
        kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
        if (!kvm_regs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
        if (r)
            goto out_free1;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
            goto out_free1;
        r = 0;
out_free1:
        kfree(kvm_regs);
        break;
    }
    case KVM_SET_REGS: {
        struct kvm_regs *kvm_regs;

        r = -ENOMEM;
        kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
        if (IS_ERR(kvm_regs)) {
            r = PTR_ERR(kvm_regs);
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
        kfree(kvm_regs);
        break;
    }
    case KVM_GET_SREGS: {
        kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
        r = -ENOMEM;
        if (!kvm_sregs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_SREGS: {
        kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
        if (IS_ERR(kvm_sregs)) {
            r = PTR_ERR(kvm_sregs);
            kvm_sregs = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
        break;
    }
    case KVM_GET_MP_STATE: {
        struct kvm_mp_state mp_state;

        r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_MP_STATE: {
        struct kvm_mp_state mp_state;

        r = -EFAULT;
        if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
        break;
    }
    case KVM_TRANSLATE: {
        struct kvm_translation tr;

        r = -EFAULT;
        if (copy_from_user(&tr, argp, sizeof(tr)))
            goto out;
        r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &tr, sizeof(tr)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_GUEST_DEBUG: {
        struct kvm_guest_debug dbg;

        r = -EFAULT;
        if (copy_from_user(&dbg, argp, sizeof(dbg)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
        break;
    }
    case KVM_SET_SIGNAL_MASK: {
        struct kvm_signal_mask __user *sigmask_arg = argp;
        struct kvm_signal_mask kvm_sigmask;
        sigset_t sigset, *p;

        p = NULL;
        if (argp) {
            r = -EFAULT;
            if (copy_from_user(&kvm_sigmask, argp,
                               sizeof(kvm_sigmask)))
                goto out;
            r = -EINVAL;
            if (kvm_sigmask.len != sizeof(sigset))
                goto out;
            r = -EFAULT;
            if (copy_from_user(&sigset, sigmask_arg->sigset,
                               sizeof(sigset)))
                goto out;
            p = &sigset;
        }
        r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
        break;
    }
    case KVM_GET_FPU: {
        fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
        r = -ENOMEM;
        if (!fpu)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_FPU: {
        fpu = memdup_user(argp, sizeof(*fpu));
        if (IS_ERR(fpu)) {
            r = PTR_ERR(fpu);
            fpu = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
        break;
    }
    default:
        r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
    }
out:
    vcpu_put(vcpu);
    kfree(fpu);
    kfree(kvm_sregs);
    return r;
}
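Register access from userspace goes through the same vCPU fd and ends up in the KVM_GET_REGS / KVM_SET_REGS cases above. A minimal, hypothetical sketch (vcpu_fd again assumed to come from KVM_CREATE_VCPU, error handling trimmed):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Read the guest GPRs, point RIP at an assumed entry address, write them back. */
static int set_entry_point(int vcpu_fd, unsigned long long entry)
{
    struct kvm_regs regs;

    if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)  /* -> kvm_arch_vcpu_ioctl_get_regs() */
        return -1;

    regs.rip    = entry;  /* guest starts executing here on the next KVM_RUN */
    regs.rflags = 0x2;    /* bit 1 of RFLAGS is reserved and must stay set */

    return ioctl(vcpu_fd, KVM_SET_REGS, &regs);   /* -> kvm_arch_vcpu_ioctl_set_regs() */
}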




int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
    struct fpu *fpu = &current->thread.fpu;
    int r;
    sigset_t sigsaved;

    fpu__activate_curr(fpu);

    if (vcpu->sigset_active)
        sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

    if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
        kvm_vcpu_block(vcpu);
        kvm_apic_accept_events(vcpu);
        clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
        r = -EAGAIN;
        goto out;
    }

    /* re-sync apic's tpr */
    if (!lapic_in_kernel(vcpu)) {
        if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
            r = -EINVAL;
            goto out;
        }
    }

    if (unlikely(vcpu->arch.complete_userspace_io)) {
        int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
        vcpu->arch.complete_userspace_io = NULL;
        r = cui(vcpu);
        if (r <= 0)
            goto out;
    } else
        WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);

    r = vcpu_run(vcpu);

out:
    post_kvm_run_save(vcpu);
    if (vcpu->sigset_active)
        sigprocmask(SIG_SETMASK, &sigsaved, NULL);

    return r;
}
The run loop itself, vcpu_run(), also lives in arch/x86/kvm/x86.c:
static int vcpu_run(struct kvm_vcpu *vcpu)
{
    int r;
    struct kvm *kvm = vcpu->kvm;

    vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

    for (;;) {
        if (kvm_vcpu_running(vcpu)) {
            r = vcpu_enter_guest(vcpu);
        } else {
            r = vcpu_block(kvm, vcpu);
        }

        if (r <= 0)
            break;

        clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
        if (kvm_cpu_has_pending_timer(vcpu))
            kvm_inject_pending_timer_irqs(vcpu);

        if (dm_request_for_irq_injection(vcpu) &&
            kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
            r = 0;
            vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
            ++vcpu->stat.request_irq_exits;
            break;
        }

        kvm_check_async_pf_completion(vcpu);

        if (signal_pending(current)) {
            r = -EINTR;
            vcpu->run->exit_reason = KVM_EXIT_INTR;
            ++vcpu->stat.signal_exits;
            break;
        }
        if (need_resched()) {
            srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
            cond_resched();
            vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
        }
    }

    srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);

    return r;
}

static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
    int r;
    bool req_int_win =
        dm_request_for_irq_injection(vcpu) &&
        kvm_cpu_accept_dm_intr(vcpu);

    bool req_immediate_exit = false;

    if (vcpu->requests) {
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
            kvm_mmu_unload(vcpu);
        if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
            __kvm_migrate_timers(vcpu);
        if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
            kvm_gen_update_masterclock(vcpu->kvm);
        if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
            kvm_gen_kvmclock_update(vcpu);
        if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
            r = kvm_guest_time_update(vcpu);
            if (unlikely(r))
                goto out;
        }
        if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
            kvm_mmu_sync_roots(vcpu);
        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
            kvm_vcpu_flush_tlb(vcpu);
        if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
            vcpu->fpu_active = 0;
            kvm_x86_ops->fpu_deactivate(vcpu);
        }
        if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
            /* Page is swapped out. Do synthetic halt */
            vcpu->arch.apf.halted = true;
            r = 1;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
            record_steal_time(vcpu);
        if (kvm_check_request(KVM_REQ_SMI, vcpu))
            process_smi(vcpu);
        if (kvm_check_request(KVM_REQ_NMI, vcpu))
            process_nmi(vcpu);
        if (kvm_check_request(KVM_REQ_PMU, vcpu))
            kvm_pmu_handle_event(vcpu);
        if (kvm_check_request(KVM_REQ_PMI, vcpu))
            kvm_pmu_deliver_pmi(vcpu);
        if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
            BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
            if (test_bit(vcpu->arch.pending_ioapic_eoi,
                         vcpu->arch.ioapic_handled_vectors)) {
                vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
                vcpu->run->eoi.vector =
                    vcpu->arch.pending_ioapic_eoi;
                r = 0;
                goto out;
            }
        }
        if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
            vcpu_scan_ioapic(vcpu);
        if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
            kvm_vcpu_reload_apic_access_page(vcpu);
        if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
            vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
            vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_HYPERV;
            vcpu->run->hyperv = vcpu->arch.hyperv.exit;
            r = 0;
            goto out;
        }

        /*
         * KVM_REQ_HV_STIMER has to be processed after
         * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
         * depend on the guest clock being up-to-date
         */
        if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
            kvm_hv_process_stimers(vcpu);
    }

    /*
     * KVM_REQ_EVENT is not set when posted interrupts are set by
     * VT-d hardware, so we have to update RVI unconditionally.
     */
    if (kvm_lapic_enabled(vcpu)) {
        /*
         * Update architecture specific hints for APIC
         * virtual interrupt delivery.
         */
        if (vcpu->arch.apicv_active)
            kvm_x86_ops->hwapic_irr_update(vcpu,
                kvm_lapic_find_highest_irr(vcpu));
    }

    if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
        kvm_apic_accept_events(vcpu);
        if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
            r = 1;
            goto out;
        }

        if (inject_pending_event(vcpu, req_int_win) != 0)
            req_immediate_exit = true;
        else {
            /* Enable NMI/IRQ window open exits if needed.
             *
             * SMIs have two cases: 1) they can be nested, and
             * then there is nothing to do here because RSM will
             * cause a vmexit anyway; 2) or the SMI can be pending
             * because inject_pending_event has completed the
             * injection of an IRQ or NMI from the previous vmexit,
             * and then we request an immediate exit to inject the SMI.
             */
            if (vcpu->arch.smi_pending && !is_smm(vcpu))
                req_immediate_exit = true;
            if (vcpu->arch.nmi_pending)
                kvm_x86_ops->enable_nmi_window(vcpu);
            if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
                kvm_x86_ops->enable_irq_window(vcpu);
        }

        if (kvm_lapic_enabled(vcpu)) {
            update_cr8_intercept(vcpu);
            kvm_lapic_sync_to_vapic(vcpu);
        }
    }

    r = kvm_mmu_reload(vcpu);
    if (unlikely(r)) {
        goto cancel_injection;
    }

    preempt_disable();

    kvm_x86_ops->prepare_guest_switch(vcpu);
    if (vcpu->fpu_active)
        kvm_load_guest_fpu(vcpu);
    vcpu->mode = IN_GUEST_MODE;

    srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

    /*
     * We should set ->mode before check ->requests,
     * Please see the comment in kvm_make_all_cpus_request.
     * This also orders the write to mode from any reads
     * to the page tables done while the VCPU is running.
     * Please see the comment in kvm_flush_remote_tlbs.
     */
    smp_mb__after_srcu_read_unlock();

    local_irq_disable();

    if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
        || need_resched() || signal_pending(current)) {
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
        local_irq_enable();
        preempt_enable();
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = 1;
        goto cancel_injection;
    }

    kvm_load_guest_xcr0(vcpu);

    if (req_immediate_exit) {
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        smp_send_reschedule(vcpu->cpu);
    }

    trace_kvm_entry(vcpu->vcpu_id);
    wait_lapic_expire(vcpu);
    guest_enter_irqoff();

    if (unlikely(vcpu->arch.switch_db_regs)) {
        set_debugreg(0, 7);
        set_debugreg(vcpu->arch.eff_db[0], 0);
        set_debugreg(vcpu->arch.eff_db[1], 1);
        set_debugreg(vcpu->arch.eff_db[2], 2);
        set_debugreg(vcpu->arch.eff_db[3], 3);
        set_debugreg(vcpu->arch.dr6, 6);
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
    }

    kvm_x86_ops->run(vcpu);

    /*
     * Do this here before restoring debug registers on the host.  And
     * since we do this before handling the vmexit, a DR access vmexit
     * can (a) read the correct value of the debug registers, (b) set
     * KVM_DEBUGREG_WONT_EXIT again.
     */
    if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
        WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
        kvm_x86_ops->sync_dirty_debug_regs(vcpu);
        kvm_update_dr0123(vcpu);
        kvm_update_dr6(vcpu);
        kvm_update_dr7(vcpu);
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
    }

    /*
     * If the guest has used debug registers, at least dr7
     * will be disabled while returning to the host.
     * If we don't have active breakpoints in the host, we don't
     * care about the messed up debug address registers. But if
     * we have some of them active, restore the old state.
     */
    if (hw_breakpoint_active())
        hw_breakpoint_restore();

    vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

    vcpu->mode = OUTSIDE_GUEST_MODE;
    smp_wmb();

    kvm_put_guest_xcr0(vcpu);

    kvm_x86_ops->handle_external_intr(vcpu);

    ++vcpu->stat.exits;

    guest_exit_irqoff();

    local_irq_enable();
    preempt_enable();

    vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

    /*
     * Profile KVM exit RIPs:
     */
    if (unlikely(prof_on == KVM_PROFILING)) {
        unsigned long rip = kvm_rip_read(vcpu);
        profile_hit(KVM_PROFILING, (void *)rip);
    }

    if (unlikely(vcpu->arch.tsc_always_catchup))
        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

    if (vcpu->arch.apic_attention)
        kvm_lapic_sync_from_vapic(vcpu);

    r = kvm_x86_ops->handle_exit(vcpu);
    return r;

cancel_injection:
    kvm_x86_ops->cancel_injection(vcpu);
    if (unlikely(vcpu->arch.apic_attention))
        kvm_lapic_sync_from_vapic(vcpu);
out:
    return r;
}


On Intel hardware, kvm_x86_ops->run points to vmx_vcpu_run() in arch/x86/kvm/vmx.c, which performs the actual VM entry:

static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
    struct vcpu_vmx *vmx = to_vmx(vcpu);
    unsigned long debugctlmsr, cr4;

    /* Record the guest's net vcpu time for enforced NMI injections. */
    if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
        vmx->entry_time = ktime_get();

    /* Don't enter VMX if guest state is invalid, let the exit handler
       start emulation until we arrive back to a valid state */
    if (vmx->emulation_required)
        return;

    if (vmx->ple_window_dirty) {
        vmx->ple_window_dirty = false;
        vmcs_write32(PLE_WINDOW, vmx->ple_window);
    }

    if (vmx->nested.sync_shadow_vmcs) {
        copy_vmcs12_to_shadow(vmx);
        vmx->nested.sync_shadow_vmcs = false;
    }

    if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
        vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
    if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
        vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);

    cr4 = cr4_read_shadow();
    if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
        vmcs_writel(HOST_CR4, cr4);
        vmx->host_state.vmcs_host_cr4 = cr4;
    }

    /* When single-stepping over STI and MOV SS, we must clear the
     * corresponding interruptibility bits in the guest state. Otherwise
     * vmentry fails as it then expects bit 14 (BS) in pending debug
     * exceptions being set, but that's not correct for the guest debugging
     * case. */
    if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
        vmx_set_interrupt_shadow(vcpu, 0);

    if (vmx->guest_pkru_valid)
        __write_pkru(vmx->guest_pkru);

    atomic_switch_perf_msrs(vmx);
    debugctlmsr = get_debugctlmsr();

    vmx_arm_hv_timer(vcpu);

    vmx->__launched = vmx->loaded_vmcs->launched;
    asm(
        /* Store host registers */
        "push %%" _ASM_DX "; push %%" _ASM_BP ";"
        "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
        "push %%" _ASM_CX " \n\t"
        "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
        "je 1f \n\t"
        "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
        __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
        "1: \n\t"
        /* Reload cr2 if changed */
        "mov %c[cr2](%0), %%" _ASM_AX " \n\t"
        "mov %%cr2, %%" _ASM_DX " \n\t"
        "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
        "je 2f \n\t"
        "mov %%" _ASM_AX", %%cr2 \n\t"
        "2: \n\t"
        /* Check if vmlaunch or vmresume is needed */
        "cmpl $0, %c[launched](%0) \n\t"
        /* Load guest registers.  Don't clobber flags. */
        "mov %c[rax](%0), %%" _ASM_AX " \n\t"
        "mov %c[rbx](%0), %%" _ASM_BX " \n\t"
        "mov %c[rdx](%0), %%" _ASM_DX " \n\t"
        "mov %c[rsi](%0), %%" _ASM_SI " \n\t"
        "mov %c[rdi](%0), %%" _ASM_DI " \n\t"
        "mov %c[rbp](%0), %%" _ASM_BP " \n\t"
#ifdef CONFIG_X86_64
        "mov %c[r8](%0),  %%r8  \n\t"
        "mov %c[r9](%0),  %%r9  \n\t"
        "mov %c[r10](%0), %%r10 \n\t"
        "mov %c[r11](%0), %%r11 \n\t"
        "mov %c[r12](%0), %%r12 \n\t"
        "mov %c[r13](%0), %%r13 \n\t"
        "mov %c[r14](%0), %%r14 \n\t"
        "mov %c[r15](%0), %%r15 \n\t"
#endif
        "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */

        /* Enter guest mode */
        "jne 1f \n\t"
        __ex(ASM_VMX_VMLAUNCH) "\n\t"
        "jmp 2f \n\t"
        "1: " __ex(ASM_VMX_VMRESUME) "\n\t"
        "2: "
        /* Save guest registers, load host registers, keep flags */
        "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
        "pop %0 \n\t"
        "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
        "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
        __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
        "mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
        "mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
        "mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
        "mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
#ifdef CONFIG_X86_64
        "mov %%r8,  %c[r8](%0) \n\t"
        "mov %%r9,  %c[r9](%0) \n\t"
        "mov %%r10, %c[r10](%0) \n\t"
        "mov %%r11, %c[r11](%0) \n\t"
        "mov %%r12, %c[r12](%0) \n\t"
        "mov %%r13, %c[r13](%0) \n\t"
        "mov %%r14, %c[r14](%0) \n\t"
        "mov %%r15, %c[r15](%0) \n\t"
#endif
        "mov %%cr2, %%" _ASM_AX "   \n\t"
        "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"

        "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
        "setbe %c[fail](%0) \n\t"
        ".pushsection .rodata \n\t"
        ".global vmx_return \n\t"
        "vmx_return: " _ASM_PTR " 2b \n\t"
        ".popsection"
          : : "c"(vmx), "d"((unsigned long)HOST_RSP),
        [launched]"i"(offsetof(struct vcpu_vmx, __launched)),
        [fail]"i"(offsetof(struct vcpu_vmx, fail)),
        [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
        [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
        [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
        [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
        [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
        [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
        [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
        [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
        [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
        [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
        [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
        [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
        [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
        [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
        [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
        [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
        [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
        [wordsize]"i"(sizeof(ulong))
          : "cc", "memory"
#ifdef CONFIG_X86_64
        , "rax", "rbx", "rdi", "rsi"
        , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
        , "eax", "ebx", "edi", "esi"
#endif
          );

    /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
    if (debugctlmsr)
        update_debugctlmsr(debugctlmsr);

#ifndef CONFIG_X86_64
    /*
     * The sysexit path does not restore ds/es, so we must set them to
     * a reasonable value ourselves.
     *
     * We can't defer this to vmx_load_host_state() since that function
     * may be executed in interrupt context, which saves and restore segments
     * around it, nullifying its effect.
     */
    loadsegment(ds, __USER_DS);
    loadsegment(es, __USER_DS);
#endif

    vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
                              | (1 << VCPU_EXREG_RFLAGS)
                              | (1 << VCPU_EXREG_PDPTR)
                              | (1 << VCPU_EXREG_SEGMENTS)
                              | (1 << VCPU_EXREG_CR3));
    vcpu->arch.regs_dirty = 0;

    vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);

    vmx->loaded_vmcs->launched = 1;

    vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);

    /*
     * eager fpu is enabled if PKEY is supported and CR4 is switched
     * back on host, so it is safe to read guest PKRU from current
     * XSAVE.
     */
    if (boot_cpu_has(X86_FEATURE_OSPKE)) {
        vmx->guest_pkru = __read_pkru();
        if (vmx->guest_pkru != vmx->host_pkru) {
            vmx->guest_pkru_valid = true;
            __write_pkru(vmx->host_pkru);
        } else
            vmx->guest_pkru_valid = false;
    }

    /*
     * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
     * we did not inject a still-pending event to L1 now because of
     * nested_run_pending, we need to re-enable this bit.
     */
    if (vmx->nested.nested_run_pending)
        kvm_make_request(KVM_REQ_EVENT, vcpu);

    vmx->nested.nested_run_pending = 0;

    vmx_complete_atomic_exit(vmx);
    vmx_recover_nmi_blocking(vmx);
    vmx_complete_interrupts(vmx);
}