WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] [PATCH 09/13] Nested Virtualization: svm specific implem

To: Christoph Egger <Christoph.Egger@xxxxxxx>
Subject: Re: [Xen-devel] [PATCH 09/13] Nested Virtualization: svm specific implementation
From: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Date: Tue, 16 Nov 2010 14:54:11 +0000
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Tue, 16 Nov 2010 06:55:32 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <201011121943.45776.Christoph.Egger@xxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <201011121943.45776.Christoph.Egger@xxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.20 (2009-06-14)
Hi, 

At 18:43 +0000 on 12 Nov (1289587425), Christoph Egger wrote:
> +static int nsvm_vmrun_permissionmap(struct vcpu *v)
> +{
> +    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
> +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> +    struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> +    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
> +    unsigned long *ns_msrpm_ptr;
> +    unsigned int i;
> +    enum hvm_copy_result ret;
> +
> +    ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> +
> +    ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
> +                                   ns_vmcb->msrpm_base_pa, MSRPM_SIZE);
> +    if (ret != HVMCOPY_okay) {
> +        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
> +        return 1;
> +    }
> +
> +    /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
> +     * 0x80 and 0xed.
> +     */

What if the L1 hypervisor wants to intercept port 0x80 or port 0xed?

> +    /* v->arch.hvm_svm.msrpm has type unsigned long, thus
> +     * BYTES_PER_LONG.
> +     */
> +    for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
> +        svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
> +
> +    host_vmcb->iopm_base_pa =
> +        (uint64_t)virt_to_maddr(hvm_io_bitmap);
> +    host_vmcb->msrpm_base_pa =
> +        (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
> +
> +    return 0;
> +}
> +
> +static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs 
> *regs)
> +{
> +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> +    struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> +    struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
> +    int rc;
> +
> +    /* Enable nested guest intercepts */
> +    svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
> +    svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
> +    svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
> +    svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
> +    svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
> +
> +    host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
> +    host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
> +    host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
> +    host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
> +    host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
> +
> +    /* Nested Pause Filter */
> +    if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
> +        host_vmcb->pause_filter_count =
> +            min(ns_vmcb->pause_filter_count, host_vmcb->pause_filter_count);
> +    else
> +        host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;

Why ignore the L0 count if the L1 doesn't intercept PAUSE?

> +
> +    /* Nested IO permission bitmaps */
> +    rc = nsvm_vmrun_permissionmap(v);
> +    if (rc)
> +        return rc;
> +
> +    /* TSC offset */
> +    hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);

hvm_set_guest_tsc takes an absolute value, not an offset.  ITYM to call
the hvm_funcs pointer directly.

> +
> +    /* ASID */
> +    hvm_asid_flush_vcpu(v);
> +    /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
> +
> +    /* TLB control */
> +    host_vmcb->tlb_control |= ns_vmcb->tlb_control;
> +
> +    /* Virtual Interrupts */
> +    host_vmcb->vintr = ns_vmcb->vintr;
> +    host_vmcb->vintr.fields.intr_masking = 1;
> +
> +    /* Shadow Mode */
> +    host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
> +
> +    /* Exit codes */
> +    host_vmcb->exitcode = ns_vmcb->exitcode;
> +    host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
> +    host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
> +    host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
> +
> +    /* Pending Interrupts */
> +    host_vmcb->eventinj = ns_vmcb->eventinj;
> +
> +    /* LBR virtualization */
> +    svm->ns_lbr_control = ns_vmcb->lbr_control;
> +    host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
> +
> +    /* NextRIP */
> +    host_vmcb->nextrip = ns_vmcb->nextrip;
> +
> +    /*
> +     * VMCB Save State Area
> +     */
> +
> +    /* Segments */
> +    host_vmcb->es = ns_vmcb->es;
> +    host_vmcb->cs = ns_vmcb->cs;
> +    host_vmcb->ss = ns_vmcb->ss;
> +    host_vmcb->ds = ns_vmcb->ds;
> +    host_vmcb->gdtr = ns_vmcb->gdtr;
> +    host_vmcb->idtr = ns_vmcb->idtr;
> +
> +    /* CPL */
> +    host_vmcb->cpl = ns_vmcb->cpl;
> +
> +    /* EFER */
> +    v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
> +    rc = hvm_set_efer(ns_vmcb->efer);
> +    if (rc != X86EMUL_OKAY)
> +        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
> +
> +    /* CR4 */
> +    v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
> +    rc = hvm_set_cr4(ns_vmcb->cr4);
> +    if (rc != X86EMUL_OKAY)
> +        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
> +
> +    /* CR0 */
> +    v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
> +    rc = hvm_set_cr0(ns_vmcb->cr0);
> +    if (rc != X86EMUL_OKAY)
> +        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
> +
> +    /* CR2 */
> +    v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
> +    hvm_update_guest_cr(v, 2);
> +
> +    /* Nested paging mode */
> +    if (nestedhvm_paging_mode_hap(v)) {
> +        /* host nested paging + guest nested paging. */
> +
> +        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
> +        rc = hvm_set_cr3(ns_vmcb->cr3);
> +        if (rc != X86EMUL_OKAY)
> +            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> +    } else if (paging_mode_hap(v->domain)) {
> +        /* host nested paging + guest shadow paging. */
> +        host_vmcb->np_enable = 1;
> +        /* Keep h_cr3 as it is. */
> +        /* Guest shadow paging: Must intercept pagefaults. */
> +        host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);

No - it's the L1 hypervisor's job to intercept #PF if it wants to use
shadow paging.  We shouldn't special-case it here. 

> +        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
> +        rc = hvm_set_cr3(ns_vmcb->cr3);
> +        if (rc != X86EMUL_OKAY)
> +            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> +    } else {
> +        /* host shadow paging + guest shadow paging. */
> +        host_vmcb->np_enable = 0;
> +        host_vmcb->h_cr3 = 0x0;
> +
> +        /* TODO: Once shadow-shadow paging is in place come back to here
> +         * and set host_vmcb->cr3 to the shadowed shadow table.
> +         */
> +    }
> +
> +    /* DRn */
> +    host_vmcb->dr7 = ns_vmcb->dr7;
> +    host_vmcb->dr6 = ns_vmcb->dr6;
> +
> +    /* RFLAGS */
> +    host_vmcb->rflags = ns_vmcb->rflags;
> +
> +    /* RIP */
> +    host_vmcb->rip = ns_vmcb->rip;
> +
> +    /* RSP */
> +    host_vmcb->rsp = ns_vmcb->rsp;
> +
> +    /* RAX */
> +    host_vmcb->rax = ns_vmcb->rax;
> +
> +    /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
> +     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
> +     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
> +     */
> +
> +    /* Page tables */
> +    host_vmcb->pdpe0 = ns_vmcb->pdpe0;
> +    host_vmcb->pdpe1 = ns_vmcb->pdpe1;
> +    host_vmcb->pdpe2 = ns_vmcb->pdpe2;
> +    host_vmcb->pdpe3 = ns_vmcb->pdpe3;
> +
> +    /* PAT */
> +    host_vmcb->g_pat = ns_vmcb->g_pat;
> +
> +    /* Debug Control MSR */
> +    host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
> +
> +    /* LBR MSRs */
> +    host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
> +    host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
> +    host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
> +    host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
> +
> +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
> +    if (rc) {
> +        gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
> +        return rc;
> +    }
> +
> +    rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
> +    if (rc) {
> +        gdprintk(XENLOG_ERR, "host vmcb invalid\n");
> +        return rc;
> +    }
> +
> +    /* Switch guest registers to nested guest */
> +    regs->eax = ns_vmcb->rax;
> +    regs->eip = ns_vmcb->rip;
> +    regs->esp = ns_vmcb->rsp;
> +    regs->eflags = ns_vmcb->rflags;
> +
> +    return 0;
> +}
> +

> +int
> +nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
> +    struct cpu_user_regs *regs, uint64_t exitcode)
> +{
> +    uint64_t exit_bits;
> +    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
> +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> +    struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
> +    enum nestedhvm_vmexits vmexits;
> +
> +    switch (exitcode) {
> +    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
> +    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
> +        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
> +        if (svm->ns_cr_intercepts & exit_bits)
> +            break;
> +        return 0;
> +
> +    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
> +    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
> +        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
> +        if (svm->ns_dr_intercepts & exit_bits)
> +            break;
> +        return 0;
> +
> +    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
> +        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
> +        if (svm->ns_exception_intercepts & exit_bits)
> +            break;
> +        return 0;
> +
> +    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
> +        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
> +        if (svm->ns_general1_intercepts & exit_bits)
> +            break;
> +        return 0;
> +
> +    case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
> +        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
> +        if (svm->ns_general2_intercepts & exit_bits)
> +            break;
> +        return 0;
> +
> +    case VMEXIT_NPF:
> +    case VMEXIT_INVALID:
> +        /* Always intercepted */
> +        break;
> +
> +    default:
> +        gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n", exitcode);
> +        BUG();
> +        break;
> +    }
> +
> +    /* Special cases: Do more detailed checks */
> +    switch (exitcode) {
> +    case VMEXIT_MSR:
> +        ASSERT(regs != NULL);
> +        nestedsvm_vmcb_map(v, nv->nv_vmcxaddr);
> +        ASSERT(nv->nv_vmcx != NULL);
> +        ns_vmcb = nv->nv_vmcx;
> +        vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
> +            regs->ecx, ns_vmcb->exitinfo1 != 0);
> +        if (vmexits == NESTEDHVM_VMEXIT_HOST)
> +            return 0;
> +        break;
> +
> +    case VMEXIT_IOIO:
> +        /* always intercepted */
> +        break;

What if the guest doesn't want to intercept it?  Won't that make it
crash or misbehave?

Tim.

-- 
Tim Deegan <Tim.Deegan@xxxxxxxxxx>
Principal Software Engineer, Xen Platform Team
Citrix Systems UK Ltd.  (Company #02937203, SL9 0BG)

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>