[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 09/13] Nested Virtualization: svm specific implementation



On Tuesday 16 November 2010 15:54:11 Tim Deegan wrote:
> Hi,
>
> At 18:43 +0000 on 12 Nov (1289587425), Christoph Egger wrote:
> > +static int nsvm_vmrun_permissionmap(struct vcpu *v)
> > +{
> > +    struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
> > +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > +    struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> > +    struct vmcb_struct *host_vmcb = arch_svm->vmcb;
> > +    unsigned long *ns_msrpm_ptr;
> > +    unsigned int i;
> > +    enum hvm_copy_result ret;
> > +
> > +    ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
> > +
> > +    ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
> > +                                   ns_vmcb->msrpm_base_pa, MSRPM_SIZE);
> > +    if (ret != HVMCOPY_okay) {
> > +        gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n",
> > ret); +        return 1;
> > +    }
> > +
> > +    /* Skip io bitmap merge since hvm_io_bitmap has all bits set but
> > +     * 0x80 and 0xed.
> > +     */
>
> What if the L1 hypervisor wants to intercept port 0x80 or port 0xed?

Thanks for the pointer. This item is work in progress.

>
> > +    /* v->arch.hvm_svm.msrpm has type unsigned long, thus
> > +     * BYTES_PER_LONG.
> > +     */
> > +    for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
> > +        svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
> > +
> > +    host_vmcb->iopm_base_pa =
> > +        (uint64_t)virt_to_maddr(hvm_io_bitmap);
> > +    host_vmcb->msrpm_base_pa =
> > +        (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
> > +
> > +    return 0;
> > +}
> > +
> > +static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs
> > *regs) +{
> > +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > +    struct vmcb_struct *ns_vmcb = vcpu_nestedhvm(v).nv_vmcx;
> > +    struct vmcb_struct *host_vmcb = v->arch.hvm_svm.vmcb;
> > +    int rc;
> > +
> > +    /* Enable nested guest intercepts */
> > +    svm->ns_cr_intercepts = ns_vmcb->cr_intercepts;
> > +    svm->ns_dr_intercepts = ns_vmcb->dr_intercepts;
> > +    svm->ns_exception_intercepts = ns_vmcb->exception_intercepts;
> > +    svm->ns_general1_intercepts = ns_vmcb->general1_intercepts;
> > +    svm->ns_general2_intercepts = ns_vmcb->general2_intercepts;
> > +
> > +    host_vmcb->cr_intercepts |= ns_vmcb->cr_intercepts;
> > +    host_vmcb->dr_intercepts |= ns_vmcb->dr_intercepts;
> > +    host_vmcb->exception_intercepts |= ns_vmcb->exception_intercepts;
> > +    host_vmcb->general1_intercepts |= ns_vmcb->general1_intercepts;
> > +    host_vmcb->general2_intercepts |= ns_vmcb->general2_intercepts;
> > +
> > +    /* Nested Pause Filter */
> > +    if (ns_vmcb->general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
> > +        host_vmcb->pause_filter_count =
> > +            min(ns_vmcb->pause_filter_count,
> > host_vmcb->pause_filter_count); +    else
> > +        host_vmcb->pause_filter_count = SVM_PAUSEFILTER_INIT;
>
> Why ignore the L0 count if the L1 doesn't intercept PAUSE?

SVM_PAUSEFILTER_INIT is what is also used in construct_vmcb().

>
> > +
> > +    /* Nested IO permission bitmaps */
> > +    rc = nsvm_vmrun_permissionmap(v);
> > +    if (rc)
> > +        return rc;
> > +
> > +    /* TSC offset */
> > +    hvm_set_guest_tsc(v, host_vmcb->tsc_offset + ns_vmcb->tsc_offset);
>
> hvm_set_guest_tsc takes an absolute value, not an offset.  ITYM to call
> the hvm_funcs pointer directly.

Thanks. Fixed.

>
> > +
> > +    /* ASID */
> > +    hvm_asid_flush_vcpu(v);
> > +    /* host_vmcb->guest_asid = ns_vmcb->guest_asid; */
> > +
> > +    /* TLB control */
> > +    host_vmcb->tlb_control |= ns_vmcb->tlb_control;
> > +
> > +    /* Virtual Interrupts */
> > +    host_vmcb->vintr = ns_vmcb->vintr;
> > +    host_vmcb->vintr.fields.intr_masking = 1;
> > +
> > +    /* Shadow Mode */
> > +    host_vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
> > +
> > +    /* Exit codes */
> > +    host_vmcb->exitcode = ns_vmcb->exitcode;
> > +    host_vmcb->exitinfo1 = ns_vmcb->exitinfo1;
> > +    host_vmcb->exitinfo2 = ns_vmcb->exitinfo2;
> > +    host_vmcb->exitintinfo = ns_vmcb->exitintinfo;
> > +
> > +    /* Pending Interrupts */
> > +    host_vmcb->eventinj = ns_vmcb->eventinj;
> > +
> > +    /* LBR virtualization */
> > +    svm->ns_lbr_control = ns_vmcb->lbr_control;
> > +    host_vmcb->lbr_control.bytes |= ns_vmcb->lbr_control.bytes;
> > +
> > +    /* NextRIP */
> > +    host_vmcb->nextrip = ns_vmcb->nextrip;
> > +
> > +    /*
> > +     * VMCB Save State Area
> > +     */
> > +
> > +    /* Segments */
> > +    host_vmcb->es = ns_vmcb->es;
> > +    host_vmcb->cs = ns_vmcb->cs;
> > +    host_vmcb->ss = ns_vmcb->ss;
> > +    host_vmcb->ds = ns_vmcb->ds;
> > +    host_vmcb->gdtr = ns_vmcb->gdtr;
> > +    host_vmcb->idtr = ns_vmcb->idtr;
> > +
> > +    /* CPL */
> > +    host_vmcb->cpl = ns_vmcb->cpl;
> > +
> > +    /* EFER */
> > +    v->arch.hvm_vcpu.guest_efer = ns_vmcb->efer;
> > +    rc = hvm_set_efer(ns_vmcb->efer);
> > +    if (rc != X86EMUL_OKAY)
> > +        gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
> > +
> > +    /* CR4 */
> > +    v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->cr4;
> > +    rc = hvm_set_cr4(ns_vmcb->cr4);
> > +    if (rc != X86EMUL_OKAY)
> > +        gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
> > +
> > +    /* CR0 */
> > +    v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->cr0;
> > +    rc = hvm_set_cr0(ns_vmcb->cr0);
> > +    if (rc != X86EMUL_OKAY)
> > +        gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
> > +
> > +    /* CR2 */
> > +    v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->cr2;
> > +    hvm_update_guest_cr(v, 2);
> > +
> > +    /* Nested paging mode */
> > +    if (nestedhvm_paging_mode_hap(v)) {
> > +        /* host nested paging + guest nested paging. */
> > +
> > +        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us.
> > */ +        rc = hvm_set_cr3(ns_vmcb->cr3);
> > +        if (rc != X86EMUL_OKAY)
> > +            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> > +    } else if (paging_mode_hap(v->domain)) {
> > +        /* host nested paging + guest shadow paging. */
> > +        host_vmcb->np_enable = 1;
> > +        /* Keep h_cr3 as it is. */
> > +        /* Guest shadow paging: Must intercept pagefaults. */
> > +        host_vmcb->exception_intercepts |= (1U << TRAP_page_fault);
>
> No - it's the L1 hypervisor's job to intercept #PF if it wants to use
> shadow paging.  We shouldn't special-case it here.

Fixed.

>
> > +        /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us.
> > */ +        rc = hvm_set_cr3(ns_vmcb->cr3);
> > +        if (rc != X86EMUL_OKAY)
> > +            gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
> > +    } else {
> > +        /* host shadow paging + guest shadow paging. */
> > +        host_vmcb->np_enable = 0;
> > +        host_vmcb->h_cr3 = 0x0;
> > +
> > +        /* TODO: Once shadow-shadow paging is in place come back to here
> > +         * and set host_vmcb->cr3 to the shadowed shadow table.
> > +         */
> > +    }
> > +
> > +    /* DRn */
> > +    host_vmcb->dr7 = ns_vmcb->dr7;
> > +    host_vmcb->dr6 = ns_vmcb->dr6;
> > +
> > +    /* RFLAGS */
> > +    host_vmcb->rflags = ns_vmcb->rflags;
> > +
> > +    /* RIP */
> > +    host_vmcb->rip = ns_vmcb->rip;
> > +
> > +    /* RSP */
> > +    host_vmcb->rsp = ns_vmcb->rsp;
> > +
> > +    /* RAX */
> > +    host_vmcb->rax = ns_vmcb->rax;
> > +
> > +    /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
> > +     * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
> > +     * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
> > +     */
> > +
> > +    /* Page tables */
> > +    host_vmcb->pdpe0 = ns_vmcb->pdpe0;
> > +    host_vmcb->pdpe1 = ns_vmcb->pdpe1;
> > +    host_vmcb->pdpe2 = ns_vmcb->pdpe2;
> > +    host_vmcb->pdpe3 = ns_vmcb->pdpe3;
> > +
> > +    /* PAT */
> > +    host_vmcb->g_pat = ns_vmcb->g_pat;
> > +
> > +    /* Debug Control MSR */
> > +    host_vmcb->debugctlmsr = ns_vmcb->debugctlmsr;
> > +
> > +    /* LBR MSRs */
> > +    host_vmcb->lastbranchfromip = ns_vmcb->lastbranchfromip;
> > +    host_vmcb->lastbranchtoip = ns_vmcb->lastbranchtoip;
> > +    host_vmcb->lastintfromip = ns_vmcb->lastintfromip;
> > +    host_vmcb->lastinttoip = ns_vmcb->lastinttoip;
> > +
> > +    rc = svm_vmcb_isvalid(__func__, ns_vmcb, 1);
> > +    if (rc) {
> > +        gdprintk(XENLOG_ERR, "nested vmcb invalid\n");
> > +        return rc;
> > +    }
> > +
> > +    rc = svm_vmcb_isvalid(__func__, host_vmcb, 1);
> > +    if (rc) {
> > +        gdprintk(XENLOG_ERR, "host vmcb invalid\n");
> > +        return rc;
> > +    }
> > +
> > +    /* Switch guest registers to nested guest */
> > +    regs->eax = ns_vmcb->rax;
> > +    regs->eip = ns_vmcb->rip;
> > +    regs->esp = ns_vmcb->rsp;
> > +    regs->eflags = ns_vmcb->rflags;
> > +
> > +    return 0;
> > +}
> > +
> >
> > +int
> > +nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
> > +    struct cpu_user_regs *regs, uint64_t exitcode)
> > +{
> > +    uint64_t exit_bits;
> > +    struct nestedvcpu *nv = &vcpu_nestedhvm(v);
> > +    struct nestedsvm *svm = &vcpu_nestedsvm(v);
> > +    struct vmcb_struct *ns_vmcb = nv->nv_vmcx;
> > +    enum nestedhvm_vmexits vmexits;
> > +
> > +    switch (exitcode) {
> > +    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
> > +    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
> > +        exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
> > +        if (svm->ns_cr_intercepts & exit_bits)
> > +            break;
> > +        return 0;
> > +
> > +    case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
> > +    case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
> > +        exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
> > +        if (svm->ns_dr_intercepts & exit_bits)
> > +            break;
> > +        return 0;
> > +
> > +    case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
> > +        exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
> > +        if (svm->ns_exception_intercepts & exit_bits)
> > +            break;
> > +        return 0;
> > +
> > +    case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
> > +        exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
> > +        if (svm->ns_general1_intercepts & exit_bits)
> > +            break;
> > +        return 0;
> > +
> > +    case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
> > +        exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
> > +        if (svm->ns_general2_intercepts & exit_bits)
> > +            break;
> > +        return 0;
> > +
> > +    case VMEXIT_NPF:
> > +    case VMEXIT_INVALID:
> > +        /* Always intercepted */
> > +        break;
> > +
> > +    default:
> > +        gdprintk(XENLOG_ERR, "Illegal exitcode 0x%"PRIx64"\n",
> > exitcode); +        BUG();
> > +        break;
> > +    }
> > +
> > +    /* Special cases: Do more detailed checks */
> > +    switch (exitcode) {
> > +    case VMEXIT_MSR:
> > +        ASSERT(regs != NULL);
> > +        nestedsvm_vmcb_map(v, nv->nv_vmcxaddr);
> > +        ASSERT(nv->nv_vmcx != NULL);
> > +        ns_vmcb = nv->nv_vmcx;
> > +        vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
> > +            regs->ecx, ns_vmcb->exitinfo1 != 0);
> > +        if (vmexits == NESTEDHVM_VMEXIT_HOST)
> > +            return 0;
> > +        break;
> > +
> > +    case VMEXIT_IOIO:
> > +        /* always intercepted */
> > +        break;
>
> What if the guest doesn't want to intercept it?  Won't that make it
> crash or misbehave?

Yes, right. Fixed.

Christoph



-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo, Andrew Bowd
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.