
Re: [Xen-devel] [RFC XTF PATCH] Pagetable Emulation testing



>>> On 06.03.17 at 17:42, <andrew.cooper3@xxxxxxxxxx> wrote:
> +struct mapping_info
> +{
> +    unsigned int level, order;
> +    void *va;
> +    intpte_t *pte, *fe_pte;

Having got quite a bit into the patch, I think I've finally understood that
"fe" here is meant to match up with "fep" - a comment might be helpful.

> +bool ex_check_pf(struct cpu_regs *regs,
> +                 const struct extable_entry *ex)
> +{
> +    if ( regs->entry_vector == X86_EXC_PF )
> +    {
> +        unsigned long cr2 = read_cr2();
> +
> +        if ( (cr2 != under_test.va) &&
> +             (cr2 != under_test.va + (LDT_SEL & ~7)) )
> +            xtf_failure("Bad %%cr2: expected %p, got %p\n",
> +                        _p(under_test.va), _p(cr2));
> +
> +        regs->ax = EXINFO_SYM(PF, regs->error_code);
> +
> +        if ( ex->fixup )
> +            regs->ip = ex->fixup;
> +        else
> +            regs->ip = *(unsigned long *)cpu_regs_sp(regs);

What does this match up with? The single RET you place at the first
byte of the page subject to testing? Surely this is rather fragile wrt
#PF happening anywhere unexpected?
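For reference, my reading of the intended flow (not from the patch, so please
correct me if I've got it wrong):

    /*
     * caller:  call <stub>    ; pushes the return address
     * va:      ret            ; first byte of the page under test
     *
     * If the fetch/execute at va faults, the word at the stack pointer is
     * still the caller's return address, which ex_check_pf() then "returns"
     * to.  Any #PF raised elsewhere, without a fixup entry, would resume at
     * whatever happens to be on the stack at that point.
     */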

> +static void prepare_mappings(struct mapping_info *m, unsigned int level, bool super, paddr_t paddr)
> +{
> +    bool pse36 = CONFIG_PAGING_LEVELS == 2 && paddr != (uint32_t)paddr;
> +
> +    memset(m, 0, sizeof(*m));
> +
> +#define PAGE_COMMON PF_SYM(AD, U, RW, P)
> +    /*
> +     * For 4-level paging, we use l4[1/2].
> +     */
> +    if ( CONFIG_PAGING_LEVELS == 4 )
> +    {
> +
> +        pae_l4_identmap[1] = (unsigned long)l3t | PAGE_COMMON;
> +        pae_l4_identmap[2] = (unsigned long)l3t | PAGE_COMMON;
> +
> +        l3t[0]   = (unsigned long)l2t | PAGE_COMMON;
> +        l3t[511] = (unsigned long)l2t | PAGE_COMMON;
> +
> +        l2t[0]   = (unsigned long)l1t | PAGE_COMMON;
> +        l2t[511] = (unsigned long)l1t | PAGE_COMMON;
> +
> +        l1t[0]   = paddr | PAGE_COMMON;
> +        l1t[511] = ((paddr - 1) & ~0xfff) | PAGE_COMMON;
> +
> +        m->va     = _p(2ULL << PAE_L4_PT_SHIFT);
> +        m->l1e    = &l1t[0];
> +        m->l2e    = &l2t[0];
> +        m->l3e    = &l3t[0];
> +        m->l4e    = _p(&pae_l4_identmap[2]);
> +        m->fe_pte = &l1t[511];
> +
> +        asm(_ASM_EXTABLE_HANDLER(2 << PAE_L4_PT_SHIFT, 0, ex_check_pf));
> +        under_test.va = (unsigned long)m->va;
> +    }
> +    else if ( CONFIG_PAGING_LEVELS == 3 )
> +    {
> +        pae32_l3_identmap[1] = (unsigned long)l2t | _PAGE_PRESENT;
> +        pae32_l3_identmap[2] = (unsigned long)l2t | _PAGE_PRESENT;
> +
> +        l2t[0]   = (unsigned long)l1t | PAGE_COMMON;
> +        l2t[511] = (unsigned long)l1t | PAGE_COMMON;
> +
> +        l1t[0]   = paddr | PAGE_COMMON;
> +        l1t[511] = ((paddr - 1) & ~0xfff) | PAGE_COMMON;
> +
> +        m->va     = _p(2ULL << PAE_L3_PT_SHIFT);
> +        m->l1e    = &l1t[0];
> +        m->l2e    = &l2t[0];
> +        m->l3e    = _p(&pae32_l3_identmap[2]);
> +        m->l4e    = NULL;
> +        m->fe_pte = &l1t[511];
> +
> +        asm(_ASM_EXTABLE_HANDLER(2 << PAE_L3_PT_SHIFT, 0, ex_check_pf));
> +        under_test.va = (unsigned long)m->va;
> +    }
> +    else if ( CONFIG_PAGING_LEVELS == 2 )
> +    {
> +        if ( pse36 )
> +        {
> +            ASSERT(super);
> +            ASSERT(IS_ALIGNED(paddr, MB(4)));
> +
> +            pse_l2_identmap[511] = fold_pse36((paddr - MB(4)) | PAGE_COMMON | _PAGE_PSE);
> +            pse_l2_identmap[512] = fold_pse36(paddr | PAGE_COMMON | _PAGE_PSE);
> +        }
> +        else
> +        {
> +            pse_l2_identmap[511] = (unsigned long)l1t | PAGE_COMMON;
> +            pse_l2_identmap[512] = (unsigned long)l1t | PAGE_COMMON;
> +
> +            l1t[0]    = paddr | PAGE_COMMON;
> +            l1t[1023] = ((paddr - 1) & ~0xfff) | PAGE_COMMON;
> +        }
> +
> +        m->va     = _p(2ULL << PAE_L3_PT_SHIFT);
> +        m->l1e    = pse36 ? NULL : &l1t[0];
> +        m->l2e    = _p(&pse_l2_identmap[512]);
> +        m->l3e    = NULL;
> +        m->l4e    = NULL;
> +        m->fe_pte = pse36 ? _p(&pse_l2_identmap[511]) : &l1t[1023];

The use of PAE constants here (including the literal 511 / 512) is
mildly confusing without any comments.
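E.g. something along these lines (if I've read the layout correctly):

    /*
     * 2-level paging, but the test va is still 2GB
     * (2ULL << PAE_L3_PT_SHIFT): pse_l2_identmap[512] maps va itself,
     * while [511] covers the 4MB immediately below it, holding the FEP
     * stub at va - 5.
     */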

> +        asm(_ASM_EXTABLE_HANDLER(2 << PAE_L3_PT_SHIFT, 0, ex_check_pf));
> +        under_test.va = (unsigned long)m->va;
> +    }
> +    else
> +        panic("%s() PAGING_LEVELS %u not implemented yet\n",
> +              __func__, CONFIG_PAGING_LEVELS);
> +
> +#undef PAGE_COMMON
> +
> +    /* Flush the TLB before trying to use the new mappings. */
> +    flush_tlb(false);
> +
> +    /* Put FEP immediately before va, and a ret instruction at va. */
> +    memcpy(m->va - 5, "\x0f\x0bxen\xc3", 6);
> +    barrier();
> +
> +    /* Read them back, to confirm that RAM is properly in place. */
> +    if ( memcmp(m->va - 5, "\x0f\x0bxen\xc3", 6) )
> +        panic("Bad phys or virtual setup\n");
> +
> +    /* Construct the LDT at va. */
> +    user_desc *ldt = m->va;
> +
> +    ldt[LDT_SEL >> 3] = (typeof(*ldt))INIT_GDTE_SYM(0, 0xfffff, COMMON, DATA, DPL3, B, W);

This dual use of m->va clearly needs a comment, perhaps next to
the definition of LDT_SEL (which can't be freely set to whatever one
may like - namely, it can't use LDT slot 0).
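Perhaps something like this next to its definition (value left as in your
patch):

    /*
     * Selector for the LDT-based tests.  The LDT shares the page with the
     * test stub at m->va, so the descriptor must not live in LDT slot 0 -
     * the first bytes of that page hold the 'ret' landing pad.
     */
    #define LDT_SEL ...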

> +    gdt[GDTE_AVAIL0]  = (typeof(*gdt))INIT_GDTE((unsigned long)m->va, PAGE_SIZE, 0x82);
> +#if __x86_64__

#ifdef ?

> +void check(struct mapping_info *m, exinfo_t actual, exinfo_t expected, enum modifier mod)
> +{
> +    /* Check that the actual pagefault matched our expectation. */
> +    if ( actual != expected )
> +    {
> +        const char *user_sup = under_test.user ? "User" : "Supervisor";
> +        bool ac_fault = !!actual, ex_fault = !!expected;

Stray !! ?
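I.e. just

    bool ac_fault = actual, ex_fault = expected;

should do - the conversion to bool already yields true for any non-zero
value.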

> +        char ac_ec[16], ex_ec[16];
> +
> +        if ( ac_fault )
> +            x86_exc_decode_ec(ac_ec, ARRAY_SIZE(ac_ec),
> +                              X86_EXC_PF, (uint16_t)actual);
> +        if ( ex_fault )
> +            x86_exc_decode_ec(ex_ec, ARRAY_SIZE(ex_ec),
> +                              X86_EXC_PF, (uint16_t)expected);

Why the casts on the last function arguments?

> +        if ( ac_fault && !ex_fault )
> +            fail("    Fail: expected no fault, got #PF[%s] for %s %s\n",
> +                 ac_ec, user_sup, under_test.desc);
> +        else if ( !ac_fault && ex_fault )
> +            fail("    Fail: expected #PF[%s], got no fault for %s %s\n",
> +                 ex_ec, user_sup, under_test.desc);
> +        else
> +            fail("    Fail: expected #PF[%s], got #PF[%s] for %s %s\n",
> +                 ex_ec, ac_ec,  user_sup, under_test.desc);

From looking just at this function, ex_ec[] and ac_ec[] may be
used uninitialized here (when !ac_fault && !ex_fault). With the
function not being static, I'd actually expect compilers to be
able to spot and warn about this.
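Initializing both at declaration would sidestep this, e.g.:

    char ac_ec[16] = "", ex_ec[16] = "";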

> +    }
> +
> +    /* Check that A/D bits got updated as expected. */
> +    unsigned int leaf_level =
> +        m->order ? ((m->order - PAGE_SHIFT) / PT_ORDER) : 0;
> +    unsigned int i; /* NB - Levels are 0-indexed. */

Do you enable C99 mode somewhere? Otherwise I'd expect the
compiler to warn about mixing declarations and code.

> +    if ( amd_fam10_erratum )
> +    {
> +        /*
> +         * AMD Fam10 appears to defer the setting of access bits for implicit
> +         * loads.  As a result, the implicit tests (which load %fs) don't
> +         * necessarily observe the access bits being set on the pagewalk to
> +         * the LDT.
> +         *
> +         * Experimentally, a serialising instruction fixes things, or a loop
> +         * of 1000 nops, but so does forcing the use of the loaded segment.
> +         *
> +         * If this is an implicit load which didn't fault, read through %fs to
> +         * force it to be loaded into the segment cache.

The load into the segment cache certainly has at least started, so
maybe a better (more precise) wording would be something like
"... force the load into the segment cache to complete"?

> +exinfo_t calc(struct mapping_info *m, uint64_t new, unsigned int walk, enum modifier mod)

For readers other than you, the author of the code, it would help if the
function's name indicated what it calculates.
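If it indeed computes the expected fault, then e.g. (name merely a
suggestion):

    exinfo_t calc_expected_fault(struct mapping_info *m, uint64_t new,
                                 unsigned int walk, enum modifier mod);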

> +{
> +    bool nx_valid   = CONFIG_PAGING_LEVELS >= 3 && (host_nx_leaked || (mod & NX));
> +    bool insn_valid = nx_valid || (mod & SMEP);
> +    uint64_t rsvd = ((1ULL << 52) - 1) & ~((1ULL << maxphysaddr) - 1);
> +
> +    /* Accumulate additional bits which are reserved. */
> +    if ( !nx_valid )
> +        rsvd |= _PAGE_NX;
> +
> +    if ( m->level == 4 )
> +        rsvd |= _PAGE_PSE | (vendor_is_amd ? _PAGE_GLOBAL : 0);
> +    else if ( m->level == 3 && !cpu_has_page1gb )
> +        rsvd |= _PAGE_PSE;

Shouldn't this second conditional include CONFIG_PAGING_LEVELS > 3?
Certainly that bit is reserved there too, but there are quite a few
more.
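I.e. I'd have expected something along the lines of (exact extra bits for
the PAE PDPTE case left open):

    else if ( m->level == 3 )
    {
        if ( CONFIG_PAGING_LEVELS > 3 )
        {
            if ( !cpu_has_page1gb )
                rsvd |= _PAGE_PSE;
        }
        else
            rsvd |= ...; /* PAE PDPTEs have many more reserved bits. */
    }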

> +void test_pte(const struct stubs *stubs, struct mapping_info *m, uint64_t overlay)
> +{
> +    uint64_t new = m->paddr | overlay;
> +    bool user = false;
> +
> +    under_test.pteval = *m->pte = new;
> +    under_test.pte_printed = false;
> +    clear_ad(m);
> +
> +    under_test.active = true;
> +
> +    for ( ; ; user = true )
> +    {
> +        unsigned int base = user ? PFEC(U) : 0;
> +
> +#define CALL(fn, va)                                                    \
> +        (user ? exec_user_param(fn ## _user, (unsigned long)(va))       \
> +              : fn((unsigned long)(va)))
> +
> +        under_test.user = user;
> +
> +        /* Map the exec FEP stub with suitable permissions. */
> +        if ( stubs == &force_stubs )
> +        {
> +            *m->fe_pte &= ~_PAGE_USER;
> +            if ( user )
> +                *m->fe_pte |= _PAGE_USER;
> +            invlpg(m->va - 5);

Why do you need to do this for the FEP prefix, but not for the
rest of the stub?

> --- /dev/null
> +++ b/tests/pagetable-emulation/stubs.h
> @@ -0,0 +1,25 @@
> +#ifndef _LOWLEVEL_H_
> +#define _LOWLEVEL_H_
> +
> +unsigned long stub_read(unsigned long va);
> +unsigned long stub_implicit(unsigned long sel); /* unsigned long sel */

What are these comments intended to tell the reader?

Jan
