[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2] libx86: Introduce x86_cpu_policy_calculate_compatible() with MSR_ARCH_CAPS handling


  • To: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
  • From: Jan Beulich <jbeulich@xxxxxxxx>
  • Date: Tue, 29 Jun 2021 17:35:35 +0200
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=suse.com; dmarc=pass action=none header.from=suse.com; dkim=pass header.d=suse.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=URLnnwCsKBkaRrg2bq5VfQAyb6TPlECEcaLBS5kAmfk=; b=LK55jjbtBR5qqBocSQzRBHERZTkj8t7aaagFlhXsze1gYibGwFD8ULPDZ+i1B+DyuZAmzwKXKNzzw73NIP849bBNtvFiT6iNlsnlVfnCBq3Cp2FkYtb23Ig+LxwN8fpjsA5VXkbsZwXG22bJVtQxEbSGhLzLy4jbjA3wjDQSxslPrgQeEijX1X7kHSD/TbyyNj+/LM2+6Q7l8ERs6nE4NUBt10DS3X26ABv98ZBGxtD2PQHMwcBr4S7omFmIsX2tgD2TDmLlJ5jekduRF0eX0VV7fQT0N2bprvZJ5KaqnpwB0XvqNdmB97lMTn3E3YmUc3A0ISzDJleVn3r/ihacCA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=c/nmr1M9CAjlFApM2wA59xlBntQaL1gMFhkf9i4Cuzz+cEKaGAAXtjrszCHvolkVpWedDWj2cgWESe2UhnFFstJG0vJQhBITz/6nBB9YcJzEW5gZMdovxD42x4lsZTH1z9El9DLu8omX5k8VGnswypJUJYRd9vUmmFpF35zxLSbcy/T3niQFKq0wK2JD2g5865pzq9Zw6DPvZ+Erg6r7qBa1lRB7dKQN39YqeqO0+cvmhvv7gKZfaMUwXBR/aL/hIUfefs8sB+oV7sUPcgr2jTFUDIjDafqSVpefXQNE3IQ87jfDFw5JwD6Oo0jmERXX7OEHDIpXluTseyt14OpcrA==
  • Authentication-results: lists.xenproject.org; dkim=none (message not signed) header.d=none;lists.xenproject.org; dmarc=none action=none header.from=suse.com;
  • Cc: Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>, Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • Delivery-date: Tue, 29 Jun 2021 15:35:57 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On 28.06.2021 17:00, Andrew Cooper wrote:
> --- a/tools/tests/cpu-policy/test-cpu-policy.c
> +++ b/tools/tests/cpu-policy/test-cpu-policy.c
> @@ -775,6 +775,154 @@ static void test_is_compatible_failure(void)
>      }
>  }
>  
> +static void test_calculate_compatible_success(void)
> +{
> +    static struct test {

It's only testing code, so it doesn't matter all this much, but
elsewhere such static struct-s are const.

> +        const char *name;
> +        struct {
> +            struct cpuid_policy p;
> +            struct msr_policy m;
> +        } a, b, out;
> +    } tests[] = {
> +        {
> +            "arch_caps, b short max_leaf",
> +            .a = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .b = {
> +                .p.basic.max_leaf = 6,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .out = {
> +                .p.basic.max_leaf = 6,
> +            },
> +        },
> +        {
> +            "arch_caps, b feat missing",
> +            .a = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .b = {
> +                .p.basic.max_leaf = 7,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .out = {
> +                .p.basic.max_leaf = 7,
> +            },
> +        },
> +        {
> +            "arch_caps, b rdcl_no missing",
> +            .a = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .b = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +            },
> +            .out = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +            },
> +        },
> +        {
> +            "arch_caps, rdcl_no ok",
> +            .a = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .b = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +            .out = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rdcl_no = true,
> +            },
> +        },
> +        {
> +            "arch_caps, rsba accum",
> +            .a = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rsba = true,
> +            },
> +            .b = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +            },
> +            .out = {
> +                .p.basic.max_leaf = 7,
> +                .p.feat.arch_caps = true,
> +                .m.arch_caps.rsba = true,
> +            },
> +        },

For RDCL_NO you go through quite a few more variations, and given
the accumulating nature of RSBA, having a similar set for it would
imo be quite valuable, not least for people like me to see
clearly what behavior is expected there.

> +    };
> +    struct cpu_policy_errors no_errors = INIT_CPU_POLICY_ERRORS;
> +
> +    printf("Testing calculate compatibility success:\n");
> +
> +    for ( size_t i = 0; i < ARRAY_SIZE(tests); ++i )
> +    {
> +        struct test *t = &tests[i];
> +        struct cpuid_policy *p = malloc(sizeof(struct cpuid_policy));
> +        struct msr_policy *m = malloc(sizeof(struct msr_policy));
> +        struct cpu_policy a = {
> +            &t->a.p,
> +            &t->a.m,
> +        }, b = {
> +            &t->b.p,
> +            &t->b.m,

Hmm, I guess these two struct instances are the reason for tests[]
to be non-const. I vaguely recall discussion about having a const-
correct variant of struct cpu_policy; if you don't think this is
warranted, may I ask that you add a respective brief comment to
tests[]?

> +        }, out = {
> +            p,
> +            m,
> +        };
> +        struct cpu_policy_errors e;
> +        int res;
> +
> +        if ( !p || !m )
> +            err(1, "%s() malloc failure", __func__);
> +
> +        res = x86_cpu_policy_calculate_compatible(&a, &b, &out, &e);
> +
> +        /* Check the expected error output. */
> +        if ( res != 0 || memcmp(&no_errors, &e, sizeof(no_errors)) )

While this memcmp() has precedents, ...

> +        {
> +            fail("  Test '%s' expected no errors\n"
> +                 "    got res %d { leaf %08x, subleaf %08x, msr %08x }\n",
> +                 t->name, res, e.leaf, e.subleaf, e.msr);
> +            goto test_done;
> +        }
> +
> +        if ( memcmp(&t->out.p, p, sizeof(*p)) )

... I'm worried that this and ...

> +        {
> +            fail("  Test '%s' resulting CPUID policy not as expected\n",
> +                 t->name);
> +            goto test_done;
> +        }
> +
> +        if ( memcmp(&t->out.m, m, sizeof(*m)) )

... this may (down the road) suffer from mismatches on uninitialized
padding fields. Otoh I've meanwhile found that the new function
clears both output buffers first thing.

> --- a/xen/include/xen/lib/x86/cpu-policy.h
> +++ b/xen/include/xen/lib/x86/cpu-policy.h
> @@ -37,6 +37,34 @@ int x86_cpu_policies_are_compatible(const struct 
> cpu_policy *host,
>                                      const struct cpu_policy *guest,
>                                      struct cpu_policy_errors *err);
>  
> +/*
> + * Given two policies, calculate one which is compatible with each.
> + *
> + * i.e. Given host @a and host @b, calculate what to give a VM so it can live
> + * migrate between the two.
> + *
> + * @param a        A cpu_policy.
> + * @param b        Another cpu_policy.
> + * @param out      A policy compatible with @a and @b, if successful.
> + * @param err      Optional hint for error diagnostics.
> + * @returns -errno
> + *
> + * For typical usage, @a and @b should be default system policies of the same
> + * type (i.e. PV or HVM) from different hosts.

Given this property, what use do you anticipate for the new function
within libxl? Or asked differently, where from would libxl obtain a
remote host's policy?

>  It does not make sense to try
> + * and level max policies, as they contain the non-migrateable features.
> + *
> + * Some data (e.g. the long CPU brand string) cannot be levelled.  Such data
> + * will be taken from @a, and the content in @b will be discaraded.

I'm afraid I can't spot this "taking from @a" in the code.

Also, nit: "discarded"

> + * It is possible that @a and @b cannot be resolved to migration-compatible

Nit: Missing "a" after "to"?

> @@ -43,6 +46,52 @@ int x86_cpu_policies_are_compatible(const struct 
> cpu_policy *host,
>      return ret;
>  }
>  
> +#ifndef __XEN__
> +int x86_cpu_policy_calculate_compatible(const struct cpu_policy *a,
> +                                        const struct cpu_policy *b,
> +                                        struct cpu_policy *out,
> +                                        struct cpu_policy_errors *err)
> +{
> +    const struct cpuid_policy *ap = a->cpuid, *bp = b->cpuid;
> +    const struct msr_policy *am = a->msr, *bm = b->msr;
> +    struct cpuid_policy *cp = out->cpuid;
> +    struct msr_policy *mp = out->msr;
> +
> +    memset(cp, 0, sizeof(*cp));
> +    memset(mp, 0, sizeof(*mp));
> +
> +    cp->basic.max_leaf = min(ap->basic.max_leaf, bp->basic.max_leaf);
> +
> +    if ( cp->basic.max_leaf >= 7 )
> +    {
> +        cp->feat.max_subleaf = min(ap->feat.max_subleaf, 
> bp->feat.max_subleaf);
> +
> +        cp->feat.raw[0].b = ap->feat.raw[0].b & bp->feat.raw[0].b;
> +        cp->feat.raw[0].c = ap->feat.raw[0].c & bp->feat.raw[0].c;
> +        cp->feat.raw[0].d = ap->feat.raw[0].d & bp->feat.raw[0].d;

Is there a particular reason to not handle this in full, i.e. for
all of the subleaves? If there is, I'd still have expected you to
at least handle _7a1 that we already know about. Failing that I'd
have hoped for a justifying comment (or maybe a TODO item beyond ...

> +    }
> +
> +    /* TODO: Far more. */

... this one.

> +    mp->platform_info.raw = am->platform_info.raw & bm->platform_info.raw;
> +
> +    if ( cp->feat.arch_caps )
> +    {
> +        /*
> +         * RSBA means "RSB Alternative", i.e. RSB stuffing not necesserily
> +         * safe.  It needs to accumulate rather than intersect across a
> +         * resource pool.
> +         */
> +#define POL_MASK ARCH_CAPS_RSBA
> +        mp->arch_caps.raw = ((am->arch_caps.raw ^ POL_MASK) &
> +                             (bm->arch_caps.raw ^ POL_MASK)) ^ POL_MASK;
> +#undef POL_MASK
> +    }

Related to my respective request on the set of tests performed, this
really is partial accumulation, as ARCH_CAPS are still taken as a
prereq feature. That is, one host with RSBA and another without
ARCH_CAPS will result in a policy without RSBA. Is this really what's
intended?

Jan




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.