[PATCH 1/8] x86/CPUID: enable AVX10 leaf
This requires bumping the number of basic leaves we support. Apart from
this, the logic is modeled as closely as possible after that of the
leaf 7 handling (see the reference snippet in the notes below).
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
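For reference, the existing leaf 7 dispatch in guest_cpuid(), which the
new case 0x24 below mirrors, looks roughly like this (paraphrased from
current sources; the exact code may differ):

    case 0x7:
        ASSERT(p->feat.max_subleaf < ARRAY_SIZE(p->feat.raw));
        if ( subleaf > min_t(uint32_t, p->feat.max_subleaf,
                             ARRAY_SIZE(p->feat.raw) - 1) )
            return;

        *res = array_access_nospec(p->feat.raw, subleaf);
        break;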
The gen-cpuid.py adjustment is merely the minimum needed. It's not
really clear to me whether someone turning off e.g. AVX512BW might then
also validly expect AVX10 to be turned off.
Spec version 2 leaves unclear which xstate components would need
enabling for AVX10/256. recalculate_{xstate,misc}() are therefore
conservative for now.
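For illustration only: if AVX10/256 turned out to require the opmask
registers and zmm16-31 state, but not the upper 256 bits of zmm0-15,
recalculate_xstate() might gain a clause along these lines (a sketch
under that assumption only, mirroring the existing avx512f block; spec
version 2 doesn't confirm this reading):

    /* Hypothetical AVX10/256 component set; not spelled out by the spec. */
    if ( p->feat.avx10 && p->avx10.vsz256 )
    {
        xstates |= X86_XCR0_OPMASK | X86_XCR0_HI_ZMM;
        xstate_size = max(xstate_size,
                          xstate_offsets[X86_XCR0_HI_ZMM_POS] +
                          xstate_sizes[X86_XCR0_HI_ZMM_POS]);
    }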
Do we want to synthesize AVX10 in the policy when all necessary AVX512*
features are available, thus allowing migration from an AVX10 host to a
suitable non-AVX10 one?
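If so, a minimal sketch of such synthesis (hypothetical; in particular
the set of prerequisite AVX512* features checked here is an assumption,
not taken from the spec):

    /* Hypothetical synthesis of AVX10.1/512 from discrete AVX512* bits. */
    if ( !p->feat.avx10 && p->feat.avx512f && p->feat.avx512bw &&
         p->feat.avx512cd && p->feat.avx512dq && p->feat.avx512vl
         /* ... plus whatever else AVX10.1 is defined to cover ... */ )
    {
        p->feat.avx10 = true;
        p->avx10.version = 1;
        p->avx10.vsz128 = p->avx10.vsz256 = p->avx10.vsz512 = true;
    }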
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -230,7 +230,7 @@ static const char *const str_7d1[32] =
[14] = "prefetchi", [15] = "user-msr",
- [18] = "cet-sss",
+ [18] = "cet-sss", [19] = "avx10",
};
static const char *const str_7d2[32] =
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -221,7 +221,7 @@ static void recalculate_xstate(struct cp
xstate_sizes[X86_XCR0_BNDCSR_POS]);
}
- if ( p->feat.avx512f )
+ if ( p->feat.avx512f || (p->feat.avx10 && p->avx10.vsz512) )
{
xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM;
xstate_size = max(xstate_size,
@@ -283,6 +283,16 @@ static void recalculate_misc(struct cpu_
p->basic.raw[0xc] = EMPTY_LEAF;
+ zero_leaves(p->basic.raw, 0xe, 0x23);
+
+ p->avx10.raw[0].b &= 0x000700ff;
+ p->avx10.raw[0].c = p->avx10.raw[0].d = 0;
+ if ( !p->feat.avx10 || !p->avx10.version || !p->avx10.vsz512 )
+ {
+ p->feat.avx10 = false;
+ memset(p->avx10.raw, 0, sizeof(p->avx10.raw));
+ }
+
p->extd.e1d &= ~CPUID_COMMON_1D_FEATURES;
/* Most of Power/RAS hidden from guests. */
@@ -800,6 +810,7 @@ void recalculate_cpuid_policy(struct dom
p->basic.max_leaf = min(p->basic.max_leaf, max->basic.max_leaf);
p->feat.max_subleaf = min(p->feat.max_subleaf, max->feat.max_subleaf);
+ p->avx10.max_subleaf = min(p->avx10.max_subleaf, max->avx10.max_subleaf);
p->extd.max_leaf = 0x80000000U | min(p->extd.max_leaf & 0xffff,
((p->x86_vendor & (X86_VENDOR_AMD |
X86_VENDOR_HYGON))
@@ -854,6 +865,8 @@ void recalculate_cpuid_policy(struct dom
if ( p->basic.max_leaf < XSTATE_CPUID )
__clear_bit(X86_FEATURE_XSAVE, fs);
+ if ( p->basic.max_leaf < 0x24 )
+ __clear_bit(X86_FEATURE_AVX10, fs);
sanitise_featureset(fs);
@@ -967,6 +980,8 @@ static void __init __maybe_unused build_
sizeof(raw_cpu_policy.feat.raw));
BUILD_BUG_ON(sizeof(raw_cpu_policy.xstate) !=
sizeof(raw_cpu_policy.xstate.raw));
+ BUILD_BUG_ON(sizeof(raw_cpu_policy.avx10) !=
+ sizeof(raw_cpu_policy.avx10.raw));
BUILD_BUG_ON(sizeof(raw_cpu_policy.extd) !=
sizeof(raw_cpu_policy.extd.raw));
}
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -87,6 +87,15 @@ void guest_cpuid(const struct vcpu *v, u
*res = array_access_nospec(p->xstate.raw, subleaf);
break;
+ case 0x24:
+ ASSERT(p->avx10.max_subleaf < ARRAY_SIZE(p->avx10.raw));
+ if ( subleaf > min_t(uint32_t, p->avx10.max_subleaf,
+ ARRAY_SIZE(p->avx10.raw) - 1) )
+ return;
+
+ *res = array_access_nospec(p->avx10.raw, subleaf);
+ break;
+
default:
*res = array_access_nospec(p->basic.raw, leaf);
break;
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -321,6 +321,7 @@ XEN_CPUFEATURE(AVX_VNNI_INT16, 15*32
XEN_CPUFEATURE(PREFETCHI, 15*32+14) /*A PREFETCHIT{0,1} Instructions */
XEN_CPUFEATURE(USER_MSR, 15*32+15) /*s U{RD,WR}MSR Instructions */
XEN_CPUFEATURE(CET_SSS, 15*32+18) /* CET Supervisor Shadow Stacks safe to use */
+XEN_CPUFEATURE(AVX10, 15*32+19) /* AVX10 Converged Vector ISA */
/* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.eax, word 16 */
XEN_CPUFEATURE(RDCL_NO, 16*32+ 0) /*A No Rogue Data Cache Load (Meltdown) */
--- a/xen/include/xen/lib/x86/cpu-policy.h
+++ b/xen/include/xen/lib/x86/cpu-policy.h
@@ -85,11 +85,12 @@ unsigned int x86_cpuid_lookup_vendor(uin
*/
const char *x86_cpuid_vendor_to_str(unsigned int vendor);
-#define CPUID_GUEST_NR_BASIC (0xdu + 1)
+#define CPUID_GUEST_NR_BASIC (0x24u + 1)
#define CPUID_GUEST_NR_CACHE (5u + 1)
#define CPUID_GUEST_NR_FEAT (2u + 1)
#define CPUID_GUEST_NR_TOPO (1u + 1)
#define CPUID_GUEST_NR_XSTATE (62u + 1)
+#define CPUID_GUEST_NR_AVX10 (0u + 1)
#define CPUID_GUEST_NR_EXTD_INTEL (0x8u + 1)
#define CPUID_GUEST_NR_EXTD_AMD (0x21u + 1)
#define CPUID_GUEST_NR_EXTD MAX(CPUID_GUEST_NR_EXTD_INTEL, \
@@ -255,6 +256,19 @@ struct cpu_policy
} comp[CPUID_GUEST_NR_XSTATE];
} xstate;
+ /* Structured AVX10 information leaf: 0x00000024[xx] */
+ union {
+ struct cpuid_leaf raw[CPUID_GUEST_NR_AVX10];
+ struct {
+ /* Subleaf 0. */
+ uint32_t max_subleaf;
+ uint32_t version:8, :8;
+ bool vsz128:1, vsz256:1, vsz512:1;
+ uint32_t :13;
+ uint32_t /* c */:32, /* d */:32;
+ };
+ } avx10;
+
/* Extended leaves: 0x800000xx */
union {
struct cpuid_leaf raw[CPUID_GUEST_NR_EXTD];
--- a/xen/lib/x86/cpuid.c
+++ b/xen/lib/x86/cpuid.c
@@ -123,6 +123,7 @@ void x86_cpu_policy_fill_native(struct c
switch ( i )
{
case 0x4: case 0x7: case 0xb: case 0xd:
+ case 0x24:
/* Multi-invocation leaves. Deferred. */
continue;
}
@@ -216,6 +217,15 @@ void x86_cpu_policy_fill_native(struct c
}
}
+ if ( p->basic.max_leaf >= 0x24 )
+ {
+ cpuid_count_leaf(0x24, 0, &p->avx10.raw[0]);
+
+ for ( i = 1; i <= MIN(p->avx10.max_subleaf,
+ ARRAY_SIZE(p->avx10.raw) - 1); ++i )
+ cpuid_count_leaf(0x24, i, &p->avx10.raw[i]);
+ }
+
/* Extended leaves. */
cpuid_leaf(0x80000000U, &p->extd.raw[0]);
for ( i = 1; i <= MIN(p->extd.max_leaf & 0xffffU,
@@ -285,6 +295,9 @@ void x86_cpu_policy_clear_out_of_range_l
ARRAY_SIZE(p->xstate.raw) - 1);
}
+ if ( p->basic.max_leaf < 0x24 )
+ memset(p->avx10.raw, 0, sizeof(p->avx10.raw));
+
zero_leaves(p->extd.raw,
((p->extd.max_leaf >> 16) == 0x8000
? (p->extd.max_leaf & 0xffff) + 1 : 0),
@@ -297,6 +310,8 @@ void __init x86_cpu_policy_bound_max_lea
min_t(uint32_t, p->basic.max_leaf, ARRAY_SIZE(p->basic.raw) - 1);
p->feat.max_subleaf =
min_t(uint32_t, p->feat.max_subleaf, ARRAY_SIZE(p->feat.raw) - 1);
+ p->avx10.max_subleaf =
+ min_t(uint32_t, p->avx10.max_subleaf, ARRAY_SIZE(p->avx10.raw) - 1);
p->extd.max_leaf = 0x80000000U | min_t(uint32_t, p->extd.max_leaf & 0xffff,
ARRAY_SIZE(p->extd.raw) - 1);
}
@@ -324,6 +339,8 @@ void x86_cpu_policy_shrink_max_leaves(st
*/
p->basic.raw[0xd] = p->xstate.raw[0];
+ p->basic.raw[0x24] = p->avx10.raw[0];
+
for ( i = p->basic.max_leaf; i; --i )
if ( p->basic.raw[i].a | p->basic.raw[i].b |
p->basic.raw[i].c | p->basic.raw[i].d )
@@ -457,6 +474,13 @@ int x86_cpuid_copy_to_buffer(const struc
break;
}
+ case 0x24:
+ for ( subleaf = 0;
+ subleaf <= MIN(p->avx10.max_subleaf,
+ ARRAY_SIZE(p->avx10.raw) - 1); ++subleaf )
+ COPY_LEAF(leaf, subleaf, &p->avx10.raw[subleaf]);
+ break;
+
default:
COPY_LEAF(leaf, XEN_CPUID_NO_SUBLEAF, &p->basic.raw[leaf]);
break;
@@ -549,6 +573,13 @@ int x86_cpuid_copy_from_buffer(struct cp
array_access_nospec(p->xstate.raw, data.subleaf) = l;
break;
+ case 0x24:
+ if ( data.subleaf >= ARRAY_SIZE(p->avx10.raw) )
+ goto out_of_range;
+
+ array_access_nospec(p->avx10.raw, data.subleaf) = l;
+ break;
+
default:
if ( data.subleaf != XEN_CPUID_NO_SUBLEAF )
goto out_of_range;
--- a/xen/lib/x86/policy.c
+++ b/xen/lib/x86/policy.c
@@ -21,6 +21,14 @@ int x86_cpu_policies_are_compatible(cons
if ( guest->feat.max_subleaf > host->feat.max_subleaf )
FAIL_CPUID(7, 0);
+ if ( guest->avx10.version > host->avx10.version ||
+ (guest->avx10.vsz512
+ ? !host->avx10.vsz512
+ : guest->avx10.vsz256
+ ? !host->avx10.vsz256
+ : guest->avx10.vsz128 && !host->avx10.vsz128) )
+ FAIL_CPUID(0x24, 0);
+
if ( guest->extd.max_leaf > host->extd.max_leaf )
FAIL_CPUID(0x80000000U, NA);
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -286,7 +286,7 @@ def crunch_numbers(state):
# enabled. Certain later extensions, acting on 256-bit vectors of
# integers, better depend on AVX2 than AVX.
AVX2: [AVX512F, VAES, VPCLMULQDQ, AVX_VNNI, AVX_IFMA, AVX_VNNI_INT8,
- AVX_VNNI_INT16, SHA512, SM4],
+ AVX_VNNI_INT16, SHA512, SM4, AVX10],
# AVX512F is taken to mean hardware support for 512bit registers
# (which in practice depends on the EVEX prefix to encode) as well