[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 09/22] x86/xstate: enable AMX components



These being controlled by XCR0, enabling support is relatively
straightforward. Note however that there won't be any use of them until
their dependent ISA extension CPUID flags get exposed, not the least due
to recalculate_xstate() handling the dependencies in kind of a reverse
manner.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
---
v3: Add new states to XSTATE_NONLAZY.
v2: New.

--- a/tools/libs/light/libxl_cpuid.c
+++ b/tools/libs/light/libxl_cpuid.c
@@ -221,6 +221,9 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"md-clear",     0x00000007,  0, CPUID_REG_EDX, 10,  1},
         {"serialize",    0x00000007,  0, CPUID_REG_EDX, 14,  1},
         {"cet-ibt",      0x00000007,  0, CPUID_REG_EDX, 20,  1},
+        {"amx-bf16",     0x00000007,  0, CPUID_REG_EDX, 22,  1},
+        {"amx-tile",     0x00000007,  0, CPUID_REG_EDX, 24,  1},
+        {"amx-int8",     0x00000007,  0, CPUID_REG_EDX, 25,  1},
         {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
         {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
         {"l1d-flush",    0x00000007,  0, CPUID_REG_EDX, 28,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -168,7 +168,8 @@ static const char *const str_7d0[32] =
 
     [18] = "pconfig",
     [20] = "cet-ibt",
-
+    [22] = "amx-bf16",
+    [24] = "amx-tile",      [25] = "amx-int8",
     [26] = "ibrsb",         [27] = "stibp",
     [28] = "l1d-flush",     [29] = "arch-caps",
     [30] = "core-caps",     [31] = "ssbd",
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -198,6 +198,14 @@ static void recalculate_xstate(struct cp
                           xstate_size(X86_XCR0_PKRU_POS));
     }
 
+    if ( p->feat.amx_tile )
+    {
+        xstates |= X86_XCR0_TILECFG | X86_XCR0_TILEDATA;
+        xstate_size = max(xstate_size,
+                          xstate_offset(X86_XCR0_TILEDATA_POS) +
+                          xstate_size(X86_XCR0_TILEDATA_POS));
+    }
+
     p->xstate.max_size  =  xstate_size;
     p->xstate.xcr0_low  =  xstates & ~XSTATE_XSAVES_ONLY;
     p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32;
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -640,6 +640,10 @@ static bool valid_xcr0(uint64_t xcr0)
     if ( !(xcr0 & X86_XCR0_BNDREGS) != !(xcr0 & X86_XCR0_BNDCSR) )
         return false;
 
+    /* TILECFG and TILEDATA must be the same. */
+    if ( !(xcr0 & X86_XCR0_TILECFG) != !(xcr0 & X86_XCR0_TILEDATA) )
+        return false;
+
     return true;
 }
 
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -96,6 +96,10 @@
 #define X86_XCR0_HI_ZMM           (1ULL << X86_XCR0_HI_ZMM_POS)
 #define X86_XCR0_PKRU_POS         9
 #define X86_XCR0_PKRU             (1ULL << X86_XCR0_PKRU_POS)
+#define X86_XCR0_TILECFG_POS      17
+#define X86_XCR0_TILECFG          (1ULL << X86_XCR0_TILECFG_POS)
+#define X86_XCR0_TILEDATA_POS     18
+#define X86_XCR0_TILEDATA         (1ULL << X86_XCR0_TILEDATA_POS)
 #define X86_XCR0_LWP_POS          62
 #define X86_XCR0_LWP              (1ULL << X86_XCR0_LWP_POS)
 
--- a/xen/include/asm-x86/xstate.h
+++ b/xen/include/asm-x86/xstate.h
@@ -32,7 +32,8 @@ extern uint32_t mxcsr_mask;
 #define XSTATE_FP_SSE  (X86_XCR0_FP | X86_XCR0_SSE)
 
 #define XSTATE_ALL     (~(1ULL << 63))
-#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU)
+#define XSTATE_NONLAZY (X86_XCR0_BNDREGS | X86_XCR0_BNDCSR | X86_XCR0_PKRU | \
+                        X86_XCR0_TILECFG | X86_XCR0_TILEDATA)
 #define XSTATE_LAZY    (XSTATE_ALL & ~XSTATE_NONLAZY)
 #define XSTATE_XSAVES_ONLY         0
 #define XSTATE_COMPACTION_ENABLED  (1ULL << 63)
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -268,6 +268,9 @@ XEN_CPUFEATURE(MD_CLEAR,      9*32+10) /
 XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */
 XEN_CPUFEATURE(SERIALIZE,     9*32+14) /*a  SERIALIZE insn */
 XEN_CPUFEATURE(CET_IBT,       9*32+20) /*   CET - Indirect Branch Tracking */
+XEN_CPUFEATURE(AMX_BF16,      9*32+22) /*   AMX BFloat16 instructions */
+XEN_CPUFEATURE(AMX_TILE,      9*32+24) /*   AMX tile architecture */
+XEN_CPUFEATURE(AMX_INT8,      9*32+25) /*   AMX 8-bit integer instructions */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by 
Intel) */
 XEN_CPUFEATURE(STIBP,         9*32+27) /*A  STIBP */
 XEN_CPUFEATURE(L1D_FLUSH,     9*32+28) /*S  MSR_FLUSH_CMD and L1D flush. */
--- a/xen/tools/gen-cpuid.py
+++ b/xen/tools/gen-cpuid.py
@@ -222,7 +222,7 @@ def crunch_numbers(state):
         # instruction groups which are specified to require XSAVE for state
         # management.
         XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
-                AVX, MPX, PKU, LWP],
+                AVX, MPX, PKU, AMX_TILE, LWP],
 
         # AVX is taken to mean hardware support for 256bit registers (which in
         # practice depends on the VEX prefix to encode), and the instructions
@@ -290,6 +290,11 @@ def crunch_numbers(state):
 
         # In principle the TSXLDTRK insns could also be considered independent.
         RTM: [TSXLDTRK],
+
+        # AMX-TILE means hardware support for tile registers and general non-
+        # computational instructions.  All further AMX features are built on 
top
+        # of AMX-TILE.
+        AMX_TILE: [AMX_BF16, AMX_INT8],
     }
 
     deep_features = tuple(sorted(deps.keys()))




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.