[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 4/5] x86/cpuid: Simplify recalculate_xstate()


  • To: Xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
  • Date: Mon, 3 May 2021 16:39:37 +0100
  • Authentication-results: esa1.hc3370-68.iphmx.com; dkim=none (message not signed) header.i=none
  • Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Jan Beulich <JBeulich@xxxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Wei Liu <wl@xxxxxxx>
  • Delivery-date: Mon, 03 May 2021 15:39:57 +0000
  • Ironport-hdrordr: A9a23:KLSibKFGXtC7fUmdpLqEF8eALOonbusQ8zAX/mp2TgFYddHdqt C2kJ0guSPcpRQwfDUbmd6GMLSdWn+0z/VIyKQYILvKZmbbkUSyKoUK1+Xf6hntATf3+OIY9Y oISchDIfnxCVQ/ssrg+gm/FL8boeWvy6yjiefAw3oFd2gDAcxdxj1kAQWWGFAefngkObMFEv Onl696jgvlVXMLbtmqQlkpNtKzw+HjpdbdT1orJzNP0njtsQ+V
  • Ironport-sdr: H5HLuDGyqEByLnCPYbYjLdx7mfF/ERASTva30yvuQ1iok75tvfyaLNmFDOJjcdrg22kapJlzVo /DYx6XvBIdcuSXrsWC2CXKgZhZxazfprOKt5dQJz0I1bNkUPTPNQvfw2zrPiN7b+3OEUuUAmYo 0MkXfJgzwAeV8myO2Kx6f7EqhHyjn6qTUc9mdRVk8FsUxNRXBKZkMNicsQxj1HdFoAfUvv5+VF YdqFWwptXKqRhyk06x/iJkSd9+S4a7zYPcFkLhITqW6X+uOX6QhwuT+79ahopfezdA9puN9XoA oDg=
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

Make use of the new xstate_uncompressed_size() helper rather than maintaining
the running calculation while accumulating feature components.

The rest of the CPUID data can come direct from the raw cpuid policy.  All
per-component data forms an ABI through the behaviour of the X{SAVE,RSTOR}*
instructions, and are constant.

Use for_each_set_bit() rather than opencoding a slightly awkward version of
it.  Mask the attributes in ecx down based on the visible features.  This
isn't actually necessary for any components or attributes defined at the time
of writing (up to AMX), but is added out of an abundance of caution.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
CC: Roger Pau Monné <roger.pau@xxxxxxxxxx>
CC: Wei Liu <wl@xxxxxxx>

Using min() in for_each_set_bit() leads to awful code generation, as it
prohibits the optimiations for spotting that the bitmap is <= BITS_PER_LONG.
As p->xstate is long enough already, use a BUILD_BUG_ON() instead.
---
 xen/arch/x86/cpuid.c | 52 +++++++++++++++++-----------------------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 752bf244ea..c7f8388e5d 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -154,8 +154,7 @@ static void sanitise_featureset(uint32_t *fs)
 static void recalculate_xstate(struct cpuid_policy *p)
 {
     uint64_t xstates = XSTATE_FP_SSE;
-    uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
-    unsigned int i, Da1 = p->xstate.Da1;
+    unsigned int i, ecx_bits = 0, Da1 = p->xstate.Da1;
 
     /*
      * The Da1 leaf is the only piece of information preserved in the common
@@ -167,61 +166,44 @@ static void recalculate_xstate(struct cpuid_policy *p)
         return;
 
     if ( p->basic.avx )
-    {
         xstates |= X86_XCR0_YMM;
-        xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_YMM_POS] +
-                          xstate_sizes[X86_XCR0_YMM_POS]);
-    }
 
     if ( p->feat.mpx )
-    {
         xstates |= X86_XCR0_BNDREGS | X86_XCR0_BNDCSR;
-        xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_BNDCSR_POS] +
-                          xstate_sizes[X86_XCR0_BNDCSR_POS]);
-    }
 
     if ( p->feat.avx512f )
-    {
         xstates |= X86_XCR0_OPMASK | X86_XCR0_ZMM | X86_XCR0_HI_ZMM;
-        xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_HI_ZMM_POS] +
-                          xstate_sizes[X86_XCR0_HI_ZMM_POS]);
-    }
 
     if ( p->feat.pku )
-    {
         xstates |= X86_XCR0_PKRU;
-        xstate_size = max(xstate_size,
-                          xstate_offsets[X86_XCR0_PKRU_POS] +
-                          xstate_sizes[X86_XCR0_PKRU_POS]);
-    }
 
-    p->xstate.max_size  =  xstate_size;
+    /* Subleaf 0 */
+    p->xstate.max_size =
+        xstate_uncompressed_size(xstates & ~XSTATE_XSAVES_ONLY);
     p->xstate.xcr0_low  =  xstates & ~XSTATE_XSAVES_ONLY;
     p->xstate.xcr0_high = (xstates & ~XSTATE_XSAVES_ONLY) >> 32;
 
+    /* Subleaf 1 */
     p->xstate.Da1 = Da1;
     if ( p->xstate.xsaves )
     {
+        ecx_bits |= 3; /* Align64, XSS */
         p->xstate.xss_low   =  xstates & XSTATE_XSAVES_ONLY;
         p->xstate.xss_high  = (xstates & XSTATE_XSAVES_ONLY) >> 32;
     }
-    else
-        xstates &= ~XSTATE_XSAVES_ONLY;
 
-    for ( i = 2; i < min(63ul, ARRAY_SIZE(p->xstate.comp)); ++i )
+    /* Subleafs 2+ */
+    xstates &= ~XSTATE_FP_SSE;
+    BUILD_BUG_ON(ARRAY_SIZE(p->xstate.comp) < 63);
+    for_each_set_bit ( i, &xstates, 63 )
     {
-        uint64_t curr_xstate = 1ul << i;
-
-        if ( !(xstates & curr_xstate) )
-            continue;
-
-        p->xstate.comp[i].size   = xstate_sizes[i];
-        p->xstate.comp[i].offset = xstate_offsets[i];
-        p->xstate.comp[i].xss    = curr_xstate & XSTATE_XSAVES_ONLY;
-        p->xstate.comp[i].align  = curr_xstate & xstate_align;
+        /*
+         * Pass through size (eax) and offset (ebx) directly.  Visbility of
+         * attributes in ecx limited by visible features in Da1.
+         */
+        p->xstate.raw[i].a = raw_cpuid_policy.xstate.raw[i].a;
+        p->xstate.raw[i].b = raw_cpuid_policy.xstate.raw[i].b;
+        p->xstate.raw[i].c = raw_cpuid_policy.xstate.raw[i].c & ecx_bits;
     }
 }
 
-- 
2.11.0




 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.