[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] x86/cpuid: AVX-512 Feature Detection



On 29/06/16 03:20, Luwei Kang wrote:
> AVX-512 is an extension of AVX2. Its spec can be found at:
> https://software.intel.com/sites/default/files/managed/b4/3a/319433-024.pdf
> This patch detects AVX-512 features by CPUID.
>
> Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx>
> ---
>  xen/arch/x86/hvm/hvm.c                      | 14 ++++++++++++++
>  xen/arch/x86/traps.c                        | 22 +++++++++++++++++++++-
>  xen/include/public/arch-x86/cpufeatureset.h |  9 +++++++++
>  xen/tools/gen-cpuid.py                      |  4 ++++
>  4 files changed, 48 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index c89ab6e..7696b1e 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3474,6 +3474,20 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, 
> unsigned int *ebx,
>                                    xstate_sizes[_XSTATE_BNDCSR]);
>              }
>  
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_OPMASK] +
> +                                  xstate_sizes[_XSTATE_OPMASK]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_ZMM] +
> +                                  xstate_sizes[_XSTATE_ZMM]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              if ( _ecx & cpufeat_mask(X86_FEATURE_PKU) )
>              {
>                  xfeature_mask |= XSTATE_PKRU;
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 767d0b0..8fb697b 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -975,7 +975,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
>  
>      switch ( leaf )
>      {
> -        uint32_t tmp, _ecx;
> +        uint32_t tmp, _ecx, _ebx;
>  
>      case 0x00000001:
>          c &= pv_featureset[FEATURESET_1c];
> @@ -1157,6 +1157,26 @@ void pv_cpuid(struct cpu_user_regs *regs)
>                                 xstate_sizes[_XSTATE_YMM]);
>              }
>  
> +            if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
> +                domain_cpuid(currd, 7, 0, &tmp, &_ebx, &tmp, &tmp);
> +            else
> +                cpuid_count(7, 0, &tmp, &_ebx, &tmp, &tmp);
> +            _ebx &= pv_featureset[FEATURESET_7b0];
> +
> +            if ( _ebx & cpufeat_mask(X86_FEATURE_AVX512F) )
> +            {
> +                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_OPMASK] +
> +                                  xstate_sizes[_XSTATE_OPMASK]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_ZMM] +
> +                                  xstate_sizes[_XSTATE_ZMM]);
> +                xstate_size = max(xstate_size,
> +                                  xstate_offsets[_XSTATE_HI_ZMM] +
> +                                  xstate_sizes[_XSTATE_HI_ZMM]);
> +            }
> +
>              a = (uint32_t)xfeature_mask;
>              d = (uint32_t)(xfeature_mask >> 32);
>              c = xstate_size;
> diff --git a/xen/include/public/arch-x86/cpufeatureset.h 
> b/xen/include/public/arch-x86/cpufeatureset.h
> index 39acf8c..9320c9e 100644
> --- a/xen/include/public/arch-x86/cpufeatureset.h
> +++ b/xen/include/public/arch-x86/cpufeatureset.h
> @@ -206,15 +206,24 @@ XEN_CPUFEATURE(PQM,           5*32+12) /*   Platform 
> QoS Monitoring */
>  XEN_CPUFEATURE(NO_FPU_SEL,    5*32+13) /*!  FPU CS/DS stored as zero */
>  XEN_CPUFEATURE(MPX,           5*32+14) /*S  Memory Protection Extensions */
>  XEN_CPUFEATURE(PQE,           5*32+15) /*   Platform QoS Enforcement */
> +XEN_CPUFEATURE(AVX512F,       5*32+16) /*A  AVX-512 Foundation Instructions 
> */
> +XEN_CPUFEATURE(AVX512DQ,      5*32+17) /*A  AVX-512 Doubleword & Quadword 
> Instrs */
>  XEN_CPUFEATURE(RDSEED,        5*32+18) /*A  RDSEED instruction */
>  XEN_CPUFEATURE(ADX,           5*32+19) /*A  ADCX, ADOX instructions */
>  XEN_CPUFEATURE(SMAP,          5*32+20) /*S  Supervisor Mode Access 
> Prevention */
> +XEN_CPUFEATURE(AVX512IFMA,    5*32+21) /*A  AVX-512 Integer Fused Multiply 
> Add */
>  XEN_CPUFEATURE(CLFLUSHOPT,    5*32+23) /*A  CLFLUSHOPT instruction */
>  XEN_CPUFEATURE(CLWB,          5*32+24) /*A  CLWB instruction */
> +XEN_CPUFEATURE(AVX512PF,      5*32+26) /*A  AVX-512 Prefetch Instructions */
> +XEN_CPUFEATURE(AVX512ER,      5*32+27) /*A  AVX-512 Exponent & Reciprocal 
> Instrs */
> +XEN_CPUFEATURE(AVX512CD,      5*32+28) /*A  AVX-512 Conflict Detection 
> Instrs */
>  XEN_CPUFEATURE(SHA,           5*32+29) /*A  SHA1 & SHA256 instructions */
> +XEN_CPUFEATURE(AVX512BW,      5*32+30) /*A  AVX-512 Byte and Word 
> Instructions */
> +XEN_CPUFEATURE(AVX512VL,      5*32+31) /*A  AVX-512 Vector Length Extensions 
> */
>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:0.ecx, word 6 */
>  XEN_CPUFEATURE(PREFETCHWT1,   6*32+ 0) /*A  PREFETCHWT1 instruction */
> +XEN_CPUFEATURE(AVX512VBMI,    6*32+ 1) /*A  AVX-512 Vector Byte Manipulation 
> Instrs */
>  XEN_CPUFEATURE(PKU,           6*32+ 3) /*H  Protection Keys for Userspace */
>  XEN_CPUFEATURE(OSPKE,         6*32+ 4) /*!  OS Protection Keys Enable */
>  
> diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
> index 7c45eca..897e660 100755
> --- a/xen/tools/gen-cpuid.py
> +++ b/xen/tools/gen-cpuid.py
> @@ -235,6 +235,10 @@ def crunch_numbers(state):
>          # subsequent instruction groups may only be VEX encoded.
>          AVX: [FMA, FMA4, F16C, AVX2, XOP],
>  
> +        # AVX-512 is an extension of AVX2 and depends on AVX2 being available.
> +        AVX2: [AVX512F, AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
> +                AVX512BW, AVX512VL, AVX512VBMI],

I think this needs adjusting.  AVX512F is the base feature and the
indication of extra xstate, while all other AVX512 features (e.g.
AVX512DQ) are explicitly documented as not needing a check for AVX512F
if the AVX512DQ bit is present.

I think it wants to look something like:

# AVX2 is an extension to AVX, providing mainly new integer instructions.
# In principle, AVX512 only depends on YMM register state, but many AVX2
# instructions are extended by AVX512F to 512-bit forms.
AVX2: [AVX512F],

# AVX512F is taken to mean hardware support for EVEX encoded instructions,
# 512bit registers, and the instructions themselves.  All further AVX512
# features are built on top of AVX512F.
AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
                 AVX512BW, AVX512VL, AVX512VBMI],

~Andrew

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.