
[Xen-devel] [PATCH 26/27] x86/cpuid: Effectively remove pv_cpuid() and hvm_cpuid()



All callers of pv_cpuid() and hvm_cpuid() (other than the guest_cpuid() legacy
path) have been removed from the codebase.  Move both functions into cpuid.c
and make them static to prevent any further use, leaving guest_cpuid() as the
sole API.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
CC: Jan Beulich <JBeulich@xxxxxxxx>
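
For reference, after this patch every consumer is expected to go through
guest_cpuid().  Below is a minimal sketch of the intended calling pattern
(illustrative only, not part of the patch: the handler name is made up, and
the cpuid_leaf field names a/b/c/d are assumed from the existing struct
definition):

  static void example_cpuid_exit(struct cpu_user_regs *regs)
  {
      struct cpuid_leaf res;

      /* One call resolves leaf/subleaf against the domain's CPUID policy. */
      guest_cpuid(current, regs->_eax, regs->_ecx, &res);

      regs->rax = res.a;
      regs->rbx = res.b;
      regs->rcx = res.c;
      regs->rdx = res.d;
  }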
---
 xen/arch/x86/cpuid.c            | 524 ++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/hvm.c          | 250 -------------------
 xen/arch/x86/traps.c            | 273 ---------------------
 xen/include/asm-x86/hvm/hvm.h   |   2 -
 xen/include/asm-x86/processor.h |   2 -
 5 files changed, 524 insertions(+), 527 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 9181fc7..5e7e8cc 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -5,6 +5,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/vmx/vmcs.h>
 #include <asm/processor.h>
+#include <asm/xstate.h>
 
 const uint32_t known_features[] = INIT_KNOWN_FEATURES;
 const uint32_t special_features[] = INIT_SPECIAL_FEATURES;
@@ -336,6 +337,529 @@ int init_domain_cpuid_policy(struct domain *d)
     return 0;
 }
 
+static void pv_cpuid(struct cpu_user_regs *regs)
+{
+    uint32_t leaf, subleaf, a, b, c, d;
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    const struct cpuid_policy *p = currd->arch.cpuid;
+
+    leaf = a = regs->_eax;
+    b = regs->_ebx;
+    subleaf = c = regs->_ecx;
+    d = regs->_edx;
+
+    if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
+        domain_cpuid(currd, leaf, subleaf, &a, &b, &c, &d);
+    else
+        cpuid_count(leaf, subleaf, &a, &b, &c, &d);
+
+    switch ( leaf )
+    {
+        uint32_t tmp;
+
+    case 0x00000001:
+        c = p->basic._1c;
+        d = p->basic._1d;
+
+        if ( !is_pvh_domain(currd) )
+        {
+            /*
+             * Delete the PVH condition when HVMLite formally replaces PVH,
+             * and HVM guests no longer enter a PV codepath.
+             */
+
+            /*
+             * !!! OSXSAVE handling for PV guests is non-architectural !!!
+             *
+             * Architecturally, the correct code here is simply:
+             *
+             *   if ( curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE )
+             *       c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+             *
+             * However because of bugs in Xen (before c/s bd19080b, Nov 2010,
+             * the XSAVE cpuid flag leaked into guests despite the feature not
+             * being available for use), buggy workarounds were introduced to
+             * Linux (c/s 947ccf9c, also Nov 2010) which relied on the fact
+             * that Xen also incorrectly leaked OSXSAVE into the guest.
+             *
+             * Furthermore, providing architectural OSXSAVE behaviour to
+             * many Linux PV guests triggered a further kernel bug when the
+             * fpu code observes that XSAVEOPT is available, assumes that
+             * xsave state had been set up for the task, and follows a wild
+             * pointer.
+             *
+             * Older Linux PVOPS kernels however do require architectural
+             * behaviour.  They observe Xen's leaked OSXSAVE and assume they
+             * can already use XSETBV, dying with a #UD because the shadowed
+             * CR4.OSXSAVE is clear.  This behaviour has been adjusted in all
+             * observed cases via stable backports of the above changeset.
+             *
+             * Therefore, the leaking of Xen's OSXSAVE setting has become a
+             * de facto part of the PV ABI and can't reasonably be corrected.
+             * It can however be restricted to only the enlightened CPUID
+             * view, as seen by the guest kernel.
+             *
+             * The following situations and logic now apply:
+             *
+             * - Hardware without CPUID faulting support and native CPUID:
+             *    There is nothing Xen can do here.  The host's XSAVE flag will
+             *    leak through and Xen's OSXSAVE choice will leak through.
+             *
+             *    In the case that the guest kernel has not set up OSXSAVE, only
+             *    SSE will be set in xcr0, and guest userspace can't do too much
+             *    damage itself.
+             *
+             * - Enlightened CPUID or CPUID faulting available:
+             *    Xen can fully control what is seen here.  Guest kernels need
+             *    to see the leaked OSXSAVE via the enlightened path, but
+             *    guest userspace and the native view are given architectural
+             *    behaviour.
+             *
+             *    Emulated vs Faulted CPUID is distinguished based on whether a
+             *    #UD or #GP is currently being serviced.
+             */
+            /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
+            if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
+                 (regs->entry_vector == TRAP_invalid_op &&
+                  guest_kernel_mode(curr, regs) &&
+                  (read_cr4() & X86_CR4_OSXSAVE)) )
+                c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+            /*
+             * At the time of writing, a PV domain is the only viable option
+             * for Dom0.  Several interactions between dom0 and Xen for real
+             * hardware setup have unfortunately been implemented based on
+             * state which incorrectly leaked into dom0.
+             *
+             * These leaks are retained for backwards compatibility, but
+             * restricted to the hardware domain's kernel only.
+             */
+            if ( is_hardware_domain(currd) && guest_kernel_mode(curr, regs) )
+            {
+                /*
+                 * MTRR used to unconditionally leak into PV guests.  They
+                 * cannot use the MTRR infrastructure at all, and shouldn't be
+                 * able to see the feature.
+                 *
+                 * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid
+                 * trying to use the associated MSRs.  Xenolinux-based PV dom0's
+                 * however use the MTRR feature as an indication of the presence
+                 * of the XENPF_{add,del,read}_memtype hypercalls.
+                 */
+                if ( cpu_has_mtrr )
+                    d |= cpufeat_mask(X86_FEATURE_MTRR);
+
+                /*
+                 * MONITOR never leaked into PV guests, as PV guests cannot
+                 * use the MONITOR/MWAIT instructions.  As such, they require
+                 * the feature to not be present in emulated CPUID.
+                 *
+                 * Modern PVOPS Linux try to be cunning and use native CPUID
+                 * to see if the hardware actually supports MONITOR, and by
+                 * extension, deep C states.
+                 *
+                 * If the feature is seen, deep-C state information is
+                 * obtained from the DSDT and handed back to Xen via the
+                 * XENPF_set_processor_pminfo hypercall.
+                 *
+                 * This mechanism is incompatible with an HVM-based hardware
+                 * domain, and also with CPUID Faulting.
+                 *
+                 * Luckily, Xen can be just as 'cunning', and distinguish an
+                 * emulated CPUID from a faulted CPUID by whether a #UD or #GP
+                 * fault is currently being serviced.  Yuck...
+                 */
+                if ( cpu_has_monitor && regs->entry_vector == TRAP_gp_fault )
+                    c |= cpufeat_mask(X86_FEATURE_MONITOR);
+
+                /*
+                 * While MONITOR never leaked into PV guests, EIST always used
+                 * to.
+                 *
+                 * Modern PVOPS will only parse P state information from the
+                 * DSDT and return it to Xen if EIST is seen in the emulated
+                 * CPUID information.
+                 */
+                if ( cpu_has_eist )
+                    c |= cpufeat_mask(X86_FEATURE_EIST);
+            }
+        }
+
+        if ( vpmu_enabled(curr) &&
+             vpmu_is_set(vcpu_vpmu(curr), VPMU_CPU_HAS_DS) )
+        {
+            d |= cpufeat_mask(X86_FEATURE_DS);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
+                c |= cpufeat_mask(X86_FEATURE_DTES64);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+                c |= cpufeat_mask(X86_FEATURE_DSCPL);
+        }
+        break;
+
+    case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
+        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+             !vpmu_enabled(curr) )
+            goto unsupported;
+
+        /* Report at most version 3 since that's all we currently emulate. */
+        if ( (a & 0xff) > 3 )
+            a = (a & ~0xff) | 3;
+        break;
+
+    case XSTATE_CPUID:
+        if ( !p->basic.xsave || subleaf >= 63 )
+            goto unsupported;
+        switch ( subleaf )
+        {
+        case 0:
+        {
+            uint64_t xfeature_mask = XSTATE_FP_SSE;
+            uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+            if ( p->basic.avx )
+            {
+                xfeature_mask |= XSTATE_YMM;
+                xstate_size = (xstate_offsets[_XSTATE_YMM] +
+                               xstate_sizes[_XSTATE_YMM]);
+            }
+
+            if ( p->feat.avx512f )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
+            a = (uint32_t)xfeature_mask;
+            d = (uint32_t)(xfeature_mask >> 32);
+            c = xstate_size;
+
+            /*
+             * Always read CPUID.0xD[ECX=0].EBX from hardware, rather than
+             * domain policy.  It varies with enabled xstate, and the correct
+             * xcr0 is in context.
+             */
+            cpuid_count(leaf, subleaf, &tmp, &b, &tmp, &tmp);
+            break;
+        }
+
+        case 1:
+            a = p->xstate.Da1;
+            b = c = d = 0;
+            break;
+        }
+        break;
+
+    case 0x80000001:
+        c = p->extd.e1c;
+        d = p->extd.e1d;
+
+        /* If not emulating AMD, clear the duplicated features in e1d. */
+        if ( currd->arch.x86_vendor != X86_VENDOR_AMD )
+            d &= ~CPUID_COMMON_1D_FEATURES;
+
+        /*
+         * MTRR used to unconditionally leak into PV guests.  They cannot use
+         * the MTRR infrastructure at all, and shouldn't be able to see the feature.
+         *
+         * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid trying
+         * to use the associated MSRs.  Xenolinux-based PV dom0's however use
+         * the MTRR feature as an indication of the presence of the
+         * XENPF_{add,del,read}_memtype hypercalls.
+         */
+        if ( is_hardware_domain(currd) && guest_kernel_mode(curr, regs) &&
+             cpu_has_mtrr )
+            d |= cpufeat_mask(X86_FEATURE_MTRR);
+        break;
+
+    case 0x80000007:
+        d = p->extd.e7d;
+        break;
+
+    case 0x80000008:
+        a = paddr_bits | (vaddr_bits << 8);
+        b = p->extd.e8b;
+        break;
+
+    case 0x00000005: /* MONITOR/MWAIT */
+    case 0x0000000b: /* Extended Topology Enumeration */
+    case 0x8000000a: /* SVM revision and features */
+    case 0x8000001b: /* Instruction Based Sampling */
+    case 0x8000001c: /* Light Weight Profiling */
+    case 0x8000001e: /* Extended topology reporting */
+    unsupported:
+        a = b = c = d = 0;
+        break;
+
+    case 0x7:
+        ASSERT_UNREACHABLE();
+        /* Now handled in guest_cpuid(). */
+    }
+
+    regs->rax = a;
+    regs->rbx = b;
+    regs->rcx = c;
+    regs->rdx = d;
+}
+
+static void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
+                      unsigned int *ecx, unsigned int *edx)
+{
+    struct vcpu *v = current;
+    struct domain *d = v->domain;
+    const struct cpuid_policy *p = d->arch.cpuid;
+    unsigned int count, dummy = 0;
+
+    if ( !eax )
+        eax = &dummy;
+    if ( !ebx )
+        ebx = &dummy;
+    if ( !ecx )
+        ecx = &dummy;
+    count = *ecx;
+    if ( !edx )
+        edx = &dummy;
+
+    domain_cpuid(d, input, count, eax, ebx, ecx, edx);
+
+    switch ( input )
+    {
+    case 0x1:
+        /* Fix up VLAPIC details. */
+        *ebx &= 0x00FFFFFFu;
+        *ebx |= (v->vcpu_id * 2) << 24;
+
+        *ecx = p->basic._1c;
+        *edx = p->basic._1d;
+
+        /* APIC exposed to guests, but Fast-forward MSR_APIC_BASE.EN back in. */
+        if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
+            *edx &= ~cpufeat_bit(X86_FEATURE_APIC);
+
+        /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
+        if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE )
+            *ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
+
+        /*
+         * PSE36 is not supported in shadow mode.  This bit should be
+         * unilaterally cleared.
+         *
+         * However, an unspecified version of Hyper-V from 2011 refuses
+         * to start as the "cpu does not provide required hw features" if
+         * it can't see PSE36.
+         *
+         * As a workaround, leak the toolstack-provided PSE36 value into a
+         * shadow guest if the guest is already using PAE paging (and won't
+         * care about reverting back to PSE paging).  Otherwise, nobble it, so
+         * a 32bit guest doesn't get the impression that it could try to use
+         * PSE36 paging.
+         */
+        if ( !hap_enabled(d) && !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
+            *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
+
+        if ( vpmu_enabled(v) &&
+             vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
+        {
+            *edx |= cpufeat_mask(X86_FEATURE_DS);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
+                *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
+            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+                *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
+        }
+
+        break;
+
+    case 0xb:
+        /* Fix the x2APIC identifier. */
+        *edx = v->vcpu_id * 2;
+        break;
+
+    case XSTATE_CPUID:
+        if ( !p->basic.xsave || count >= 63 )
+        {
+            *eax = *ebx = *ecx = *edx = 0;
+            break;
+        }
+        switch ( count )
+        {
+        case 0:
+        {
+            uint64_t xfeature_mask = XSTATE_FP_SSE;
+            uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
+
+            if ( p->basic.avx )
+            {
+                xfeature_mask |= XSTATE_YMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_YMM] +
+                                  xstate_sizes[_XSTATE_YMM]);
+            }
+
+            if ( p->feat.mpx )
+            {
+                xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_BNDCSR] +
+                                  xstate_sizes[_XSTATE_BNDCSR]);
+            }
+
+            if ( p->feat.avx512f )
+            {
+                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_OPMASK] +
+                                  xstate_sizes[_XSTATE_OPMASK]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_ZMM] +
+                                  xstate_sizes[_XSTATE_ZMM]);
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_HI_ZMM] +
+                                  xstate_sizes[_XSTATE_HI_ZMM]);
+            }
+
+            if ( p->feat.pku )
+            {
+                xfeature_mask |= XSTATE_PKRU;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_PKRU] +
+                                  xstate_sizes[_XSTATE_PKRU]);
+            }
+
+            if ( p->extd.lwp )
+            {
+                xfeature_mask |= XSTATE_LWP;
+                xstate_size = max(xstate_size,
+                                  xstate_offsets[_XSTATE_LWP] +
+                                  xstate_sizes[_XSTATE_LWP]);
+            }
+
+            *eax = (uint32_t)xfeature_mask;
+            *edx = (uint32_t)(xfeature_mask >> 32);
+            *ecx = xstate_size;
+
+            /*
+             * Always read CPUID[0xD,0].EBX from hardware, rather than domain
+             * policy.  It varies with enabled xstate, and the correct xcr0 is
+             * in context.
+             */
+            cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
+            break;
+        }
+
+        case 1:
+            *eax = p->xstate.Da1;
+
+            if ( p->xstate.xsaves )
+            {
+                /*
+                 * Always read CPUID[0xD,1].EBX from hardware, rather than
+                 * domain policy.  It varies with enabled xstate, and the
+                 * correct xcr0/xss are in context.
+                 */
+                cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
+            }
+            else
+                *ebx = 0;
+
+            *ecx = *edx = 0;
+            break;
+        }
+        break;
+
+    case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
+        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || !vpmu_enabled(v) )
+        {
+            *eax = *ebx = *ecx = *edx = 0;
+            break;
+        }
+
+        /* Report at most version 3 since that's all we currently emulate */
+        if ( (*eax & 0xff) > 3 )
+            *eax = (*eax & ~0xff) | 3;
+        break;
+
+    case 0x80000001:
+        *ecx = p->extd.e1c;
+        *edx = p->extd.e1d;
+
+        /* If not emulating AMD, clear the duplicated features in e1d. */
+        if ( d->arch.x86_vendor != X86_VENDOR_AMD )
+            *edx &= ~CPUID_COMMON_1D_FEATURES;
+        /* fast-forward MSR_APIC_BASE.EN if it hasn't already been clobbered. */
+        else if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
+            *edx &= ~cpufeat_bit(X86_FEATURE_APIC);
+
+        /*
+         * PSE36 is not supported in shadow mode.  This bit should be
+         * unilaterally cleared.
+         *
+         * However, an unspecified version of Hyper-V from 2011 refuses
+         * to start as the "cpu does not provide required hw features" if
+         * it can't see PSE36.
+         *
+         * As a workaround, leak the toolstack-provided PSE36 value into a
+         * shadow guest if the guest is already using PAE paging (and won't
+         * care about reverting back to PSE paging).  Otherwise, nobble it, so
+         * a 32bit guest doesn't get the impression that it could try to use
+         * PSE36 paging.
+         */
+        if ( !hap_enabled(d) && !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
+            *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
+
+        /* SYSCALL is hidden outside of long mode on Intel. */
+        if ( d->arch.x86_vendor == X86_VENDOR_INTEL &&
+             !hvm_long_mode_enabled(v))
+            *edx &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
+
+        break;
+
+    case 0x80000007:
+        *edx = p->extd.e7d;
+        break;
+
+    case 0x80000008:
+        *eax &= 0xff;
+        count = d->arch.paging.gfn_bits + PAGE_SHIFT;
+        if ( *eax > count )
+            *eax = count;
+
+        count = (p->basic.pae || p->basic.pse36) ? 36 : 32;
+        if ( *eax < count )
+            *eax = count;
+
+        *eax |= (p->extd.lm ? vaddr_bits : 32) << 8;
+
+        *ebx = p->extd.e8b;
+        break;
+
+    case 0x8000001c:
+        if ( !cpu_has_svm )
+        {
+            *eax = *ebx = *ecx = *edx = 0;
+            break;
+        }
+
+        if ( cpu_has_lwp && (v->arch.xcr0 & XSTATE_LWP) )
+            /* Turn on available bit and other features specified in lwp_cfg. */
+            *eax = (*edx & v->arch.hvm_svm.guest_lwp_cfg) | 1;
+        else
+            *eax = 0;
+        break;
+
+    case 0x7:
+        ASSERT_UNREACHABLE();
+        /* Now handled in guest_cpuid(). */
+    }
+}
+
 void guest_cpuid(const struct vcpu *v, unsigned int leaf,
                  unsigned int subleaf, struct cpuid_leaf *res)
 {
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 4ded533..cf38907 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3287,256 +3287,6 @@ unsigned long copy_from_user_hvm(void *to, const void *from, unsigned len)
     return rc ? len : 0; /* fake a copy_from_user() return code */
 }
 
-void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
-                                   unsigned int *ecx, unsigned int *edx)
-{
-    struct vcpu *v = current;
-    struct domain *d = v->domain;
-    const struct cpuid_policy *p = d->arch.cpuid;
-    unsigned int count, dummy = 0;
-
-    if ( !eax )
-        eax = &dummy;
-    if ( !ebx )
-        ebx = &dummy;
-    if ( !ecx )
-        ecx = &dummy;
-    count = *ecx;
-    if ( !edx )
-        edx = &dummy;
-
-    domain_cpuid(d, input, count, eax, ebx, ecx, edx);
-
-    switch ( input )
-    {
-    case 0x1:
-        /* Fix up VLAPIC details. */
-        *ebx &= 0x00FFFFFFu;
-        *ebx |= (v->vcpu_id * 2) << 24;
-
-        *ecx = p->basic._1c;
-        *edx = p->basic._1d;
-
-        /* APIC exposed to guests, but Fast-forward MSR_APIC_BASE.EN back in. */
-        if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            *edx &= ~cpufeat_bit(X86_FEATURE_APIC);
-
-        /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
-        if ( v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE )
-            *ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-
-        /*
-         * PSE36 is not supported in shadow mode.  This bit should be
-         * unilaterally cleared.
-         *
-         * However, an unspecified version of Hyper-V from 2011 refuses
-         * to start as the "cpu does not provide required hw features" if
-         * it can't see PSE36.
-         *
-         * As a workaround, leak the toolstack-provided PSE36 value into a
-         * shadow guest if the guest is already using PAE paging (and won't
-         * care about reverting back to PSE paging).  Otherwise, knoble it, so
-         * a 32bit guest doesn't get the impression that it could try to use
-         * PSE36 paging.
-         */
-        if ( !hap_enabled(d) && !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
-            *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
-
-        if ( vpmu_enabled(v) &&
-             vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
-        {
-            *edx |= cpufeat_mask(X86_FEATURE_DS);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
-                *ecx |= cpufeat_mask(X86_FEATURE_DTES64);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                *ecx |= cpufeat_mask(X86_FEATURE_DSCPL);
-        }
-
-        break;
-
-    case 0xb:
-        /* Fix the x2APIC identifier. */
-        *edx = v->vcpu_id * 2;
-        break;
-
-    case XSTATE_CPUID:
-        if ( !p->basic.xsave || count >= 63 )
-        {
-            *eax = *ebx = *ecx = *edx = 0;
-            break;
-        }
-        switch ( count )
-        {
-        case 0:
-        {
-            uint64_t xfeature_mask = XSTATE_FP_SSE;
-            uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
-
-            if ( p->basic.avx )
-            {
-                xfeature_mask |= XSTATE_YMM;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_YMM] +
-                                  xstate_sizes[_XSTATE_YMM]);
-            }
-
-            if ( p->feat.mpx )
-            {
-                xfeature_mask |= XSTATE_BNDREGS | XSTATE_BNDCSR;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_BNDCSR] +
-                                  xstate_sizes[_XSTATE_BNDCSR]);
-            }
-
-            if ( p->feat.avx512f )
-            {
-                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_OPMASK] +
-                                  xstate_sizes[_XSTATE_OPMASK]);
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_ZMM] +
-                                  xstate_sizes[_XSTATE_ZMM]);
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_HI_ZMM] +
-                                  xstate_sizes[_XSTATE_HI_ZMM]);
-            }
-
-            if ( p->feat.pku )
-            {
-                xfeature_mask |= XSTATE_PKRU;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_PKRU] +
-                                  xstate_sizes[_XSTATE_PKRU]);
-            }
-
-            if ( p->extd.lwp )
-            {
-                xfeature_mask |= XSTATE_LWP;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_LWP] +
-                                  xstate_sizes[_XSTATE_LWP]);
-            }
-
-            *eax = (uint32_t)xfeature_mask;
-            *edx = (uint32_t)(xfeature_mask >> 32);
-            *ecx = xstate_size;
-
-            /*
-             * Always read CPUID[0xD,0].EBX from hardware, rather than domain
-             * policy.  It varies with enabled xstate, and the correct xcr0 is
-             * in context.
-             */
-            cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
-            break;
-        }
-
-        case 1:
-            *eax = p->xstate.Da1;
-
-            if ( p->xstate.xsaves )
-            {
-                /*
-                 * Always read CPUID[0xD,1].EBX from hardware, rather than
-                 * domain policy.  It varies with enabled xstate, and the
-                 * correct xcr0/xss are in context.
-                 */
-                cpuid_count(input, count, &dummy, ebx, &dummy, &dummy);
-            }
-            else
-                *ebx = 0;
-
-            *ecx = *edx = 0;
-            break;
-        }
-        break;
-
-    case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
-        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || !vpmu_enabled(v) )
-        {
-            *eax = *ebx = *ecx = *edx = 0;
-            break;
-        }
-
-        /* Report at most version 3 since that's all we currently emulate */
-        if ( (*eax & 0xff) > 3 )
-            *eax = (*eax & ~0xff) | 3;
-        break;
-
-    case 0x80000001:
-        *ecx = p->extd.e1c;
-        *edx = p->extd.e1d;
-
-        /* If not emulating AMD, clear the duplicated features in e1d. */
-        if ( d->arch.x86_vendor != X86_VENDOR_AMD )
-            *edx &= ~CPUID_COMMON_1D_FEATURES;
-        /* fast-forward MSR_APIC_BASE.EN if it hasn't already been clobbered. */
-        else if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
-            *edx &= ~cpufeat_bit(X86_FEATURE_APIC);
-
-        /*
-         * PSE36 is not supported in shadow mode.  This bit should be
-         * unilaterally cleared.
-         *
-         * However, an unspecified version of Hyper-V from 2011 refuses
-         * to start as the "cpu does not provide required hw features" if
-         * it can't see PSE36.
-         *
-         * As a workaround, leak the toolstack-provided PSE36 value into a
-         * shadow guest if the guest is already using PAE paging (and won't
-         * care about reverting back to PSE paging).  Otherwise, knoble it, so
-         * a 32bit guest doesn't get the impression that it could try to use
-         * PSE36 paging.
-         */
-        if ( !hap_enabled(d) && !(hvm_pae_enabled(v) || hvm_long_mode_enabled(v)) )
-            *edx &= ~cpufeat_mask(X86_FEATURE_PSE36);
-
-        /* SYSCALL is hidden outside of long mode on Intel. */
-        if ( d->arch.x86_vendor == X86_VENDOR_INTEL &&
-             !hvm_long_mode_enabled(v))
-            *edx &= ~cpufeat_mask(X86_FEATURE_SYSCALL);
-
-        break;
-
-    case 0x80000007:
-        *edx = p->extd.e7d;
-        break;
-
-    case 0x80000008:
-        *eax &= 0xff;
-        count = d->arch.paging.gfn_bits + PAGE_SHIFT;
-        if ( *eax > count )
-            *eax = count;
-
-        count = (p->basic.pae || p->basic.pse36) ? 36 : 32;
-        if ( *eax < count )
-            *eax = count;
-
-        *eax |= (p->extd.lm ? vaddr_bits : 32) << 8;
-
-        *ebx = p->extd.e8b;
-        break;
-
-    case 0x8000001c:
-        if ( !cpu_has_svm )
-        {
-            *eax = *ebx = *ecx = *edx = 0;
-            break;
-        }
-
-        if ( cpu_has_lwp && (v->arch.xcr0 & XSTATE_LWP) )
-            /* Turn on available bit and other features specified in lwp_cfg. */
-            *eax = (*edx & v->arch.hvm_svm.guest_lwp_cfg) | 1;
-        else
-            *eax = 0;
-        break;
-
-    case 0x7:
-        ASSERT_UNREACHABLE();
-        /* Now handled in guest_cpuid(). */
-    }
-}
-
 bool hvm_check_cpuid_faulting(struct vcpu *v)
 {
     if ( !v->arch.cpuid_faulting )
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index e2669b1..501d5b3 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1020,279 +1020,6 @@ void cpuid_hypervisor_leaves(const struct vcpu *v, unsigned int leaf,
     }
 }
 
-void pv_cpuid(struct cpu_user_regs *regs)
-{
-    uint32_t leaf, subleaf, a, b, c, d;
-    struct vcpu *curr = current;
-    struct domain *currd = curr->domain;
-    const struct cpuid_policy *p = currd->arch.cpuid;
-
-    leaf = a = regs->_eax;
-    b = regs->_ebx;
-    subleaf = c = regs->_ecx;
-    d = regs->_edx;
-
-    if ( !is_control_domain(currd) && !is_hardware_domain(currd) )
-        domain_cpuid(currd, leaf, subleaf, &a, &b, &c, &d);
-    else
-        cpuid_count(leaf, subleaf, &a, &b, &c, &d);
-
-    switch ( leaf )
-    {
-        uint32_t tmp;
-
-    case 0x00000001:
-        c = p->basic._1c;
-        d = p->basic._1d;
-
-        if ( !is_pvh_domain(currd) )
-        {
-            /*
-             * Delete the PVH condition when HVMLite formally replaces PVH,
-             * and HVM guests no longer enter a PV codepath.
-             */
-
-            /*
-             * !!! OSXSAVE handling for PV guests is non-architectural !!!
-             *
-             * Architecturally, the correct code here is simply:
-             *
-             *   if ( curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE )
-             *       c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-             *
-             * However because of bugs in Xen (before c/s bd19080b, Nov 2010,
-             * the XSAVE cpuid flag leaked into guests despite the feature not
-             * being available for use), buggy workarounds where introduced to
-             * Linux (c/s 947ccf9c, also Nov 2010) which relied on the fact
-             * that Xen also incorrectly leaked OSXSAVE into the guest.
-             *
-             * Furthermore, providing architectural OSXSAVE behaviour to a
-             * many Linux PV guests triggered a further kernel bug when the
-             * fpu code observes that XSAVEOPT is available, assumes that
-             * xsave state had been set up for the task, and follows a wild
-             * pointer.
-             *
-             * Older Linux PVOPS kernels however do require architectural
-             * behaviour.  They observe Xen's leaked OSXSAVE and assume they
-             * can already use XSETBV, dying with a #UD because the shadowed
-             * CR4.OSXSAVE is clear.  This behaviour has been adjusted in all
-             * observed cases via stable backports of the above changeset.
-             *
-             * Therefore, the leaking of Xen's OSXSAVE setting has become a
-             * defacto part of the PV ABI and can't reasonably be corrected.
-             * It can however be restricted to only the enlightened CPUID
-             * view, as seen by the guest kernel.
-             *
-             * The following situations and logic now applies:
-             *
-             * - Hardware without CPUID faulting support and native CPUID:
-             *    There is nothing Xen can do here.  The hosts XSAVE flag will
-             *    leak through and Xen's OSXSAVE choice will leak through.
-             *
-             *    In the case that the guest kernel has not set up OSXSAVE, only
-             *    SSE will be set in xcr0, and guest userspace can't do too much
-             *    damage itself.
-             *
-             * - Enlightened CPUID or CPUID faulting available:
-             *    Xen can fully control what is seen here.  Guest kernels need
-             *    to see the leaked OSXSAVE via the enlightened path, but
-             *    guest userspace and the native is given architectural
-             *    behaviour.
-             *
-             *    Emulated vs Faulted CPUID is distinguised based on whether a
-             *    #UD or #GP is currently being serviced.
-             */
-            /* OSXSAVE clear in policy.  Fast-forward CR4 back in. */
-            if ( (curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE) ||
-                 (regs->entry_vector == TRAP_invalid_op &&
-                  guest_kernel_mode(curr, regs) &&
-                  (read_cr4() & X86_CR4_OSXSAVE)) )
-                c |= cpufeat_mask(X86_FEATURE_OSXSAVE);
-
-            /*
-             * At the time of writing, a PV domain is the only viable option
-             * for Dom0.  Several interactions between dom0 and Xen for real
-             * hardware setup have unfortunately been implemented based on
-             * state which incorrectly leaked into dom0.
-             *
-             * These leaks are retained for backwards compatibility, but
-             * restricted to the hardware domains kernel only.
-             */
-            if ( is_hardware_domain(currd) && guest_kernel_mode(curr, regs) )
-            {
-                /*
-                 * MTRR used to unconditionally leak into PV guests.  They
-                 * cannot MTRR infrastructure at all, and shouldn't be able to
-                 * see the feature.
-                 *
-                 * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid
-                 * trying to use the associated MSRs.  Xenolinux-based PV dom0's
-                 * however use the MTRR feature as an indication of the presence
-                 * of the XENPF_{add,del,read}_memtype hypercalls.
-                 */
-                if ( cpu_has_mtrr )
-                    d |= cpufeat_mask(X86_FEATURE_MTRR);
-
-                /*
-                 * MONITOR never leaked into PV guests, as PV guests cannot
-                 * use the MONITOR/MWAIT instructions.  As such, they require
-                 * the feature to not being present in emulated CPUID.
-                 *
-                 * Modern PVOPS Linux try to be cunning and use native CPUID
-                 * to see if the hardware actually supports MONITOR, and by
-                 * extension, deep C states.
-                 *
-                 * If the feature is seen, deep-C state information is
-                 * obtained from the DSDT and handed back to Xen via the
-                 * XENPF_set_processor_pminfo hypercall.
-                 *
-                 * This mechanism is incompatible with an HVM-based hardware
-                 * domain, and also with CPUID Faulting.
-                 *
-                 * Luckily, Xen can be just as 'cunning', and distinguish an
-                 * emulated CPUID from a faulted CPUID by whether a #UD or #GP
-                 * fault is currently being serviced.  Yuck...
-                 */
-                if ( cpu_has_monitor && regs->entry_vector == TRAP_gp_fault )
-                    c |= cpufeat_mask(X86_FEATURE_MONITOR);
-
-                /*
-                 * While MONITOR never leaked into PV guests, EIST always used
-                 * to.
-                 *
-                 * Modern PVOPS will only parse P state information from the
-                 * DSDT and return it to Xen if EIST is seen in the emulated
-                 * CPUID information.
-                 */
-                if ( cpu_has_eist )
-                    c |= cpufeat_mask(X86_FEATURE_EIST);
-            }
-        }
-
-        if ( vpmu_enabled(curr) &&
-             vpmu_is_set(vcpu_vpmu(curr), VPMU_CPU_HAS_DS) )
-        {
-            d |= cpufeat_mask(X86_FEATURE_DS);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DTES64) )
-                c |= cpufeat_mask(X86_FEATURE_DTES64);
-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
-                c |= cpufeat_mask(X86_FEATURE_DSCPL);
-        }
-        break;
-
-    case 0x0000000a: /* Architectural Performance Monitor Features (Intel) */
-        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
-             !vpmu_enabled(curr) )
-            goto unsupported;
-
-        /* Report at most version 3 since that's all we currently emulate. */
-        if ( (a & 0xff) > 3 )
-            a = (a & ~0xff) | 3;
-        break;
-
-    case XSTATE_CPUID:
-        if ( !p->basic.xsave || subleaf >= 63 )
-            goto unsupported;
-        switch ( subleaf )
-        {
-        case 0:
-        {
-            uint64_t xfeature_mask = XSTATE_FP_SSE;
-            uint32_t xstate_size = XSTATE_AREA_MIN_SIZE;
-
-            if ( p->basic.avx )
-            {
-                xfeature_mask |= XSTATE_YMM;
-                xstate_size = (xstate_offsets[_XSTATE_YMM] +
-                               xstate_sizes[_XSTATE_YMM]);
-            }
-
-            if ( p->feat.avx512f )
-            {
-                xfeature_mask |= XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM;
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_OPMASK] +
-                                  xstate_sizes[_XSTATE_OPMASK]);
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_ZMM] +
-                                  xstate_sizes[_XSTATE_ZMM]);
-                xstate_size = max(xstate_size,
-                                  xstate_offsets[_XSTATE_HI_ZMM] +
-                                  xstate_sizes[_XSTATE_HI_ZMM]);
-            }
-
-            a = (uint32_t)xfeature_mask;
-            d = (uint32_t)(xfeature_mask >> 32);
-            c = xstate_size;
-
-            /*
-             * Always read CPUID.0xD[ECX=0].EBX from hardware, rather than
-             * domain policy.  It varies with enabled xstate, and the correct
-             * xcr0 is in context.
-             */
-            cpuid_count(leaf, subleaf, &tmp, &b, &tmp, &tmp);
-            break;
-        }
-
-        case 1:
-            a = p->xstate.Da1;
-            b = c = d = 0;
-            break;
-        }
-        break;
-
-    case 0x80000001:
-        c = p->extd.e1c;
-        d = p->extd.e1d;
-
-        /* If not emulating AMD, clear the duplicated features in e1d. */
-        if ( currd->arch.x86_vendor != X86_VENDOR_AMD )
-            d &= ~CPUID_COMMON_1D_FEATURES;
-
-        /*
-         * MTRR used to unconditionally leak into PV guests.  They cannot MTRR
-         * infrastructure at all, and shouldn't be able to see the feature.
-         *
-         * Modern PVOPS Linux self-clobbers the MTRR feature, to avoid trying
-         * to use the associated MSRs.  Xenolinux-based PV dom0's however use
-         * the MTRR feature as an indication of the presence of the
-         * XENPF_{add,del,read}_memtype hypercalls.
-         */
-        if ( is_hardware_domain(currd) && guest_kernel_mode(curr, regs) &&
-             cpu_has_mtrr )
-            d |= cpufeat_mask(X86_FEATURE_MTRR);
-        break;
-
-    case 0x80000007:
-        d = p->extd.e7d;
-        break;
-
-    case 0x80000008:
-        a = paddr_bits | (vaddr_bits << 8);
-        b = p->extd.e8b;
-        break;
-
-    case 0x00000005: /* MONITOR/MWAIT */
-    case 0x0000000b: /* Extended Topology Enumeration */
-    case 0x8000000a: /* SVM revision and features */
-    case 0x8000001b: /* Instruction Based Sampling */
-    case 0x8000001c: /* Light Weight Profiling */
-    case 0x8000001e: /* Extended topology reporting */
-    unsupported:
-        a = b = c = d = 0;
-        break;
-
-    case 0x7:
-        ASSERT_UNREACHABLE();
-        /* Now handled in guest_cpuid(). */
-    }
-
-    regs->rax = a;
-    regs->rbx = b;
-    regs->rcx = c;
-    regs->rdx = d;
-}
-
 static int emulate_invalid_rdtscp(struct cpu_user_regs *regs)
 {
     char opcode[3];
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index c1b07b7..a8f9824 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -392,8 +392,6 @@ bool hvm_set_guest_bndcfgs(struct vcpu *v, u64 val);
 #define has_viridian_apic_assist(d) \
     (is_viridian_domain(d) && (viridian_feature_mask(d) & HVMPV_apic_assist))
 
-void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
-                                   unsigned int *ecx, unsigned int *edx);
 bool hvm_check_cpuid_faulting(struct vcpu *v);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index e43b956..3bb93a2 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -627,8 +627,6 @@ enum get_cpu_vendor {
 int get_cpu_vendor(uint32_t b, uint32_t c, uint32_t d, enum get_cpu_vendor mode);
 uint8_t get_cpu_family(uint32_t raw, uint8_t *model, uint8_t *stepping);
 
-void pv_cpuid(struct cpu_user_regs *regs);
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_X86_PROCESSOR_H */
-- 
2.1.4

