Re: [PATCH v4 1/2] x86/fpu: Combine fpu_ctxt and xsave_area in arch_vcpu
On Mon, Oct 7, 2024 at 4:52 PM Alejandro Vallejo
<alejandro.vallejo@xxxxxxxxx> wrote:
>
> fpu_ctxt is either a pointer to the legacy x87/SSE save area (used by FXSAVE),
> or a pointer aliased with xsave_area that points to its fpu_sse subfield. That
> subfield sits at the base of the area and is identical in size and layout to
> the legacy buffer.
>
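(Not a review comment as such, but to make the aliasing concrete for anyone
following along: below is my own toy sketch of the layout, not the real
asm/xstate.h definitions -- the legacy FXSAVE image sits at offset 0 of the
XSAVE area, so the two pointers necessarily refer to the same 512 bytes.)

struct toy_fpusse {                  /* stand-in for fpusse_t            */
    char regs[512];                  /* x87/SSE image saved by FXSAVE    */
};

struct toy_xsave {                   /* stand-in for struct xsave_struct */
    struct toy_fpusse fpu_sse;       /* at the base of the area          */
    char xsave_hdr[64];              /* XSAVE header follows             */
    char extended[];                 /* AVX etc. extended states         */
};

/* &area->fpu_sse and (void *)area are the same address, which is all the
 * old fpu_ctxt pointer ever encoded once xsave_area was allocated. */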
> This patch merges the two pointers in arch_vcpu into a single XSAVE area. In
> the very rare case in which the host doesn't support XSAVE, all we're doing is
> wasting a tiny amount of memory in exchange for a lot more simplicity in the
> code.
>
> While at it, dedup the setup logic in vcpu_init_fpu() and integrate it
> into xstate_alloc_save_area().
>
> Signed-off-by: Alejandro Vallejo <alejandro.vallejo@xxxxxxxxx>
> --
> v4:
> * Amend commit message with extra note about deduping vcpu_init_fpu()
> * Remove comment on top of cpu_user_regs (though I really think there
> ought to be a credible one, in one form or another).
> * Remove cast from blk.c so FXSAVE_AREA is "void *"
> * Simplify comment in xstate_alloc_save_area() for the "host has no
> XSAVE" case.
> ---
> xen/arch/x86/domctl.c | 6 ++++-
> xen/arch/x86/hvm/emulate.c | 4 +--
> xen/arch/x86/hvm/hvm.c | 6 ++++-
> xen/arch/x86/i387.c | 45 +++++--------------------------
> xen/arch/x86/include/asm/domain.h | 6 -----
> xen/arch/x86/x86_emulate/blk.c | 2 +-
> xen/arch/x86/xstate.c | 12 ++++++---
> 7 files changed, 28 insertions(+), 53 deletions(-)
>
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 96d816cf1a7d..2d115395da90 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -1379,7 +1379,11 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
> #define c(fld) (c.nat->fld)
> #endif
>
> - memcpy(&c.nat->fpu_ctxt, v->arch.fpu_ctxt, sizeof(c.nat->fpu_ctxt));
> + BUILD_BUG_ON(sizeof(c.nat->fpu_ctxt) !=
> + sizeof(v->arch.xsave_area->fpu_sse));
> + memcpy(&c.nat->fpu_ctxt, &v->arch.xsave_area->fpu_sse,
> + sizeof(c.nat->fpu_ctxt));
> +
> if ( is_pv_domain(d) )
> c(flags = v->arch.pv.vgc_flags & ~(VGCF_i387_valid|VGCF_in_kernel));
> else
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index aa97ca1cbffd..f2bc6967dfcb 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -2371,7 +2371,7 @@ static int cf_check hvmemul_get_fpu(
> alternative_vcall(hvm_funcs.fpu_dirty_intercept);
> else if ( type == X86EMUL_FPU_fpu )
> {
> - const fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
> + const fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
>
> /*
> * Latch current register state so that we can back out changes
> @@ -2411,7 +2411,7 @@ static void cf_check hvmemul_put_fpu(
>
> if ( aux )
> {
> - fpusse_t *fpu_ctxt = curr->arch.fpu_ctxt;
> + fpusse_t *fpu_ctxt = &curr->arch.xsave_area->fpu_sse;
> bool dval = aux->dval;
> int mode = hvm_guest_x86_mode(curr);
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 7b2e1c9813d6..77fe282118f7 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -914,7 +914,11 @@ static int cf_check hvm_save_cpu_ctxt(struct vcpu *v, hvm_domain_context_t *h)
>
> if ( v->fpu_initialised )
> {
> - memcpy(ctxt.fpu_regs, v->arch.fpu_ctxt, sizeof(ctxt.fpu_regs));
> + BUILD_BUG_ON(sizeof(ctxt.fpu_regs) !=
> + sizeof(v->arch.xsave_area->fpu_sse));
> + memcpy(ctxt.fpu_regs, &v->arch.xsave_area->fpu_sse,
> + sizeof(ctxt.fpu_regs));
> +
> ctxt.flags = XEN_X86_FPU_INITIALISED;
> }
>
> diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c
> index 134e0bece519..fbb9d3584a3d 100644
> --- a/xen/arch/x86/i387.c
> +++ b/xen/arch/x86/i387.c
> @@ -39,7 +39,7 @@ static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
> /* Restore x87 FPU, MMX, SSE and SSE2 state */
> static inline void fpu_fxrstor(struct vcpu *v)
> {
> - const fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
> + const fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
>
> /*
> * Some CPUs don't save/restore FDP/FIP/FOP unless an exception
> @@ -151,7 +151,7 @@ static inline void fpu_xsave(struct vcpu *v)
> /* Save x87 FPU, MMX, SSE and SSE2 state */
> static inline void fpu_fxsave(struct vcpu *v)
> {
> - fpusse_t *fpu_ctxt = v->arch.fpu_ctxt;
> + fpusse_t *fpu_ctxt = &v->arch.xsave_area->fpu_sse;
> unsigned int fip_width = v->domain->arch.x87_fip_width;
>
> if ( fip_width != 4 )
> @@ -212,7 +212,7 @@ void vcpu_restore_fpu_nonlazy(struct vcpu *v, bool need_stts)
> * above) we also need to restore full state, to prevent subsequently
> * saving state belonging to another vCPU.
> */
> - if ( v->arch.fully_eager_fpu || (v->arch.xsave_area && xstate_all(v)) )
> + if ( v->arch.fully_eager_fpu || xstate_all(v) )
> {
> if ( cpu_has_xsave )
> fpu_xrstor(v, XSTATE_ALL);
> @@ -299,44 +299,14 @@ void save_fpu_enable(void)
> /* Initialize FPU's context save area */
> int vcpu_init_fpu(struct vcpu *v)
> {
> - int rc;
> -
> v->arch.fully_eager_fpu = opt_eager_fpu;
> -
> - if ( (rc = xstate_alloc_save_area(v)) != 0 )
> - return rc;
> -
> - if ( v->arch.xsave_area )
> - v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
> - else
> - {
> - BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
> - v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
> - __alignof(v->arch.xsave_area->fpu_sse));
> - if ( v->arch.fpu_ctxt )
> - {
> - fpusse_t *fpu_sse = v->arch.fpu_ctxt;
> -
> - fpu_sse->fcw = FCW_DEFAULT;
> - fpu_sse->mxcsr = MXCSR_DEFAULT;
> - }
> - else
> - rc = -ENOMEM;
> - }
> -
> - return rc;
> + return xstate_alloc_save_area(v);
> }
>
> void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
> const void *data, unsigned int fcw_default)
> {
> - /*
> - * For the entire function please note that vcpu_init_fpu() (above) points
> - * v->arch.fpu_ctxt into v->arch.xsave_area when XSAVE is available. Hence
> - * accesses through both pointers alias one another, and the shorter form
> - * is used here.
> - */
> - fpusse_t *fpu_sse = v->arch.fpu_ctxt;
> + fpusse_t *fpu_sse = &v->arch.xsave_area->fpu_sse;
>
> ASSERT(!xsave_area || xsave_area == v->arch.xsave_area);
>
> @@ -373,10 +343,7 @@ void vcpu_setup_fpu(struct vcpu *v, struct xsave_struct *xsave_area,
> /* Free FPU's context save area */
> void vcpu_destroy_fpu(struct vcpu *v)
> {
> - if ( v->arch.xsave_area )
> - xstate_free_save_area(v);
> - else
> - xfree(v->arch.fpu_ctxt);
> + xstate_free_save_area(v);
> }
>
> /*
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index 5219c4fb0f69..b79d6badd71c 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -591,12 +591,6 @@ struct pv_vcpu
>
> struct arch_vcpu
> {
> - /*
> - * guest context (mirroring struct vcpu_guest_context) common
> - * between pv and hvm guests
> - */
> -
> - void *fpu_ctxt;
> struct cpu_user_regs user_regs;
>
> /* Debug registers. */
> diff --git a/xen/arch/x86/x86_emulate/blk.c b/xen/arch/x86/x86_emulate/blk.c
> index e790f4f90056..08a05f8453f7 100644
> --- a/xen/arch/x86/x86_emulate/blk.c
> +++ b/xen/arch/x86/x86_emulate/blk.c
> @@ -11,7 +11,7 @@
> !defined(X86EMUL_NO_SIMD)
> # ifdef __XEN__
> # include <asm/xstate.h>
> -# define FXSAVE_AREA current->arch.fpu_ctxt
> +# define FXSAVE_AREA ((void *)&current->arch.xsave_area->fpu_sse)
Could you use "struct x86_fxsr *" instead of "void *"?
Maybe adding another "struct x86_fxsr fxsr" member inside the anonymous
fpu_sse union would help here.
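To make that concrete, something like the following is what I have in mind
(an untested sketch with toy placeholders, not the real Xen definitions; the
real struct x86_fxsr lives in the emulator headers and would need to be
visible from xstate.h):

struct x86_fxsr { char bytes[512]; };   /* toy stand-in for the emulator type */

union toy_fpu_sse {                     /* toy stand-in for the fpu_sse union */
    char x[512];                        /* raw-byte view, as today            */
    struct x86_fxsr fxsr;               /* new: typed FXSAVE view             */
    /* ... the existing fcw/fsw/... legacy fields would remain here ...       */
};

/* blk.c could then use a typed pointer instead of the void * cast, e.g.:
 * #define FXSAVE_AREA (&current->arch.xsave_area->fpu_sse.fxsr)
 */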
> # else
> # define FXSAVE_AREA get_fpu_save_area()
> # endif
> diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
> index 57a0749f0d54..af9e345a7ace 100644
> --- a/xen/arch/x86/xstate.c
> +++ b/xen/arch/x86/xstate.c
> @@ -508,9 +508,15 @@ int xstate_alloc_save_area(struct vcpu *v)
> unsigned int size;
>
> if ( !cpu_has_xsave )
> - return 0;
> -
> - if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> + {
> + /*
> + * On non-XSAVE systems, we allocate an XSTATE buffer for simplicity.
> + * XSTATE is backwards compatible to FXSAVE, and only one cacheline
> + * larger.
> + */
> + size = XSTATE_AREA_MIN_SIZE;
> + }
> + else if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
> {
> size = xsave_cntxt_size;
> BUG_ON(size < XSTATE_AREA_MIN_SIZE);
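For completeness, the arithmetic behind "only one cacheline larger", assuming
the usual constants from asm/xstate.h (FXSAVE_SIZE = 512, XSAVE_HDR_SIZE = 64):

    XSTATE_AREA_MIN_SIZE = FXSAVE_SIZE + XSAVE_HDR_SIZE = 512 + 64 = 576 bytes

i.e. exactly one 64-byte cache line more than the bare 512-byte legacy buffer
the old fpu_ctxt allocation used on non-XSAVE hosts.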
Frediano