Re: [PATCH V3 01/19] x86: Create per-domain mapping of guest_root_pt
On Mon, May 13, 2024 at 11:10:59AM +0000, Elias El Yandouzi wrote:
> From: Hongyan Xia <hongyxia@xxxxxxxxxx>
>
> Create a per-domain mapping of PV guest_root_pt as direct map is being
> removed.
>
> Note that we do not map and unmap root_pgt for now since it is still a
> xenheap page.
I'm afraid this needs more context, at least for me to properly
understand.
I think I've figured out what create_perdomain_mapping() is supposed
to do, and I have to admit the interface is very awkward.
Anyway, I've attempted to provide some feedback.
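For reference, the interface I'm referring to (as I read it from the current
tree; the exact prototype may differ slightly) is roughly:

    int create_perdomain_mapping(struct domain *d, unsigned long va,
                                 unsigned int nr, l1_pgentry_t **pl1tab,
                                 struct page_info **ppg);

The last two optional parameters change how much of the structure gets
populated and what is handed back to the caller, which is what makes the
calls in this patch hard to follow at a glance.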
>
> Signed-off-by: Hongyan Xia <hongyxia@xxxxxxxxxx>
> Signed-off-by: Julien Grall <jgrall@xxxxxxxxxx>
> Signed-off-by: Elias El Yandouzi <eliasely@xxxxxxxxxx>
>
> ----
> Changes in V3:
> * Rename SHADOW_ROOT
> * Haven't addressed the potentially over-allocation issue as I don't
> get it
>
> Changes in V2:
> * Rework the shadow perdomain mapping solution in the follow-up
> patches
>
> Changes since Hongyan's version:
> * Remove the final dot in the commit title
>
> diff --git a/xen/arch/x86/include/asm/config.h b/xen/arch/x86/include/asm/config.h
> index ab7288cb36..5d710384df 100644
> --- a/xen/arch/x86/include/asm/config.h
> +++ b/xen/arch/x86/include/asm/config.h
> @@ -203,7 +203,7 @@ extern unsigned char boot_edid_info[128];
> /* Slot 260: per-domain mappings (including map cache). */
> #define PERDOMAIN_VIRT_START (PML4_ADDR(260))
> #define PERDOMAIN_SLOT_MBYTES (PML4_ENTRY_BYTES >> (20 + PAGETABLE_ORDER))
> -#define PERDOMAIN_SLOTS 3
> +#define PERDOMAIN_SLOTS 4
> #define PERDOMAIN_VIRT_SLOT(s) (PERDOMAIN_VIRT_START + (s) * \
> (PERDOMAIN_SLOT_MBYTES << 20))
> /* Slot 4: mirror of per-domain mappings (for compat xlat area accesses). */
> @@ -317,6 +317,14 @@ extern unsigned long xen_phys_start;
> #define ARG_XLAT_START(v) \
> (ARG_XLAT_VIRT_START + ((v)->vcpu_id << ARG_XLAT_VA_SHIFT))
>
> +/* pv_root_pt mapping area. The fourth per-domain-mapping sub-area */
> +#define PV_ROOT_PT_MAPPING_VIRT_START PERDOMAIN_VIRT_SLOT(3)
> +#define PV_ROOT_PT_MAPPING_ENTRIES MAX_VIRT_CPUS
> +
> +/* The address of a particular VCPU's PV_ROOT_PT */
> +#define PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v) \
> + (PV_ROOT_PT_MAPPING_VIRT_START + ((v)->vcpu_id * PAGE_SIZE))
I know we are not there yet, but I wonder if we need to start having
some non-shared per-cpu mapping area in the page-tables. Right now
this is shared between all the vCPUs, as it's per-domain space
(instead of per-vCPU).
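Just to make the sharing explicit (this merely restates the macros above,
nothing new implied):

    /* Illustrative only. */
    PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v0); /* PERDOMAIN_VIRT_SLOT(3) + 0 * PAGE_SIZE */
    PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v1); /* PERDOMAIN_VIRT_SLOT(3) + 1 * PAGE_SIZE */

Both addresses live in the per-domain slot, and hence are present in the
page-tables of every vCPU of the domain.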
> +
> #define ELFSIZE 64
>
> #define ARCH_CRASH_SAVE_VMCOREINFO
> diff --git a/xen/arch/x86/include/asm/domain.h b/xen/arch/x86/include/asm/domain.h
> index f5daeb182b..8a97530607 100644
> --- a/xen/arch/x86/include/asm/domain.h
> +++ b/xen/arch/x86/include/asm/domain.h
> @@ -272,6 +272,7 @@ struct time_scale {
> struct pv_domain
> {
> l1_pgentry_t **gdt_ldt_l1tab;
> + l1_pgentry_t **root_pt_l1tab;
>
> atomic_t nr_l4_pages;
>
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index d968bbbc73..efdf20f775 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -505,6 +505,13 @@ void share_xen_page_with_guest(struct page_info *page, struct domain *d,
> nrspin_unlock(&d->page_alloc_lock);
> }
>
> +#define pv_root_pt_idx(v) \
> + ((v)->vcpu_id >> PAGETABLE_ORDER)
> +
> +#define pv_root_pt_pte(v) \
> + ((v)->domain->arch.pv.root_pt_l1tab[pv_root_pt_idx(v)] + \
> + ((v)->vcpu_id & (L1_PAGETABLE_ENTRIES - 1)))
> +
> void make_cr3(struct vcpu *v, mfn_t mfn)
> {
> struct domain *d = v->domain;
> @@ -524,6 +531,13 @@ void write_ptbase(struct vcpu *v)
>
> if ( is_pv_vcpu(v) && v->domain->arch.pv.xpti )
> {
> + mfn_t guest_root_pt = _mfn(MASK_EXTR(v->arch.cr3, PAGE_MASK));
> + l1_pgentry_t *pte = pv_root_pt_pte(v);
> +
> + ASSERT(v == current);
> +
> + l1e_write(pte, l1e_from_mfn(guest_root_pt, __PAGE_HYPERVISOR_RO));
> +
> cpu_info->root_pgt_changed = true;
> cpu_info->pv_cr3 = __pa(this_cpu(root_pgt));
> if ( new_cr4 & X86_CR4_PCIDE )
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 2a445bb17b..1b025986f7 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -288,6 +288,21 @@ static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
> 1U << GDT_LDT_VCPU_SHIFT);
> }
>
> +static int pv_create_root_pt_l1tab(struct vcpu *v)
> +{
> + return create_perdomain_mapping(v->domain,
> + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
> + 1, v->domain->arch.pv.root_pt_l1tab,
> + NULL);
> +}
> +
> +static void pv_destroy_root_pt_l1tab(struct vcpu *v)
The two 'v' parameters could be const here.
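I.e. something along these lines (untested, just to illustrate the
constification):

    static int pv_create_root_pt_l1tab(const struct vcpu *v)
    {
        return create_perdomain_mapping(v->domain,
                                        PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v),
                                        1, v->domain->arch.pv.root_pt_l1tab,
                                        NULL);
    }

    static void pv_destroy_root_pt_l1tab(const struct vcpu *v)
    {
        destroy_perdomain_mapping(v->domain,
                                  PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
    }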
> +
> +{
> + destroy_perdomain_mapping(v->domain,
> + PV_ROOT_PT_MAPPING_VCPU_VIRT_START(v), 1);
> +}
> +
> void pv_vcpu_destroy(struct vcpu *v)
> {
> if ( is_pv_32bit_vcpu(v) )
> @@ -297,6 +312,7 @@ void pv_vcpu_destroy(struct vcpu *v)
> }
>
> pv_destroy_gdt_ldt_l1tab(v);
> + pv_destroy_root_pt_l1tab(v);
> XFREE(v->arch.pv.trap_ctxt);
> }
>
> @@ -311,6 +327,13 @@ int pv_vcpu_initialise(struct vcpu *v)
> if ( rc )
> return rc;
>
> + if ( v->domain->arch.pv.xpti )
> + {
> + rc = pv_create_root_pt_l1tab(v);
> + if ( rc )
> + goto done;
> + }
> +
> BUILD_BUG_ON(X86_NR_VECTORS * sizeof(*v->arch.pv.trap_ctxt) >
> PAGE_SIZE);
> v->arch.pv.trap_ctxt = xzalloc_array(struct trap_info, X86_NR_VECTORS);
> @@ -346,10 +369,12 @@ void pv_domain_destroy(struct domain *d)
>
> destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
> GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
> + destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START, PV_ROOT_PT_MAPPING_ENTRIES);
Line too long. Also see comment below about using d->max_vcpus
instead of MAX_VIRT_CPUS.
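Something like the following would stay within the line length limit, and
would also pick up the max_vcpus suggestion (untested):

    destroy_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
                              d->max_vcpus);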
>
> XFREE(d->arch.pv.cpuidmasks);
>
> FREE_XENHEAP_PAGE(d->arch.pv.gdt_ldt_l1tab);
> + FREE_XENHEAP_PAGE(d->arch.pv.root_pt_l1tab);
> }
>
> void noreturn cf_check continue_pv_domain(void);
> @@ -371,6 +396,12 @@ int pv_domain_initialise(struct domain *d)
> goto fail;
> clear_page(d->arch.pv.gdt_ldt_l1tab);
>
> + d->arch.pv.root_pt_l1tab =
> + alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
> + if ( !d->arch.pv.root_pt_l1tab )
> + goto fail;
> + clear_page(d->arch.pv.root_pt_l1tab);
> +
> if ( levelling_caps & ~LCAP_faulting &&
> (d->arch.pv.cpuidmasks = xmemdup(&cpuidmask_defaults)) == NULL )
> goto fail;
> @@ -381,6 +412,11 @@ int pv_domain_initialise(struct domain *d)
> if ( rc )
> goto fail;
>
> + rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
> + PV_ROOT_PT_MAPPING_ENTRIES, NULL, NULL);
Why not use d->max_vcpus here, instead of forcing up to MAX_VIRT_CPUS?
By the time pv_domain_initialise() is called, max_vcpus should already
be initialized. AFAICT it doesn't make a difference, because for the
call here only the L3 table is created, as the last two parameters are
NULL, but it's still more accurate to use max_vcpus.
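I.e. (untested):

    rc = create_perdomain_mapping(d, PV_ROOT_PT_MAPPING_VIRT_START,
                                  d->max_vcpus, NULL, NULL);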
> + if ( rc )
> + goto fail;
> +
> d->arch.ctxt_switch = &pv_csw;
>
> d->arch.pv.xpti = is_hardware_domain(d) ? opt_xpti_hwdom : opt_xpti_domu;
> diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
> index 630bdc3945..c1ae5013af 100644
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -80,6 +80,7 @@ void __dummy__(void)
>
> #undef OFFSET_EF
>
> + OFFSET(VCPU_id, struct vcpu, vcpu_id);
> OFFSET(VCPU_processor, struct vcpu, processor);
> OFFSET(VCPU_domain, struct vcpu, domain);
> OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map);
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index df015589ce..c1377da7a5 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -162,7 +162,15 @@ FUNC_LOCAL(restore_all_guest)
> and %rsi, %rdi
> and %r9, %rsi
> add %rcx, %rdi
> +
> + /*
> + * The address in the vCPU cr3 is always mapped in the per-domain
> + * pv_root_pt virt area.
> + */
> + imul $PAGE_SIZE, VCPU_id(%rbx), %esi
Aren't some of the previous operations against %rsi now useless since
it gets unconditionally overwritten here?
and %r9, %rsi
[...]
add %rcx, %rsi
Thanks, Roger.