[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] xen: Improvements to domain_crash()



>>> On 30.08.18 at 17:31, <andrew.cooper3@xxxxxxxxxx> wrote:
> --- a/xen/arch/arm/mem_access.c
> +++ b/xen/arch/arm/mem_access.c
> @@ -293,12 +293,7 @@ bool p2m_mem_access_check(paddr_t gpa, vaddr_t gla, 
> const struct npfec npfec)
>      {
>          /* No listener */
>          if ( p2m->access_required )
> -        {
> -            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
> -                                  "no vm_event listener VCPU %d, dom %d\n",
> -                                  v->vcpu_id, v->domain->domain_id);
> -            domain_crash(v->domain);
> -        }
> +            domain_crash(v->domain, "No vm_event listener\n");

Which vCPU caused the issue is lost with this transformation.

> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -2078,19 +2078,15 @@ int hvm_mov_to_cr(unsigned int cr, unsigned int gpr)
>          rc = X86EMUL_OKAY;
>          break;
>  
> -    default:
> -        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
> -        goto exit_and_crash;
> +    default: /* VT-x/SVM intercept malfunction? */
> +        domain_crash(curr->domain, "Invalid cr %u\n", cr);

"cr%u does not exist"? I in particular dislike the space between "cr"
and the number.

> @@ -2315,12 +2307,17 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
>      if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
>           (value != v->arch.hvm_vcpu.guest_cr[3]) )
>      {
> +        unsigned long gfn = paddr_to_pfn(value);

Along the lines of your recent comment on one of my patches:
Doesn't the comparison above need to ignore the upper half of
the stored value? (Arguably not something we want to fix in this
patch, but anyway.)

> @@ -2686,17 +2678,22 @@ static void *hvm_map_entry(unsigned long va, bool_t 
> *writable)
>      pfec = PFEC_page_present;
>      gfn = paging_gva_to_gfn(current, va, &pfec);
>      if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
> -        goto fail;
> +    {
> +        domain_crash(current->domain, "Descriptor table is paged or 
> shared\n");

Since a page can't be paged and shared at the same time, perhaps better
to log just one of the two (whichever is applicable)?

> @@ -3719,7 +3716,7 @@ int hvm_descriptor_access_intercept(uint64_t exit_info,
>                                        descriptor, is_write);
>      }
>      else if ( !hvm_emulate_one_insn(is_sysdesc_access, "sysdesc access") )
> -        domain_crash(currd);
> +        domain_crash(currd, "sysdesc emulation failure\n");

The string passed to hvm_emulate_one_insn() makes clear that
some logging already occurs in the error case - do we really need
the extra verbosity you add here?

> --- a/xen/arch/x86/hvm/intercept.c
> +++ b/xen/arch/x86/hvm/intercept.c
> @@ -44,7 +44,7 @@ static bool_t hvm_mmio_accept(const struct hvm_io_handler 
> *handler,
>      last = hvm_mmio_last_byte(p);
>      if ( last != first &&
>           !handler->mmio.ops->check(current, last) )
> -        domain_crash(current->domain);
> +        domain_crash(current->domain, "Fatal MMIO error\n");

How about "%ps accepts %"PRIpaddr" but not %"PRIpaddr?

> @@ -134,8 +134,10 @@ int hvm_process_io_intercept(const struct hvm_io_handler 
> *handler,
>  
>              if ( p->data_is_ptr )
>              {
> -                switch ( hvm_copy_to_guest_phys(p->data + step * i,
> -                                                &data, p->size, current) )
> +                enum hvm_translation_result res =
> +                    hvm_copy_to_guest_phys(p->data + step * i,
> +                                           &data, p->size, current);
> +                switch ( res )

Blank line above here please.

> @@ -162,9 +166,12 @@ int hvm_process_io_intercept(const struct hvm_io_handler 
> *handler,
>          {
>              if ( p->data_is_ptr )
>              {
> +                enum hvm_translation_result res;
> +
>                  data = 0;
> -                switch ( hvm_copy_from_guest_phys(&data, p->data + step * i,
> -                                                  p->size) )
> +                res = hvm_copy_from_guest_phys(&data, p->data + step * i,
> +                                               p->size);
> +                switch ( res )

And here.

> @@ -2709,7 +2705,7 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
>      {
>          gdprintk(XENLOG_ERR, "invalid VMCB state:\n");
>          svm_vmcb_dump(__func__, vmcb);
> -        domain_crash(v->domain);
> +        __domain_crash(v->domain);

Perhaps the gdprintk() above then also wants to become gprintk()?

> --- a/xen/arch/x86/hvm/vmx/realmode.c
> +++ b/xen/arch/x86/hvm/vmx/realmode.c
> @@ -148,7 +148,7 @@ void vmx_realmode_emulate_one(struct hvm_emulate_ctxt 
> *hvmemul_ctxt)
>  
>   fail:
>      hvm_dump_emulation_state(XENLOG_G_ERR, "Real-mode", hvmemul_ctxt, rc);
> -    domain_crash(curr->domain);
> +    domain_crash(curr->domain, "Emulation failure\n");

Unnecessary extra verbosity again?

> @@ -1381,18 +1380,21 @@ static void vmx_load_pdptrs(struct vcpu *v)
>          return;
>  
>      if ( (cr3 & 0x1fUL) && !hvm_pcid_enabled(v) )
> -        goto crash;
> +    {
> +        domain_crash(v->domain, "Bad cr3 %p\n", _p(cr3));

I wonder about the use of _p() here, considering your earlier comment
on my patch to this function that the upper 32 bits are to be ignored
anyway.

> --- a/xen/arch/x86/mm/mem_access.c
> +++ b/xen/arch/x86/mm/mem_access.c
> @@ -186,10 +186,7 @@ bool p2m_mem_access_check(paddr_t gpa, unsigned long gla,
>          /* No listener */
>          if ( p2m->access_required )
>          {
> -            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
> -                                  "no vm_event listener VCPU %d, dom %d\n",
> -                                  v->vcpu_id, d->domain_id);
> -            domain_crash(v->domain);
> +            domain_crash(v->domain, "No vm_event listener\n");

Info on offending vCPU lost again. But here as well as in the ARM case,
the VM event subsystem maintainers will know best whether that's
relevant here.

> --- a/xen/arch/x86/mm/p2m-pod.c
> +++ b/xen/arch/x86/mm/p2m-pod.c
> @@ -567,7 +567,7 @@ p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn, 
> unsigned int order)
>               * impossible.
>               */
>              if ( order != 0 )
> -                domain_crash(d);
> +                domain_crash(d, "Fatal PoD error\n");

I'm a little uncertain here whether it's relevant to have the multiple
identical messages in the same functions here distinguishable, but
perhaps simply add #1 etc to them?

> @@ -839,7 +839,7 @@ guest_physmap_add_entry(struct domain *d, gfn_t gfn, 
> mfn_t mfn,
>          if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
>          {
>              /* Really shouldn't be unmapping grant/foreign maps this way */
> -            domain_crash(d);
> +            domain_crash(d, "Attempting to implicitly unmap grant/foreign 
> frame\n");

Drop the "ing"?

> @@ -991,11 +991,8 @@ void p2m_change_type_range(struct domain *d,
>      if ( gfn < end )
>          rc = p2m->change_entry_type_range(p2m, ot, nt, gfn, end - 1);
>      if ( rc )
> -    {
> -        printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d 
> to %d\n",
> -               rc, d->domain_id, start, end - 1, ot, nt);
> -        domain_crash(d);
> -    }
> +        domain_crash(d, "Error %d changing d%d GFNs [%lx,%lx] from %d to 
> %d\n",
> +                     rc, d->domain_id, start, end - 1, ot, nt);

You change Dom%d to d%d here.

> @@ -1011,11 +1008,8 @@ void p2m_change_type_range(struct domain *d,
>          break;
>      }
>      if ( rc )
> -    {
> -        printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty 
> ranges\n",
> -               rc, d->domain_id);
> -        domain_crash(d);
> -    }
> +        domain_crash(d, "Error %d manipulating Dom%d's log-dirty ranges\n",
> +                     rc, d->domain_id);

Why not also here? Or actually isn't logging the domain ID explicitly here
quite pointless (redundant with what domain_crash() produces)?

> --- a/xen/arch/x86/mm/shadow/common.c
> +++ b/xen/arch/x86/mm/shadow/common.c
> @@ -1954,12 +1954,9 @@ int sh_remove_write_access(struct domain *d, mfn_t 
> gmfn,
>      /* If this isn't a "normal" writeable page, the domain is trying to
>       * put pagetables in special memory of some kind.  We can't allow that. 
> */
>      if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_writable_page )
> -    {
> -        printk(XENLOG_G_ERR "can't remove write access to mfn %"PRI_mfn
> -               ", type_info is %"PRtype_info "\n",
> -               mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);
> -        domain_crash(d);
> -    }
> +        domain_crash(d, "can't remove write access to mfn %"PRI_mfn
> +                     ", type_info is %"PRtype_info"\n",
> +                     mfn_x(gmfn), mfn_to_page(gmfn)->u.inuse.type_info);

Here and below - perhaps better for the format strings to go all on one line,
for grep-ability?

> @@ -2583,13 +2576,12 @@ static void sh_update_paging_modes(struct vcpu *v)
>  
>                  if ( v != current && vcpu_runnable(v) )
>                  {
> -                    printk(XENLOG_G_ERR
> -                           "Some third party (%pv) is changing this HVM 
> vcpu's"
> -                           " (%pv) paging mode while it is running\n",
> -                           current, v);
>                      /* It's not safe to do that because we can't change
>                       * the host CR3 for a running domain */
> -                    domain_crash(v->domain);
> +                    domain_crash(v->domain,
> +                                 "Some third party (%pv) is changing this "
> +                                 "HVM vcpu's (%pv) paging mode while it is "
> +                                 "running\n", current, v);

Just "%pv is changing %pv's ..."?

> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -8686,7 +8686,7 @@ x86_emulate(
>      gprintk(XENLOG_INFO, "  stub: %"__stringify(MAX_INST_LEN)"ph\n",
>              stub.func);
>      generate_exception_if(stub_exn.info.fields.trapnr == EXC_UD, EXC_UD);
> -    domain_crash(current->domain);
> +    domain_crash(current->domain, "Fatal exception during emulation\n");

Perhaps include the exception type?

> @@ -713,10 +713,9 @@ int handle_xsetbv(u32 index, u64 new_bv)
>       */
>      if ( unlikely(xcr0_max & ~xfeature_mask) )
>      {
> -        gprintk(XENLOG_ERR,
> -                "xcr0_max %016" PRIx64 " exceeds hardware max %016" PRIx64 
> "\n",
> -                xcr0_max, xfeature_mask);
> -        domain_crash(curr->domain);
> +        domain_crash(curr->domain, "xcr0_max %016" PRIx64
> +                     " exceeds hardware max %016" PRIx64 "\n",
> +                     xcr0_max, xfeature_mask);

Format string again better all on one line?

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.