[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 4/9] kexec: extend hypercall with improved load/unload ops



On Fri, Sep 20, 2013 at 02:10:50PM +0100, David Vrabel wrote:
> From: David Vrabel <david.vrabel@xxxxxxxxxx>
>
> In the existing kexec hypercall, the load and unload ops depend on
> internals of the Linux kernel (the page list and code page provided by
> the kernel).  The code page is used to transition between Xen context
> and the image so using kernel code doesn't make sense and will not
> work for PVH guests.
>
> Add replacement KEXEC_CMD_kexec_load and KEXEC_CMD_kexec_unload ops
> that no longer require a code page to be provided by the guest -- Xen
> now provides the code for calling the image directly.
>
> The new load op looks similar to the Linux kexec_load system call and
> allows the guest to provide the image data to be loaded.  The guest
> specifies the architecture of the image which may be a 32-bit subarch
> of the hypervisor's architecture (i.e., an EM_386 image on an
> EM_X86_64 hypervisor).
>
> The toolstack can now load images without kernel involvement.  This is
> required for supporting kexec when using a dom0 with an upstream
> kernel.
>
> Crash images are copied directly into the crash region on load.
> Default images are copied into domheap pages and a list of source and
> destination machine addresses is created.  This list is used in
> kexec_reloc() to relocate the image to its destination.
>
> The old load and unload sub-ops are still available (as
> KEXEC_CMD_load_v1 and KEXEC_CMD_unload_v1) and are implemented on top
> of the new infrastructure.
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>

[...]

> diff --git a/xen/arch/x86/x86_64/kexec_reloc.S 
> b/xen/arch/x86/x86_64/kexec_reloc.S
> new file mode 100644
> index 0000000..41dd27b
> --- /dev/null
> +++ b/xen/arch/x86/x86_64/kexec_reloc.S
> @@ -0,0 +1,208 @@
> +/*
> + * Relocate a kexec_image to its destination and call it.
> + *
> + * Copyright (C) 2013 Citrix Systems R&D Ltd.
> + *
> + * Portions derived from Linux's arch/x86/kernel/relocate_kernel_64.S.
> + *
> + *   Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xxxxxxxxxxxx>
> + *
> + * This source code is licensed under the GNU General Public License,
> + * Version 2.  See the file COPYING for more details.
> + */
> +#include <xen/config.h>
> +#include <xen/kimage.h>
> +
> +#include <asm/asm_defns.h>
> +#include <asm/msr.h>
> +#include <asm/page.h>
> +#include <asm/machine_kexec.h>
> +
> +        .text
> +        .align PAGE_SIZE
> +        .code64
> +
> +ENTRY(kexec_reloc)
> +        /* %rdi - code page maddr */
> +        /* %rsi - page table maddr */
> +        /* %rdx - indirection page maddr */
> +        /* %rcx - entry maddr */
> +        /* %r8 - flags */
> +
> +        movq %rdx, %rbx

Delete movq %rdx, %rbx

> +        /* Setup stack. */
> +        leaq (reloc_stack - kexec_reloc)(%rdi), %rsp
> +
> +        /* Load reloc page table. */
> +        movq %rsi, %cr3
> +
> +        /* Jump to identity mapped code. */
> +        leaq (identity_mapped - kexec_reloc)(%rdi), %rax
> +        jmpq *%rax
> +
> +identity_mapped:
> +        pushq %rcx
> +        pushq %rbx
> +        pushq %rsi
> +        pushq %rdi

Delete pushq %rbx, pushq %rsi, pushq %rdi

> +        /*
> +         * Set cr0 to a known state:
> +         *  - Paging enabled
> +         *  - Alignment check disabled
> +         *  - Write protect disabled
> +         *  - No task switch
> +         *  - Don't do FP software emulation.
> +         *  - Protected mode enabled
> +         */
> +        movq    %cr0, %rax
> +        andl    $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax
> +        orl     $(X86_CR0_PG | X86_CR0_PE), %eax
> +        movq    %rax, %cr0
> +
> +        /*
> +         * Set cr4 to a known state:
> +         *  - physical address extension enabled
> +         */
> +        movl    $X86_CR4_PAE, %eax
> +        movq    %rax, %cr4
> +
> +        movq %rbx, %rdi

movq %rdx, %rdi

> +        call relocate_pages
> +
> +        popq %rdi
> +        popq %rsi
> +        popq %rbx
> +        popq %rcx

Delete popq %rdi, popq %rsi, popq %rbx

> +        /* Need to switch to 32-bit mode? */
> +        testq $KEXEC_RELOC_FLAG_COMPAT, %r8
> +        jnz call_32_bit
> +
> +call_64_bit:
> +        /* Call the image entry point.  This should never return. */

I think that all general-purpose registers (including %rsi, %rdi, %rbp
and %rsp) should be zeroed here. We should leave as little information
as possible about the previous system, especially in the kexec case,
just in case. Please look at linux/arch/x86/kernel/relocate_kernel_64.S
for more details.

> +        callq *%rcx

Maybe we should use retq to jump to the image entry point. If not,
I think that we should store the image entry point address in %rax
(just for the sake of order).

> +        ud2
> +
> +call_32_bit:
> +        /* Setup IDT. */
> +        lidt compat_mode_idt(%rip)
> +
> +        /* Load compat GDT. */
> +        leaq (compat_mode_gdt - kexec_reloc)(%rdi), %rax
> +        movq %rax, (compat_mode_gdt_desc + 2)(%rip)
> +        lgdt compat_mode_gdt_desc(%rip)
> +
> +        /* Relocate compatibility mode entry point address. */
> +        leal (compatibility_mode - kexec_reloc)(%edi), %eax
> +        movl %eax, compatibility_mode_far(%rip)
> +
> +        /* Enter compatibility mode. */
> +        ljmp *compatibility_mode_far(%rip)
> +
> +relocate_pages:
> +        /* %rdi - indirection page maddr */
> +        cld
> +        movq    %rdi, %rcx
> +        xorl    %edi, %edi
> +        xorl    %esi, %esi
> +        jmp     is_dest
> +
> +next_entry: /* top, read another word from the indirection page */
> +
> +        movq    (%rbx), %rcx
> +        addq    $8, %rbx
> +is_dest:
> +        testb   $IND_DESTINATION, %cl
> +        jz      is_ind
> +        movq    %rcx, %rdi
> +        andq    $PAGE_MASK, %rdi
> +        jmp     next_entry
> +is_ind:
> +        testb   $IND_INDIRECTION, %cl
> +        jz      is_done
> +        movq    %rcx, %rbx
> +        andq    $PAGE_MASK, %rbx
> +        jmp     next_entry
> +is_done:
> +        testb   $IND_DONE, %cl
> +        jnz     done
> +is_source:
> +        testb   $IND_SOURCE, %cl
> +        jz      is_zero
> +        movq    %rcx, %rsi      /* For every source page do a copy */
> +        andq    $PAGE_MASK, %rsi
> +        movl    $(PAGE_SIZE / 8), %ecx
> +        rep movsq
> +        jmp     next_entry
> +is_zero:
> +        testb   $IND_ZERO, %cl
> +        jz      next_entry
> +        movl    $(PAGE_SIZE / 8), %ecx  /* Zero the destination page. */
> +        xorl    %eax, %eax
> +        rep stosq
> +        jmp     next_entry
> +done:
> +        ret
> +
> +        .code32
> +
> +compatibility_mode:
> +        /* Setup some sane segments. */
> +        movl $0x0008, %eax
> +        movl %eax, %ds
> +        movl %eax, %es
> +        movl %eax, %fs
> +        movl %eax, %gs
> +        movl %eax, %ss
> +
> +        movl %ecx, %ebp
> +
> +        /* Disable paging and therefore leave 64 bit mode. */
> +        movl %cr0, %eax
> +        andl $~X86_CR0_PG, %eax
> +        movl %eax, %cr0
> +
> +        /* Disable long mode */
> +        movl    $MSR_EFER, %ecx
> +        rdmsr
> +        andl    $~EFER_LME, %eax
> +        wrmsr
> +
> +        /* Clear cr4 to disable PAE. */
> +        xorl    %eax, %eax
> +        movl    %eax, %cr4
> +
> +        /* Call the image entry point.  This should never return. */

Ditto.

> +        call *%ebp

Ditto.

Daniel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.