[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 4/5] x86/alternative: Implement NMI/#MC-safe patching
Patching code which is being executed is problematic, because it impossible to arrange an atomic update of the instruction stream outside of a few corner cases. Furthermore, we have no feasible way to prevent execution of the NMI and #MC exception handlers, but have patch points in them. Use a breakpoint to capture execution which hits an in-progress patch, and have the exception handler cope with the safety. See the code comments for exact algorithm details. Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> --- CC: Jan Beulich <JBeulich@xxxxxxxx> CC: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> CC: Ross Lagerwall <ross.lagerwall@xxxxxxxxxx> --- xen/arch/x86/alternative.c | 104 +++++++++++++++++++++++++++++++++++++++- xen/arch/x86/traps.c | 2 + xen/arch/x86/x86_64/entry.S | 34 ++++++++++++- xen/include/asm-x86/processor.h | 2 + 4 files changed, 139 insertions(+), 3 deletions(-) diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c index 0b309c3..40bfaad 100644 --- a/xen/arch/x86/alternative.c +++ b/xen/arch/x86/alternative.c @@ -131,6 +131,84 @@ void init_or_livepatch add_nops(void *insns, unsigned int len) } } +static init_or_livepatch_data struct live_poke_info { + uint8_t *addr; + const uint8_t *opcode; + size_t len; + unsigned int cpu; +} live_poke_info; + +/* + * text_poke_live - Update the live .text area, in an interrupt-safe way. + * + * !!! WARNING - Reentrantly called from the int3 exception handler !!! + * + * All patching data are passed via the live_poke_info structure. @regs is + * non-NULL if entering from an exception handler. + * + * Returns a boolean indicating whether the transient breakpoint was hit. + */ +bool init_or_livepatch text_poke_live(const struct cpu_user_regs *regs) +{ + struct live_poke_info *i = &live_poke_info; + + if ( unlikely(i->cpu != smp_processor_id()) ) + { + ASSERT(regs); + + /* + * We hit a breakpoint, on a CPU which was not performing the + * patching. This is only expected to be possible for the NMI/#MC + * paths, and even then, only if we hit the tiny race window below + * while patching in the NMI/#MC handlers. + * + * We can't safely evaluate whether we hit a transient breakpoint + * because i->cpu has likely completed the patch and moved on to the + * next patch site. + * + * Go to sleep for a bit and synchronise the pipeline as we are now in + * a cross-modifying scenario. + */ + cpu_relax(); + cpuid_eax(0); + + /* + * Presume that we hit the transient breakpoint, as we can't confirm + * whether we did or not. We don't expect there to be any other + * breakpoints to hit, but if we did hit another one, then in the + * worse case we will only livelock until i->cpu has finished all of + * its patching. + */ + return true; + } + + /* + * We are the CPU performing the patching, and might have ended up here by + * hitting a breakpoint. + * + * Either way, we need to complete particular patch to make forwards + * progress. This logic is safe even if executed recursively in the + * breakpoint handler; the worst that will happen when normal execution + * resumes is that we will rewrite the same bytes a second time. + */ + + /* First, insert a breakpoint to prevent execution of the patch site. */ + i->addr[0] = 0xcc; + smp_wmb(); + /* Second, copy the remaining instructions into place. */ + memcpy(i->addr + 1, i->opcode + 1, i->len - 1); + smp_wmb(); + /* Third, replace the breakpoint with the real instruction byte. */ + i->addr[0] = i->opcode[0]; + + /* + * Inform our caller whether we hit the transient breakpoint (in which + * case we iret normally, having already finished the patch), or whether + * we need to continue further into the debug trap handler. + */ + return regs && regs->rip == (unsigned long)&i->addr[1]; +} + /* * text_poke - Update instructions on a live kernel or non-executed code. * @addr: address to modify @@ -153,7 +231,31 @@ void init_or_livepatch add_nops(void *insns, unsigned int len) void init_or_livepatch noinline text_poke(void *addr, const void *opcode, size_t len, bool live) { - memcpy(addr, opcode, len); + if ( !live || len == 1 ) + { + /* + * If we know *addr can't be executed, or we are patching a single + * byte, it is safe to use a straight memcpy(). + */ + memcpy(addr, opcode, len); + } + else + { + /* + * If not, arrange safe patching via the use of breakpoints. Ordering + * of actions here are between this CPU, and the instruction fetch of + * the breakpoint exception handler on any CPU. + */ + live_poke_info = (struct live_poke_info){ + addr, opcode, len, smp_processor_id() + }; + smp_wmb(); + active_text_patching = true; + smp_wmb(); + text_poke_live(NULL); + smp_wmb(); + active_text_patching = false; + } } /* diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index a3e8f0c..6bea963 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -119,6 +119,8 @@ boolean_param("ler", opt_ler); #define stack_words_per_line 4 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp) +bool active_text_patching; + static void show_code(const struct cpu_user_regs *regs) { unsigned char insns_before[8] = {}, insns_after[16] = {}; diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S index a5a6702..56f52c7 100644 --- a/xen/arch/x86/x86_64/entry.S +++ b/xen/arch/x86/x86_64/entry.S @@ -530,7 +530,10 @@ ENTRY(page_fault) movl $TRAP_page_fault,4(%rsp) /* No special register assumptions. */ GLOBAL(handle_exception) - SAVE_ALL CLAC + SAVE_ALL + +handle_exception_gprs_saved: + ASM_CLAC GET_STACK_END(14) @@ -686,9 +689,36 @@ ENTRY(debug) jmp handle_exception ENTRY(int3) + /* For patching-safety, there must not be any alternatives here. */ pushq $0 movl $TRAP_int3,4(%rsp) - jmp handle_exception + + /* If there is no patching active, continue normally. */ + cmpb $1, active_text_patching(%rip) + jne handle_exception + + /* + * We hit a debug trap, but not necessarily the one from active + * patching. Let text_poke_live() work out what to do. + */ + SAVE_ALL + mov %rsp, %rdi + call text_poke_live + + /* + * Does text_poke_live() think we hit the transient debug trap? If + * not, continue down the normal int3 handler. + */ + cmp $0, %eax + je handle_exception_gprs_saved + + /* + * We think we hit the transient debug trap. text_poke_live() has + * probably completed the patching, so rewind the instruction pointer + * and try again. + */ + subq $1, UREGS_rip(%rsp) + jmp restore_all_xen ENTRY(overflow) pushq $0 diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index e8c2f02..5433157 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -439,6 +439,8 @@ extern idt_entry_t *idt_tables[]; DECLARE_PER_CPU(struct tss_struct, init_tss); DECLARE_PER_CPU(root_pgentry_t *, root_pgt); +extern bool active_text_patching; + extern void init_int80_direct_trap(struct vcpu *v); extern void write_ptbase(struct vcpu *v); -- 2.1.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |