# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1195759420 0
# Node ID 69b56d3289f52092f797cf1f7d500883dbbf2867
# Parent fd3f6d814f6dca9f46c95a5b808e2f47bdcd1715
x86: emulate I/O port access breakpoints
Emulate trapping on I/O port accesses (DR7 R/W=10b breakpoints) when
emulating IN/OUT and their string forms.
Also allow 8-byte breakpoints on x86-64 (and on i686 if the hardware
supports them), and tighten the condition for loading debug registers
during context switch so that only DR7 values with an enable bit set
cause a reload.
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
---
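As a usage illustration of the feature being emulated (not part of the patch
itself): the C snippet below merely computes the DR0/DR7 values a guest would
load to arm a 2-byte I/O breakpoint on port 0x60, i.e. the encoding that the
new check_guest_io_breakpoint() in the traps.c hunk recognises once the guest
has CR4.DE set. The constants mirror those added to
xen/include/asm-x86/debugreg.h; the slot and port are arbitrary example
values, and this is a sketch of the register encoding rather than guest
kernel code.

    /* Sketch only: compute the debug-register values for a 2-byte I/O
     * breakpoint on port 0x60 (DR0 slot).  Constants as in debugreg.h. */
    #include <stdint.h>
    #include <stdio.h>

    #define DR_IO            0x2          /* R/W field: break on I/O access */
    #define DR_LEN_2         0x4          /* LEN field: 2-byte breakpoint   */
    #define DR_CONTROL_SHIFT 16
    #define DR_CONTROL_SIZE  4
    #define DR_ENABLE_SIZE   2
    #define X86_CR4_DE       (1ul << 3)

    int main(void)
    {
        unsigned int slot = 0;            /* use DR0                        */
        uint16_t     port = 0x60;         /* keyboard controller data port  */

        uint32_t dr0 = port;              /* DRn holds the port number      */
        uint32_t dr7 = 0;

        dr7 |= 1u << (slot * DR_ENABLE_SIZE);               /* L0: local enable  */
        dr7 |= (uint32_t)(DR_IO | DR_LEN_2)
               << (DR_CONTROL_SHIFT + slot * DR_CONTROL_SIZE); /* R/W0=10b, LEN0 */

        printf("DR0=%#x DR7=%#x (CR4 must also have DE=%#lx set)\n",
               (unsigned int)dr0, (unsigned int)dr7,
               (unsigned long)X86_CR4_DE);
        return 0;
    }

With these values loaded (however the guest reaches set_debugreg(), whether
by hypercall or by an emulated MOV to DR), an IN from port 0x60 that Xen ends
up emulating sets bit 0 of the guest's DR6 and delivers #DB, matching what
real hardware does for R/W=10b breakpoints.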
xen/arch/x86/domain.c | 7 +-
xen/arch/x86/domctl.c | 8 ++
xen/arch/x86/hvm/svm/svm.c | 3 +--
xen/arch/x86/hvm/vmx/vmx.c | 3 +--
xen/arch/x86/traps.c | 133 ++++++++++++++++++++++++++++++++---------
xen/include/asm-x86/debugreg.h | 55 ++++++++--------
xen/include/asm-x86/domain.h | 2 +-
7 files changed, 149 insertions(+), 62 deletions(-)
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/arch/x86/domain.c Thu Nov 22 19:23:40 2007 +0000
@@ -42,6 +42,7 @@
#include <asm/hypercall.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
+#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/iommu.h>
@@ -583,7 +584,7 @@ unsigned long pv_guest_cr4_fixup(unsigne
if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) )
gdprintk(XENLOG_WARNING,
"Attempt to change CR4 flags %08lx -> %08lx\n",
- hv_cr4 & ~(X86_CR4_PGE|X86_CR4_PSE), guest_cr4);
+ hv_cr4, guest_cr4);
return (hv_cr4 & hv_cr4_mask) | (guest_cr4 & ~hv_cr4_mask);
}
@@ -1219,7 +1220,7 @@ static void paravirt_ctxt_switch_from(st
* inside Xen, before we get a chance to reload DR7, and this cannot always
* safely be handled.
*/
- if ( unlikely(v->arch.guest_context.debugreg[7]) )
+ if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
write_debugreg(7, 0);
}
@@ -1234,7 +1235,7 @@ static void paravirt_ctxt_switch_to(stru
if ( unlikely(cr4 != read_cr4()) )
write_cr4(cr4);
- if ( unlikely(v->arch.guest_context.debugreg[7]) )
+ if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
{
write_debugreg(0, v->arch.guest_context.debugreg[0]);
write_debugreg(1, v->arch.guest_context.debugreg[1]);
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/arch/x86/domctl.c Thu Nov 22 19:23:40 2007 +0000
@@ -825,12 +825,20 @@ void arch_get_info_guest(struct vcpu *v,
c.nat->ctrlreg[1] = xen_pfn_to_cr3(
pagetable_get_pfn(v->arch.guest_table_user));
#endif
+
+ /* Merge shadow DR7 bits into real DR7. */
+ c.nat->debugreg[7] |= c.nat->debugreg[5];
+ c.nat->debugreg[5] = 0;
}
#ifdef CONFIG_COMPAT
else
{
l4_pgentry_t *l4e = __va(pagetable_get_paddr(v->arch.guest_table));
c.cmp->ctrlreg[3] = compat_pfn_to_cr3(l4e_get_pfn(*l4e));
+
+ /* Merge shadow DR7 bits into real DR7. */
+ c.cmp->debugreg[7] |= c.cmp->debugreg[5];
+ c.cmp->debugreg[5] = 0;
}
#endif
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c Thu Nov 22 19:23:40 2007 +0000
@@ -34,6 +34,7 @@
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
+#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/hvm/hvm.h>
@@ -176,8 +177,6 @@ static void __restore_debug_registers(st
* if one of the breakpoints is enabled. So mask out all bits that don't
* enable some breakpoint functionality.
*/
-#define DR7_ACTIVE_MASK 0xff
-
static void svm_restore_dr(struct vcpu *v)
{
if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Nov 22 19:23:40 2007 +0000
@@ -32,6 +32,7 @@
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
+#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/paging.h>
@@ -435,8 +436,6 @@ static void __restore_debug_registers(st
* if one of the breakpoints is enabled. So mask out all bits that don't
* enable some breakpoint functionality.
*/
-#define DR7_ACTIVE_MASK 0xff
-
static void vmx_restore_dr(struct vcpu *v)
{
/* NB. __vmread() is not usable here, so we cannot read from the VMCS. */
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/arch/x86/traps.c Thu Nov 22 19:23:40 2007 +0000
@@ -414,15 +414,53 @@ static int do_guest_trap(
return 0;
}
-static void instruction_done(struct cpu_user_regs *regs, unsigned long eip)
+static void instruction_done(
+ struct cpu_user_regs *regs, unsigned long eip, unsigned int bpmatch)
{
regs->eip = eip;
regs->eflags &= ~X86_EFLAGS_RF;
- if ( regs->eflags & X86_EFLAGS_TF )
- {
- current->arch.guest_context.debugreg[6] |= 0xffff4ff0;
+ if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) )
+ {
+ current->arch.guest_context.debugreg[6] |= bpmatch | 0xffff0ff0;
+ if ( regs->eflags & X86_EFLAGS_TF )
+ current->arch.guest_context.debugreg[6] |= 0x4000;
do_guest_trap(TRAP_debug, regs, 0);
}
+}
+
+static unsigned int check_guest_io_breakpoint(struct vcpu *v,
+ unsigned int port, unsigned int len)
+{
+ unsigned int width, i, match = 0;
+ unsigned long start;
+
+ if ( !(v->arch.guest_context.debugreg[5]) ||
+ !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) )
+ return 0;
+
+ for ( i = 0; i < 4; i++ )
+ {
+ if ( !(v->arch.guest_context.debugreg[5] &
+ (3 << (i * DR_ENABLE_SIZE))) )
+ continue;
+
+ start = v->arch.guest_context.debugreg[i];
+ width = 0;
+
+ switch ( (v->arch.guest_context.debugreg[7] >>
+ (DR_CONTROL_SHIFT + i * DR_CONTROL_SIZE)) & 0xc )
+ {
+ case DR_LEN_1: width = 1; break;
+ case DR_LEN_2: width = 2; break;
+ case DR_LEN_4: width = 4; break;
+ case DR_LEN_8: width = 8; break;
+ }
+
+ if ( (start < (port + len)) && ((start + width) > port) )
+ match |= 1 << i;
+ }
+
+ return match;
}
/*
@@ -639,7 +677,6 @@ static int emulate_forced_invalid_op(str
{
/* Modify Feature Information. */
clear_bit(X86_FEATURE_VME, &d);
- clear_bit(X86_FEATURE_DE, &d);
clear_bit(X86_FEATURE_PSE, &d);
clear_bit(X86_FEATURE_PGE, &d);
if ( !cpu_has_sep )
@@ -669,7 +706,7 @@ static int emulate_forced_invalid_op(str
regs->ecx = c;
regs->edx = d;
- instruction_done(regs, eip);
+ instruction_done(regs, eip, 0);
trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip);
@@ -1329,7 +1366,7 @@ static int emulate_privileged_op(struct
unsigned long *reg, eip = regs->eip, res;
u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
- unsigned int port, i, data_sel, ar, data, rc;
+ unsigned int port, i, data_sel, ar, data, rc, bpmatch = 0;
unsigned int op_bytes, op_default, ad_bytes, ad_default;
#define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
? regs->reg \
@@ -1479,6 +1516,8 @@ static int emulate_privileged_op(struct
}
#endif
+ port = (u16)regs->edx;
+
continue_io_string:
switch ( opcode )
{
@@ -1487,9 +1526,8 @@ static int emulate_privileged_op(struct
case 0x6d: /* INSW/INSL */
if ( data_limit < op_bytes - 1 ||
rd_ad(edi) > data_limit - (op_bytes - 1) ||
- !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+ !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
- port = (u16)regs->edx;
switch ( op_bytes )
{
case 1:
@@ -1519,7 +1557,7 @@ static int emulate_privileged_op(struct
case 0x6f: /* OUTSW/OUTSL */
if ( data_limit < op_bytes - 1 ||
rd_ad(esi) > data_limit - (op_bytes - 1) ||
- !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
+ !guest_io_okay(port, op_bytes, v, regs) )
goto fail;
rc = copy_from_user(&data, (void *)data_base + rd_ad(esi),
op_bytes);
if ( rc != 0 )
@@ -1527,7 +1565,6 @@ static int emulate_privileged_op(struct
propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc,
0);
return EXCRET_fault_fixed;
}
- port = (u16)regs->edx;
switch ( op_bytes )
{
case 1:
@@ -1553,9 +1590,11 @@ static int emulate_privileged_op(struct
break;
}
+ bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
+
if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) )
{
- if ( !hypercall_preempt_check() )
+ if ( !bpmatch && !hypercall_preempt_check() )
goto continue_io_string;
eip = regs->eip;
}
@@ -1634,6 +1673,7 @@ static int emulate_privileged_op(struct
regs->eax = (u32)~0;
break;
}
+ bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
case 0xec: /* IN %dx,%al */
@@ -1671,6 +1711,7 @@ static int emulate_privileged_op(struct
io_emul(regs);
break;
}
+ bpmatch = check_guest_io_breakpoint(v, port, op_bytes);
goto done;
case 0xee: /* OUT %al,%dx */
@@ -1964,7 +2005,7 @@ static int emulate_privileged_op(struct
#undef rd_ad
done:
- instruction_done(regs, eip);
+ instruction_done(regs, eip, bpmatch);
return EXCRET_fault_fixed;
fail:
@@ -2295,7 +2336,7 @@ static int emulate_gate_op(struct cpu_us
sel |= (regs->cs & 3);
regs->cs = sel;
- instruction_done(regs, off);
+ instruction_done(regs, off, 0);
#endif
return 0;
@@ -2805,25 +2846,47 @@ long set_debugreg(struct vcpu *v, int re
/*
* DR7: Bit 10 reserved (set to 1).
* Bits 11-12,14-15 reserved (set to 0).
+ */
+ value &= ~DR_CONTROL_RESERVED_ZERO; /* reserved bits => 0 */
+ value |= DR_CONTROL_RESERVED_ONE; /* reserved bits => 1 */
+ /*
* Privileged bits:
* GD (bit 13): must be 0.
- * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
- * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
*/
- /* DR7 == 0 => debugging disabled for this domain. */
- if ( value != 0 )
- {
- value &= 0xffff27ff; /* reserved bits => 0 */
- value |= 0x00000400; /* reserved bits => 1 */
- if ( (value & (1<<13)) != 0 ) return -EPERM;
- for ( i = 0; i < 16; i += 2 )
- if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
+ if ( value & DR_GENERAL_DETECT )
+ return -EPERM;
+ /* DR7.{G,L}E = 0 => debugging disabled for this domain. */
+ if ( value & DR7_ACTIVE_MASK )
+ {
+ unsigned int io_enable = 0;
+
+ for ( i = DR_CONTROL_SHIFT; i < 32; i += DR_CONTROL_SIZE )
+ {
+ if ( ((value >> i) & 3) == DR_IO )
+ {
+ if ( !(v->arch.guest_context.ctrlreg[4] & X86_CR4_DE) )
+ return -EPERM;
+ io_enable |= value & (3 << ((i - 16) >> 1));
+ }
+#ifdef __i386__
+ if ( ((boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) ||
+ !boot_cpu_has(X86_FEATURE_LM)) &&
+ (((value >> i) & 0xc) == DR_LEN_8) )
+ return -EPERM;
+#endif
+ }
+
+ /* Guest DR5 is a handy stash for I/O intercept information. */
+ v->arch.guest_context.debugreg[5] = io_enable;
+ value &= ~io_enable;
+
/*
* If DR7 was previously clear then we need to load all other
* debug registers at this point as they were not restored during
* context switch.
*/
- if ( (v == curr) && (v->arch.guest_context.debugreg[7] == 0) )
+ if ( (v == curr) &&
+ !(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
{
write_debugreg(0, v->arch.guest_context.debugreg[0]);
write_debugreg(1, v->arch.guest_context.debugreg[1]);
@@ -2832,7 +2895,7 @@ long set_debugreg(struct vcpu *v, int re
write_debugreg(6, v->arch.guest_context.debugreg[6]);
}
}
- if ( v == curr )
+ if ( v == curr )
write_debugreg(7, value);
break;
default:
@@ -2850,8 +2913,22 @@ long do_set_debugreg(int reg, unsigned l
unsigned long do_get_debugreg(int reg)
{
- if ( (reg < 0) || (reg > 7) ) return -EINVAL;
- return current->arch.guest_context.debugreg[reg];
+ struct vcpu *curr = current;
+
+ switch ( reg )
+ {
+ case 0 ... 3:
+ case 6:
+ return curr->arch.guest_context.debugreg[reg];
+ case 7:
+ return (curr->arch.guest_context.debugreg[7] |
+ curr->arch.guest_context.debugreg[5]);
+ case 4 ... 5:
+ return ((curr->arch.guest_context.ctrlreg[4] & X86_CR4_DE) ?
+ curr->arch.guest_context.debugreg[reg + 2] : 0);
+ }
+
+ return -EINVAL;
}
/*
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/include/asm-x86/debugreg.h
--- a/xen/include/asm-x86/debugreg.h Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/include/asm-x86/debugreg.h Thu Nov 22 19:23:40 2007 +0000
@@ -4,23 +4,22 @@
/* Indicate the register numbers for a number of the specific
debug registers. Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
-#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
-#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
+#define DR_FIRSTADDR 0
+#define DR_LASTADDR 3
+#define DR_STATUS 6
+#define DR_CONTROL 7
/* Define a few things for the status register. We can use this to determine
which debugging register was responsible for the trap. The other bits
are either reserved or not of interest to us. */
-#define DR_TRAP0 (0x1) /* db0 */
-#define DR_TRAP1 (0x2) /* db1 */
-#define DR_TRAP2 (0x4) /* db2 */
-#define DR_TRAP3 (0x8) /* db3 */
-
-#define DR_STEP (0x4000) /* single-step */
-#define DR_SWITCH (0x8000) /* task switch */
+#define DR_TRAP0 (0x1) /* db0 */
+#define DR_TRAP1 (0x2) /* db1 */
+#define DR_TRAP2 (0x4) /* db2 */
+#define DR_TRAP3 (0x8) /* db3 */
+#define DR_STEP (0x4000) /* single-step */
+#define DR_SWITCH (0x8000) /* task switch */
/* Now define a bunch of things for manipulating the control register.
The top two bytes of the control register consist of 4 fields of 4
@@ -29,36 +28,40 @@
field is that we are looking at */
#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
+#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
-#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
+#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_IO (0x2)
+#define DR_RW_READ (0x3)
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+#define DR_LEN_8 (0x8)
/* The low byte to the control register determine which registers are
enabled. There are 4 fields of two bits. One bit is "local", meaning
that the processor will reset the bit after a task switch and the other
- is global meaning that we have to explicitly reset the bit. With linux,
- you can use either one, since we explicitly zero the register when we enter
- kernel mode. */
+ is global meaning that we have to explicitly reset the bit. */
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
-#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
+#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+#define DR7_ACTIVE_MASK (DR_LOCAL_ENABLE_MASK|DR_GLOBAL_ENABLE_MASK)
/* The second byte to the control register has a few special things.
We can slow the instruction pipeline for instructions coming via the
gdt or the ldt if we want to. I am not sure why this is an advantage */
-#define DR_CONTROL_RESERVED (~0xFFFF03FFUL) /* Reserved by Intel */
-#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+#define DR_CONTROL_RESERVED_ZERO (0x0000d800ul) /* Reserved, read as zero */
+#define DR_CONTROL_RESERVED_ONE (0x00000400ul) /* Reserved, read as one */
+#define DR_LOCAL_EXACT_ENABLE (0x00000100ul) /* Local exact enable */
+#define DR_GLOBAL_EXACT_ENABLE (0x00000200ul) /* Global exact enable */
+#define DR_GENERAL_DETECT (0x00002000ul) /* General detect enable */
#endif /* _X86_DEBUGREG_H */
diff -r fd3f6d814f6d -r 69b56d3289f5 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Nov 22 18:28:47 2007 +0000
+++ b/xen/include/asm-x86/domain.h Thu Nov 22 19:23:40 2007 +0000
@@ -356,7 +356,7 @@ unsigned long pv_guest_cr4_fixup(unsigne
/* Convert between guest-visible and real CR4 values. */
#define pv_guest_cr4_to_real_cr4(c) \
- ((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE)))
+ (((c) | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))) & ~X86_CR4_DE)
#define real_cr4_to_pv_guest_cr4(c) \
((c) & ~(X86_CR4_PGE | X86_CR4_PSE))