# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1228356113 -32400
# Node ID f4c5befcba8d78d4f93f58602e9ea942e7372496
# Parent 54e5d15af567012bb7e88b0ccb3bad9a7f0168a5
# Parent 7338f6301067c7298eb4b3ff44992c53488b0df7
merge with xen-unstable.hg
---
stubdom/grub/mini-os.c | 2
tools/python/xen/xend/XendDomainInfo.py | 12 +++-
tools/python/xen/xm/addlabel.py | 5 +
tools/xcutils/xc_save.c | 38 +++++++++----
tools/xenpmd/xenpmd.c | 2
tools/xenstat/libxenstat/src/xenstat_linux.c | 27 ++-------
xen/arch/ia64/xen/domain.c | 3 -
xen/arch/x86/boot/wakeup.S | 3 -
xen/arch/x86/domain.c | 10 ++-
xen/arch/x86/domctl.c | 16 +----
xen/arch/x86/hpet.c | 3 +
xen/arch/x86/irq.c | 70 +++++++++++++++++++++---
xen/arch/x86/mm/shadow/multi.c | 22 ++++---
xen/arch/x86/physdev.c | 43 +++++++++++++++
xen/arch/x86/x86_64/physdev.c | 3 +
xen/arch/x86/x86_emulate/x86_emulate.c | 73 ++++++++------------------
xen/common/domain.c | 3 +
xen/common/event_channel.c | 5 -
xen/common/timer.c | 10 ++-
xen/drivers/char/console.c | 2
xen/drivers/passthrough/amd/iommu_init.c | 71 +++++++++++++++++--------
xen/drivers/passthrough/amd/iommu_map.c | 44 +++++++++++++++
xen/drivers/passthrough/amd/pci_amd_iommu.c | 1
xen/drivers/passthrough/vtd/dmar.c | 26 +++++++--
xen/drivers/passthrough/vtd/dmar.h | 1
xen/drivers/passthrough/vtd/iommu.c | 39 +++++++------
xen/drivers/passthrough/vtd/iommu.h | 4 +
xen/drivers/passthrough/vtd/qinval.c | 23 ++++----
xen/include/asm-x86/domain.h | 4 +
xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 1
xen/include/asm-x86/page.h | 2
xen/include/public/physdev.h | 15 +++++
xen/include/xen/event.h | 3 +
xen/include/xen/irq.h | 1
xen/tools/symbols.c | 3 -
35 files changed, 400 insertions(+), 190 deletions(-)
diff -r 54e5d15af567 -r f4c5befcba8d stubdom/grub/mini-os.c
--- a/stubdom/grub/mini-os.c Wed Dec 03 11:43:54 2008 +0900
+++ b/stubdom/grub/mini-os.c Thu Dec 04 11:01:53 2008 +0900
@@ -649,7 +649,7 @@ int getrtsecs (void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
- return tv.tv_sec;
+ return tv.tv_sec % 10 + ((tv.tv_sec / 10) % 6) * 0x10;
}
int currticks (void)
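
The mini-os change above makes getrtsecs() return the seconds field in
packed BCD, presumably to match the BCD seconds GRUB would otherwise
read from a real RTC. A minimal standalone sketch of that encoding
(function name invented):

    #include <stdio.h>

    /* Packed-BCD seconds-of-minute: units in the low nibble, tens (0-5)
     * in the high nibble, e.g. 59 -> 0x59. */
    static int bcd_seconds(long sec)
    {
        return sec % 10 + ((sec / 10) % 6) * 0x10;
    }

    int main(void)
    {
        printf("%#x\n", bcd_seconds(59));  /* prints 0x59 */
        printf("%#x\n", bcd_seconds(61));  /* minute wraps: prints 0x1 */
        return 0;
    }
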
diff -r 54e5d15af567 -r f4c5befcba8d tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Dec 03 11:43:54 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py Thu Dec 04 11:01:53 2008 +0900
@@ -1990,13 +1990,21 @@ class XendDomainInfo:
for devclass in XendDevices.valid_devices():
for dev in t.list(devclass):
try:
+ true_devclass = devclass
+ if devclass == 'vbd':
+ # In the case of "vbd", the true device class
+ # may actually be "tap", so verify the device
+ # class to be sure.
+ devid = dev.split('/')[-1]
+ true_devclass = self.getBlockDeviceClass(devid)
log.debug("Removing %s", dev);
- self.destroyDevice(devclass, dev, False);
+ self.destroyDevice(true_devclass, dev, False);
except:
# Log and swallow any exceptions in removal --
# there's nothing more we can do.
log.exception("Device release failed: %s; %s; %s",
- self.info['name_label'], devclass, dev)
+ self.info['name_label'],
+ true_devclass, dev)
finally:
t.abort()
diff -r 54e5d15af567 -r f4c5befcba8d tools/python/xen/xm/addlabel.py
--- a/tools/python/xen/xm/addlabel.py Wed Dec 03 11:43:54 2008 +0900
+++ b/tools/python/xen/xm/addlabel.py Thu Dec 04 11:01:53 2008 +0900
@@ -64,12 +64,13 @@ def validate_config_file(configfile):
return 0
# sanity check on the data from the file
+ # requiring 'memory', 'name', and either 'kernel' or 'bootloader'
count = 0
- required = ['kernel', 'memory', 'name']
+ required = ['kernel', 'bootloader', 'memory', 'name']
for (k, v) in locs.items():
if k in required:
count += 1
- if count != 3:
+ if count < len(required) - 1:
print "Invalid configuration file."
return 0
else:
diff -r 54e5d15af567 -r f4c5befcba8d tools/xcutils/xc_save.c
--- a/tools/xcutils/xc_save.c Wed Dec 03 11:43:54 2008 +0900
+++ b/tools/xcutils/xc_save.c Thu Dec 04 11:01:53 2008 +0900
@@ -24,8 +24,11 @@
#include <xenguest.h>
static struct suspendinfo {
+ int xc_fd; /* libxc handle */
int xce; /* event channel handle */
int suspend_evtchn;
+ int domid;
+ unsigned int flags;
} si;
/**
@@ -161,6 +164,19 @@ static int evtchn_suspend(void)
static int suspend(void)
{
+ unsigned long sx_state = 0;
+
+ /* Nothing to do if the guest is in an ACPI sleep state. */
+ if (si.flags & XCFLAGS_HVM)
+ xc_get_hvm_param(si.xc_fd, si.domid,
+ HVM_PARAM_ACPI_S_STATE, &sx_state);
+ if (sx_state != 0) {
+ /* notify xend that it can do device migration */
+ printf("suspended\n");
+ fflush(stdout);
+ return 1;
+ }
+
if (si.suspend_evtchn >= 0)
return evtchn_suspend();
@@ -297,32 +313,32 @@ int
int
main(int argc, char **argv)
{
- unsigned int domid, maxit, max_f, flags;
- int xc_fd, io_fd, ret;
+ unsigned int maxit, max_f;
+ int io_fd, ret;
if (argc != 6)
errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
- xc_fd = xc_interface_open();
- if (xc_fd < 0)
+ si.xc_fd = xc_interface_open();
+ if (si.xc_fd < 0)
errx(1, "failed to open control interface");
io_fd = atoi(argv[1]);
- domid = atoi(argv[2]);
+ si.domid = atoi(argv[2]);
maxit = atoi(argv[3]);
max_f = atoi(argv[4]);
- flags = atoi(argv[5]);
-
- if (suspend_evtchn_init(xc_fd, domid) < 0)
+ si.flags = atoi(argv[5]);
+
+ if (suspend_evtchn_init(si.xc_fd, si.domid) < 0)
warnx("suspend event channel initialization failed, using slow path");
- ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags,
- &suspend, !!(flags & XCFLAGS_HVM),
+ ret = xc_domain_save(si.xc_fd, io_fd, si.domid, maxit, max_f, si.flags,
+ &suspend, !!(si.flags & XCFLAGS_HVM),
&init_qemu_maps, &qemu_flip_buffer);
suspend_evtchn_release();
- xc_interface_close(xc_fd);
+ xc_interface_close(si.xc_fd);
return ret;
}
diff -r 54e5d15af567 -r f4c5befcba8d tools/xenpmd/xenpmd.c
--- a/tools/xenpmd/xenpmd.c Wed Dec 03 11:43:54 2008 +0900
+++ b/tools/xenpmd/xenpmd.c Thu Dec 04 11:01:53 2008 +0900
@@ -373,7 +373,7 @@ void write_battery_info_to_xenstore(stru
(unsigned int)strlen(info->serial_number), info->serial_number,
(unsigned int)strlen(info->battery_type), info->battery_type,
(unsigned int)strlen(info->oem_info), info->oem_info);
- strncat(val+73, string_info, 1024);
+ strncat(val+73, string_info, 1024-73-1);
xs_write(xs, XBT_NULL, "/pm/bif",
val, 73+8+strlen(info->model_number)+strlen(info->serial_number)+
strlen(info->battery_type)+strlen(info->oem_info)+1);
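
The xenpmd fix above corrects a common strncat() misuse: the third
argument bounds how many bytes may be appended, not the size of the
destination buffer. A standalone sketch of the corrected pattern (the
73-byte prefix mirrors the patch; the contents are illustrative):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char val[1024];
        const char *string_info = "battery model/serial/type/oem strings";

        /* Pretend the first 73 bytes are already populated. */
        memset(val, 'h', 73);
        val[73] = '\0';

        /* Appending at offset 73 leaves 1024-73 bytes; reserve one for
         * the NUL that strncat always writes. */
        strncat(val + 73, string_info, sizeof(val) - 73 - 1);

        printf("%zu bytes used\n", strlen(val));
        return 0;
    }
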
diff -r 54e5d15af567 -r f4c5befcba8d tools/xenstat/libxenstat/src/xenstat_linux.c
--- a/tools/xenstat/libxenstat/src/xenstat_linux.c Wed Dec 03 11:43:54 2008 +0900
+++ b/tools/xenstat/libxenstat/src/xenstat_linux.c Thu Dec 04 11:01:53 2008 +0900
@@ -182,12 +182,6 @@ int xenstat_collect_vbds(xenstat_node *
struct dirent *dp;
struct priv_data *priv = get_priv_data(node->handle);
- char *sys_prefix = "statistics/";
-
- /* 23 = "statistics/" + "xxxx_xx_req" */
- char ooreq[23], rdreq[23], wrreq[23];
- char *stat_prefix = NULL;
-
if (priv == NULL) {
perror("Allocation error");
return 0;
@@ -215,16 +209,12 @@ int xenstat_collect_vbds(xenstat_node *
if (ret != 3)
continue;
-
- if (strcmp(buf,"vbd") == 0){
- stat_prefix = "";
+ if (strcmp(buf,"vbd") == 0)
vbd.back_type = 1;
- } else if (strcmp(buf,"tap") == 0){
- stat_prefix = "tap_";
+ else if (strcmp(buf,"tap") == 0)
vbd.back_type = 2;
- } else {
- continue;
- }
+ else
+ continue;
domain = xenstat_node_domain(node, domid);
if (domain == NULL) {
@@ -235,22 +225,19 @@ int xenstat_collect_vbds(xenstat_node *
continue;
}
- snprintf(ooreq, sizeof(ooreq), "%s%soo_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, ooreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/oo_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.oo_reqs)) != 1))
{
continue;
}
- snprintf(rdreq, sizeof(rdreq),"%s%srd_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, rdreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/rd_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.rd_reqs)) != 1))
{
continue;
}
- snprintf(wrreq, sizeof(wrreq),"%s%swr_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, wrreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/wr_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.wr_reqs)) != 1))
{
continue;
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/ia64/xen/domain.c Thu Dec 04 11:01:53 2008 +0900
@@ -1686,9 +1686,6 @@ int domain_relinquish_resources(struct d
if (is_hvm_domain(d) && d->arch.sal_data)
xfree(d->arch.sal_data);
- /* Free page used by xen oprofile buffer */
- free_xenoprof_pages(d);
-
return 0;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/boot/wakeup.S
--- a/xen/arch/x86/boot/wakeup.S Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/boot/wakeup.S Thu Dec 04 11:01:53 2008 +0900
@@ -50,8 +50,7 @@ 1: # Show some progress if VGA is r
movw $1, %ax
lmsw %ax # Turn on CR0.PE
- jmp 1f
-1: ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
+ ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/domain.c Thu Dec 04 11:01:53 2008 +0900
@@ -1814,6 +1814,13 @@ int domain_relinquish_resources(struct d
unmap_vcpu_info(v);
}
+ if ( d->arch.pirq_eoi_map != NULL )
+ {
+ unmap_domain_page_global(d->arch.pirq_eoi_map);
+ put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn));
+ d->arch.pirq_eoi_map = NULL;
+ }
+
d->arch.relmem = RELMEM_xen;
/* fallthrough */
@@ -1856,9 +1863,6 @@ int domain_relinquish_resources(struct d
default:
BUG();
}
-
- /* Free page used by xen oprofile buffer. */
- free_xenoprof_pages(d);
if ( is_hvm_domain(d) )
hvm_domain_relinquish_resources(d);
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/domctl.c Thu Dec 04 11:01:53 2008 +0900
@@ -326,13 +326,9 @@ long arch_do_domctl(
case XEN_DOMCTL_sethvmcontext:
{
- struct hvm_domain_context c;
- struct domain *d;
-
- c.cur = 0;
- c.size = domctl->u.hvmcontext.size;
- c.data = NULL;
-
+ struct hvm_domain_context c = { .size = domctl->u.hvmcontext.size };
+ struct domain *d;
+
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
break;
@@ -367,8 +363,8 @@ long arch_do_domctl(
case XEN_DOMCTL_gethvmcontext:
{
- struct hvm_domain_context c;
- struct domain *d;
+ struct hvm_domain_context c = { 0 };
+ struct domain *d;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
@@ -382,9 +378,7 @@ long arch_do_domctl(
if ( !is_hvm_domain(d) )
goto gethvmcontext_out;
- c.cur = 0;
c.size = hvm_save_size(d);
- c.data = NULL;
if ( guest_handle_is_null(domctl->u.hvmcontext.buffer) )
{
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/hpet.c Thu Dec 04 11:01:53 2008 +0900
@@ -273,6 +273,9 @@ u64 hpet_setup(void)
return hpet_rate;
system_reset_latch = system_reset_counter;
+ if ( hpet_address == 0 )
+ return 0;
+
set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
hpet_id = hpet_read32(HPET_ID);
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/irq.c Thu Dec 04 11:01:53 2008 +0900
@@ -18,6 +18,7 @@
#include <xen/iommu.h>
#include <asm/msi.h>
#include <asm/current.h>
+#include <asm/flushtlb.h>
#include <public/physdev.h>
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -206,16 +207,42 @@ static DEFINE_PER_CPU(struct pending_eoi
static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
#define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ clear_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ unsigned int i, vector = desc - irq_desc;
+
+ if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+ return;
+
+ for ( i = 0; i < action->nr_guests; ++i )
+ clear_pirq_eoi(action->guest[i],
+ domain_vector_to_irq(action->guest[i], vector));
+
+ desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+ desc->handler->enable(vector);
+}
+
static struct timer irq_guest_eoi_timer[NR_VECTORS];
static void irq_guest_eoi_timer_fn(void *data)
{
irq_desc_t *desc = data;
- unsigned vector = desc - irq_desc;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~IRQ_INPROGRESS;
- desc->handler->enable(vector);
+ _irq_guest_eoi(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -272,8 +299,22 @@ static void __do_IRQ_guest(int vector)
if ( already_pending == action->nr_guests )
{
+ stop_timer(&irq_guest_eoi_timer[vector]);
desc->handler->disable(vector);
- stop_timer(&irq_guest_eoi_timer[vector]);
+ desc->status |= IRQ_GUEST_EOI_PENDING;
+ for ( i = 0; i < already_pending; ++i )
+ {
+ d = action->guest[i];
+ set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+ /*
+ * Could check here whether the guest unmasked the event by now
+ * (or perhaps just re-issue the send_guest_pirq()), and if it
+ * can now accept the event,
+ * - clear all the pirq_eoi bits we already set,
+ * - re-enable the vector, and
+ * - skip the timer setup below.
+ */
+ }
init_timer(&irq_guest_eoi_timer[vector],
irq_guest_eoi_timer_fn, desc, smp_processor_id());
set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
@@ -310,7 +351,7 @@ irq_desc_t *domain_spin_lock_irq_desc(
}
/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
-static void flush_ready_eoi(void *unused)
+static void flush_ready_eoi(void)
{
struct pending_eoi *peoi = this_cpu(pending_eoi);
irq_desc_t *desc;
@@ -364,7 +405,7 @@ static void set_eoi_ready(void *data)
__set_eoi_ready(desc);
spin_unlock(&desc->lock);
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
}
static void __pirq_guest_eoi(struct domain *d, int irq)
@@ -382,8 +423,12 @@ static void __pirq_guest_eoi(struct doma
action = (irq_guest_action_t *)desc->action;
vector = desc - irq_desc;
- ASSERT(!test_bit(irq, d->pirq_mask) ||
- (action->ack_type != ACKTYPE_NONE));
+ if ( action->ack_type == ACKTYPE_NONE )
+ {
+ ASSERT(!test_bit(irq, d->pirq_mask));
+ stop_timer(&irq_guest_eoi_timer[vector]);
+ _irq_guest_eoi(desc);
+ }
if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
unlikely(--action->in_flight != 0) )
@@ -408,7 +453,7 @@ static void __pirq_guest_eoi(struct doma
{
__set_eoi_ready(desc);
spin_unlock(&desc->lock);
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
local_irq_enable();
}
else
@@ -606,6 +651,11 @@ int pirq_guest_bind(struct vcpu *v, int
}
action->guest[action->nr_guests++] = v->domain;
+
+ if ( action->ack_type != ACKTYPE_NONE )
+ set_pirq_eoi(v->domain, irq);
+ else
+ clear_pirq_eoi(v->domain, irq);
unlock_out:
spin_unlock_irq(&desc->lock);
@@ -1050,6 +1100,6 @@ void fixup_irqs(cpumask_t map)
peoi = this_cpu(pending_eoi);
for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
peoi[sp].ready = 1;
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
}
#endif
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c Thu Dec 04 11:01:53 2008 +0900
@@ -1886,13 +1886,6 @@ static shadow_l1e_t * shadow_get_and_cre
if ( r & SHADOW_SET_ERROR )
return NULL;
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
- /* All pages walked are now pagetables. Safe to resync pages
- in case level 4 or 3 shadows were set. */
- if ( resync )
- shadow_resync_all(v, 0);
-#endif
-
/* This next line is important: in 32-on-PAE and 32-on-64 modes,
* the guest l1 table has an 8k shadow, and we need to return
* the right mfn of the pair. This call will set it for us as a
@@ -1900,6 +1893,14 @@ static shadow_l1e_t * shadow_get_and_cre
* compiled out.) */
(void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
}
+
+#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+ /* All pages walked are now pagetables. Safe to resync pages
+ in case level 4 or 3 shadows were set. */
+ if ( resync )
+ shadow_resync_all(v, 0);
+#endif
+
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
}
@@ -2176,7 +2177,8 @@ static int validate_gl4e(struct vcpu *v,
result |= SHADOW_SET_ERROR;
#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
- shadow_resync_all(v, 0);
+ if ( mfn_valid(sl3mfn) )
+ shadow_resync_all(v, 0);
#endif
}
l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
@@ -2232,7 +2234,8 @@ static int validate_gl3e(struct vcpu *v,
result |= SHADOW_SET_ERROR;
#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
- shadow_resync_all(v, 0);
+ if ( mfn_valid(sl2mfn) )
+ shadow_resync_all(v, 0);
#endif
}
l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
@@ -2924,6 +2927,7 @@ static int sh_page_fault(struct vcpu *v,
writes to an out of sync page. */
if ( mfn_valid(gmfn) && mfn_is_out_of_sync(gmfn) )
{
+ fast_emul = 0;
v->arch.paging.last_write_emul_ok = 0;
goto page_fault_slow_path;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/physdev.c Thu Dec 04 11:01:53 2008 +0900
@@ -14,6 +14,7 @@
#include <public/xen.h>
#include <public/physdev.h>
#include <xsm/xsm.h>
+#include <asm/p2m.h>
#ifndef COMPAT
typedef long ret_t;
@@ -191,7 +192,49 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
ret = -EFAULT;
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
+ ret = -EINVAL;
+ if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+ break;
+ if ( v->domain->arch.pirq_eoi_map )
+ evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
ret = pirq_guest_eoi(v->domain, eoi.irq);
+ break;
+ }
+
+ case PHYSDEVOP_pirq_eoi_gmfn: {
+ struct physdev_pirq_eoi_gmfn info;
+ unsigned long mfn;
+
+ BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) != 0 )
+ break;
+
+ ret = -EINVAL;
+ mfn = gmfn_to_mfn(current->domain, info.gmfn);
+ if ( !mfn_valid(mfn) ||
+ !get_page_and_type(mfn_to_page(mfn), v->domain,
+ PGT_writable_page) )
+ break;
+
+ if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, mfn) != 0 )
+ {
+ put_page_and_type(mfn_to_page(mfn));
+ ret = -EBUSY;
+ break;
+ }
+
+ v->domain->arch.pirq_eoi_map = map_domain_page_global(mfn);
+ if ( v->domain->arch.pirq_eoi_map == NULL )
+ {
+ v->domain->arch.pirq_eoi_map_mfn = 0;
+ put_page_and_type(mfn_to_page(mfn));
+ ret = -ENOSPC;
+ break;
+ }
+
+ ret = 0;
break;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/x86_64/physdev.c
--- a/xen/arch/x86/x86_64/physdev.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/x86_64/physdev.c Thu Dec 04 11:01:53 2008 +0900
@@ -17,6 +17,9 @@
#define physdev_eoi compat_physdev_eoi
#define physdev_eoi_t physdev_eoi_compat_t
+
+#define physdev_pirq_eoi_gmfn compat_physdev_pirq_eoi_gmfn
+#define physdev_pirq_eoi_gmfn_t physdev_pirq_eoi_gmfn_compat_t
#define physdev_set_iobitmap compat_physdev_set_iobitmap
#define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t
diff -r 54e5d15af567 -r f4c5befcba8d xen/arch/x86/x86_emulate/x86_emulate.c
--- a/xen/arch/x86/x86_emulate/x86_emulate.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c Thu Dec 04 11:01:53 2008 +0900
@@ -28,6 +28,7 @@
#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
#define DstBitBase (1<<1) /* Memory operand, bit string. */
#define DstReg (2<<1) /* Register operand. */
+#define DstEax DstReg /* Register EAX (aka DstReg with no ModRM) */
#define DstMem (3<<1) /* Memory operand. */
#define DstMask (3<<1)
/* Source operand type. */
@@ -51,35 +52,35 @@ static uint8_t opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x08 - 0x0F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, 0,
/* 0x10 - 0x17 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x18 - 0x1F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x20 - 0x27 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x28 - 0x2F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x30 - 0x37 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x38 - 0x3F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x40 - 0x4F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -125,7 +126,7 @@ static uint8_t opcode_table[256] = {
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps, ImplicitOps,
/* 0xA8 - 0xAF */
- ByteOp|DstReg|SrcImm, DstReg|SrcImm,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm,
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps, ImplicitOps,
@@ -687,12 +688,12 @@ static void __put_rep_prefix(
})
/* Clip maximum repetitions so that the index register only just wraps. */
-#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \
- unsigned long __todo = (ctxt->regs->eflags & EF_DF) ? (ea) : ~(ea); \
- __todo = truncate_word(__todo, ad_bytes); \
- __todo = (__todo / (bytes_per_rep)) + 1; \
- (reps) = (__todo < (reps)) ? __todo : (reps); \
- truncate_word((ea), ad_bytes); \
+#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \
+ unsigned long __todo = (ctxt->regs->eflags & EFLG_DF) ? (ea) : ~(ea); \
+ __todo = truncate_word(__todo, ad_bytes); \
+ __todo = (__todo / (bytes_per_rep)) + 1; \
+ (reps) = (__todo < (reps)) ? __todo : (reps); \
+ truncate_word((ea), ad_bytes); \
})
/* Compatibility function: read guest memory, zero-extend result to a ulong. */
@@ -1574,59 +1575,35 @@ x86_emulate(
switch ( b )
{
- case 0x04 ... 0x05: /* add imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x00 ... 0x03: add: /* add */
+ case 0x00 ... 0x05: add: /* add */
emulate_2op_SrcV("add", src, dst, _regs.eflags);
break;
- case 0x0c ... 0x0d: /* or imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x08 ... 0x0b: or: /* or */
+ case 0x08 ... 0x0d: or: /* or */
emulate_2op_SrcV("or", src, dst, _regs.eflags);
break;
- case 0x14 ... 0x15: /* adc imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x10 ... 0x13: adc: /* adc */
+ case 0x10 ... 0x15: adc: /* adc */
emulate_2op_SrcV("adc", src, dst, _regs.eflags);
break;
- case 0x1c ... 0x1d: /* sbb imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x18 ... 0x1b: sbb: /* sbb */
+ case 0x18 ... 0x1d: sbb: /* sbb */
emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
break;
- case 0x24 ... 0x25: /* and imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x20 ... 0x23: and: /* and */
+ case 0x20 ... 0x25: and: /* and */
emulate_2op_SrcV("and", src, dst, _regs.eflags);
break;
- case 0x2c ... 0x2d: /* sub imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x28 ... 0x2b: sub: /* sub */
+ case 0x28 ... 0x2d: sub: /* sub */
emulate_2op_SrcV("sub", src, dst, _regs.eflags);
break;
- case 0x34 ... 0x35: /* xor imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x30 ... 0x33: xor: /* xor */
+ case 0x30 ... 0x35: xor: /* xor */
emulate_2op_SrcV("xor", src, dst, _regs.eflags);
break;
- case 0x3c ... 0x3d: /* cmp imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x38 ... 0x3b: cmp: /* cmp */
+ case 0x38 ... 0x3d: cmp: /* cmp */
emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
dst.type = OP_NONE;
break;
@@ -1988,8 +1965,6 @@ x86_emulate(
break;
case 0xa8 ... 0xa9: /* test imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
case 0x84 ... 0x85: test: /* test */
emulate_2op_SrcV("test", src, dst, _regs.eflags);
dst.type = OP_NONE;
diff -r 54e5d15af567 -r f4c5befcba8d xen/common/domain.c
--- a/xen/common/domain.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/common/domain.c Thu Dec 04 11:01:53 2008 +0900
@@ -553,6 +553,9 @@ static void complete_domain_destroy(stru
sched_destroy_domain(d);
+ /* Free page used by xen oprofile buffer. */
+ free_xenoprof_pages(d);
+
for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
if ( (v = d->vcpu[i]) != NULL )
free_vcpu_struct(v);
diff -r 54e5d15af567 -r f4c5befcba8d xen/common/event_channel.c
--- a/xen/common/event_channel.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/common/event_channel.c Thu Dec 04 11:01:53 2008 +0900
@@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port,
}
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
{
struct domain *d = current->domain;
- int port = unmask->port;
struct vcpu *v;
spin_lock(&d->event_lock);
@@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
- rc = evtchn_unmask(&unmask);
+ rc = evtchn_unmask(unmask.port);
break;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/common/timer.c
--- a/xen/common/timer.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/common/timer.c Thu Dec 04 11:01:53 2008 +0900
@@ -494,12 +494,14 @@ static void dump_timerq(unsigned char ke
for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ )
{
t = ts->heap[j];
- printk (" %d : %p ex=0x%08X%08X %p\n",
- j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
+ printk (" %d : %p ex=0x%08X%08X %p %p\n",
+ j, t, (u32)(t->expires>>32), (u32)t->expires,
+ t->data, t->function);
}
for ( t = ts->list, j = 0; t != NULL; t = t->list_next, j++ )
- printk (" L%d : %p ex=0x%08X%08X %p\n",
- j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
+ printk (" L%d : %p ex=0x%08X%08X %p %p\n",
+ j, t, (u32)(t->expires>>32), (u32)t->expires,
+ t->data, t->function);
spin_unlock_irqrestore(&ts->lock, flags);
printk("\n");
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/char/console.c Thu Dec 04 11:01:53 2008 +0900
@@ -927,7 +927,7 @@ void panic(const char *fmt, ...)
console_start_sync();
printk("\n****************************************\n");
printk("Panic on CPU %d:\n", smp_processor_id());
- printk(buf);
+ printk("%s", buf);
printk("****************************************\n\n");
if ( opt_noreboot )
printk("Manual reset required ('noreboot' specified)\n");
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/amd/iommu_init.c Thu Dec 04 11:01:53 2008 +0900
@@ -152,13 +152,33 @@ static void __init set_iommu_translation
{
u32 entry;
- entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
- set_field_in_reg_u32(iommu->ht_tunnel_support ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+
+ if ( enable )
+ {
+ set_field_in_reg_u32(iommu->ht_tunnel_support ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK,
IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->isochronous ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_ISOCHRONOUS_MASK,
+ IOMMU_CONTROL_ISOCHRONOUS_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->coherent ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_COHERENT_MASK,
+ IOMMU_CONTROL_COHERENT_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->res_pass_pw ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK,
+ IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT, &entry);
+ /* do not set PassPW bit */
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_PASS_POSTED_WRITE_MASK,
+ IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT, &entry);
+ }
set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_TRANSLATION_ENABLE_MASK,
IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
@@ -171,7 +191,7 @@ static void __init set_iommu_command_buf
entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK,
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
@@ -235,8 +255,7 @@ static void __init set_iommu_event_log_c
IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
- set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_DISABLED, entry,
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_COMP_WAIT_INT_MASK,
IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
@@ -391,20 +410,19 @@ static void parse_event_log_entry(u32 en
u32 code;
u64 *addr;
char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
- "IO_PAGE_FALT",
- "DEV_TABLE_HW_ERROR",
- "PAGE_TABLE_HW_ERROR",
- "ILLEGAL_COMMAND_ERROR",
- "COMMAND_HW_ERROR",
- "IOTLB_INV_TIMEOUT",
- "INVALID_DEV_REQUEST"};
-
- code = get_field_from_reg_u32(entry[1],
- IOMMU_EVENT_CODE_MASK,
- IOMMU_EVENT_CODE_SHIFT);
-
- if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST)
- || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
+ "IO_PAGE_FALT",
+ "DEV_TABLE_HW_ERROR",
+ "PAGE_TABLE_HW_ERROR",
+ "ILLEGAL_COMMAND_ERROR",
+ "COMMAND_HW_ERROR",
+ "IOTLB_INV_TIMEOUT",
+ "INVALID_DEV_REQUEST"};
+
+ code = get_field_from_reg_u32(entry[1], IOMMU_EVENT_CODE_MASK,
+ IOMMU_EVENT_CODE_SHIFT);
+
+ if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST) ||
+ (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
{
amd_iov_error("Invalid event log entry!\n");
return;
@@ -428,13 +446,20 @@ static void amd_iommu_page_fault(int vec
static void amd_iommu_page_fault(int vector, void *dev_id,
struct cpu_user_regs *regs)
{
- u32 event[4];
+ u32 event[4];
+ u32 entry;
unsigned long flags;
int ret = 0;
struct amd_iommu *iommu = dev_id;
spin_lock_irqsave(&iommu->lock, flags);
ret = amd_iommu_read_event_log(iommu, event);
+ /* reset interrupt status bit */
+ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_STATUS_EVENT_LOG_INT_MASK,
+ IOMMU_STATUS_EVENT_LOG_INT_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
spin_unlock_irqrestore(&iommu->lock, flags);
if ( ret != 0 )
@@ -466,7 +491,7 @@ static int set_iommu_interrupt_handler(s
amd_iov_error("can't request irq\n");
return 0;
}
-
+ iommu->vector = vector;
return vector;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/amd/iommu_map.c Thu Dec 04 11:01:53 2008 +0900
@@ -580,3 +580,47 @@ out:
spin_unlock_irqrestore(&hd->mapping_lock, flags);
return 0;
}
+
+void invalidate_all_iommu_pages(struct domain *d)
+{
+ u32 cmd[4], entry;
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ int domain_id = d->domain_id;
+ u64 addr_lo = 0x7FFFFFFFFFFFF000ULL & DMA_32BIT_MASK;
+ u64 addr_hi = 0x7FFFFFFFFFFFF000ULL >> 32;
+
+ set_field_in_reg_u32(domain_id, 0,
+ IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK,
+ IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT, &entry);
+ set_field_in_reg_u32(IOMMU_CMD_INVALIDATE_IOMMU_PAGES, entry,
+ IOMMU_CMD_OPCODE_MASK, IOMMU_CMD_OPCODE_SHIFT,
+ &entry);
+ cmd[1] = entry;
+
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
+ IOMMU_INV_IOMMU_PAGES_S_FLAG_MASK,
+ IOMMU_INV_IOMMU_PAGES_S_FLAG_SHIFT, &entry);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_INV_IOMMU_PAGES_PDE_FLAG_MASK,
+ IOMMU_INV_IOMMU_PAGES_PDE_FLAG_SHIFT, &entry);
+ set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, entry,
+ IOMMU_INV_IOMMU_PAGES_ADDR_LOW_MASK,
+ IOMMU_INV_IOMMU_PAGES_ADDR_LOW_SHIFT, &entry);
+ cmd[2] = entry;
+
+ set_field_in_reg_u32((u32)addr_hi, 0,
+ IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_MASK,
+ IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_SHIFT, &entry);
+ cmd[3] = entry;
+
+ cmd[0] = 0;
+
+ for_each_amd_iommu ( iommu )
+ {
+ spin_lock_irqsave(&iommu->lock, flags);
+ send_iommu_command(iommu, cmd);
+ flush_command_buffer(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+}
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Dec 04 11:01:53 2008 +0900
@@ -389,6 +389,7 @@ static void amd_iommu_domain_destroy(str
static void amd_iommu_domain_destroy(struct domain *d)
{
deallocate_iommu_page_tables(d);
+ invalidate_all_iommu_pages(d);
}
static int amd_iommu_return_device(
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.c Thu Dec 04 11:01:53 2008 +0900
@@ -172,6 +172,28 @@ struct acpi_drhd_unit * acpi_find_matche
return found ? found : include_all;
}
+struct acpi_atsr_unit * acpi_find_matched_atsr_unit(u8 bus, u8 devfn)
+{
+ struct acpi_atsr_unit *atsr;
+ struct acpi_atsr_unit *found = NULL, *include_all = NULL;
+ int i;
+
+ list_for_each_entry ( atsr, &acpi_atsr_units, list )
+ {
+ for (i = 0; i < atsr->scope.devices_cnt; i++)
+ if ( atsr->scope.devices[i] == PCI_BDF2(bus, devfn) )
+ return atsr;
+
+ if ( test_bit(bus, atsr->scope.buses) )
+ found = atsr;
+
+ if ( atsr->all_ports )
+ include_all = atsr;
+ }
+
+ return found ? found : include_all;
+}
+
/*
* Count number of devices in device scope. Do not include PCI sub
* hierarchies.
@@ -242,7 +264,6 @@ static int __init acpi_parse_dev_scope(v
switch ( acpi_scope->dev_type )
{
case ACPI_DEV_P2PBRIDGE:
- {
sec_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
sub_bus = pci_conf_read8(
@@ -253,7 +274,6 @@ static int __init acpi_parse_dev_scope(v
dmar_scope_add_buses(scope, sec_bus, sub_bus);
break;
- }
case ACPI_DEV_MSI_HPET:
dprintk(XENLOG_INFO VTDPREFIX, "found MSI HPET: bdf = %x:%x.%x\n",
@@ -268,7 +288,6 @@ static int __init acpi_parse_dev_scope(v
break;
case ACPI_DEV_IOAPIC:
- {
dprintk(XENLOG_INFO VTDPREFIX, "found IOAPIC: bdf = %x:%x.%x\n",
bus, path->dev, path->fn);
@@ -288,7 +307,6 @@ static int __init acpi_parse_dev_scope(v
scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn);
break;
}
- }
start += acpi_scope->length;
}
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/vtd/dmar.h
--- a/xen/drivers/passthrough/vtd/dmar.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/vtd/dmar.h Thu Dec 04 11:01:53 2008 +0900
@@ -80,6 +80,7 @@ struct acpi_atsr_unit {
idx < rmrr->scope.devices_cnt; idx++)
struct acpi_drhd_unit * acpi_find_matched_drhd_unit(u8 bus, u8 devfn);
+struct acpi_atsr_unit * acpi_find_matched_atsr_unit(u8 bus, u8 devfn);
void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec, u16 sub);
void dmar_scope_remove_buses(struct dmar_scope *scope, u16 sec, u16 sub);
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c Thu Dec 04 11:01:53 2008 +0900
@@ -446,10 +446,6 @@ static int flush_iotlb_reg(void *_iommu,
if ( DMA_TLB_IAIG(val) == 0 )
dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
- if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
- dprintk(XENLOG_INFO VTDPREFIX,
- "IOMMU: tlb flush request %x, actual %x\n",
- (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
/* flush iotlb entry will implicitly flush write buffer */
return 0;
}
@@ -714,22 +710,22 @@ static void iommu_fault_status(u32 fault
if ( fault_status & DMA_FSTS_PFO )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Fault Overflow\n");
- else if ( fault_status & DMA_FSTS_PPF )
+ if ( fault_status & DMA_FSTS_PPF )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Primary Pending Fault\n");
- else if ( fault_status & DMA_FSTS_AFO )
+ if ( fault_status & DMA_FSTS_AFO )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Advanced Fault Overflow\n");
- else if ( fault_status & DMA_FSTS_APF )
+ if ( fault_status & DMA_FSTS_APF )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Advanced Pending Fault\n");
- else if ( fault_status & DMA_FSTS_IQE )
+ if ( fault_status & DMA_FSTS_IQE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Queue Error\n");
- else if ( fault_status & DMA_FSTS_ICE )
+ if ( fault_status & DMA_FSTS_ICE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Completion Error\n");
- else if ( fault_status & DMA_FSTS_ITE )
+ if ( fault_status & DMA_FSTS_ITE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Time-out Error\n");
}
@@ -754,10 +750,11 @@ static void iommu_page_fault(int vector,
/* FIXME: ignore advanced fault log */
if ( !(fault_status & DMA_FSTS_PPF) )
- return;
+ goto clear_overflow;
+
fault_index = dma_fsts_fault_record_index(fault_status);
reg = cap_fault_reg_offset(iommu->cap);
- for ( ; ; )
+ while (1)
{
u8 fault_reason;
u16 source_id;
@@ -797,8 +794,9 @@ static void iommu_page_fault(int vector,
if ( fault_index > cap_num_fault_regs(iommu->cap) )
fault_index = 0;
}
-
+clear_overflow:
/* clear primary fault overflow */
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
if ( fault_status & DMA_FSTS_PFO )
{
spin_lock_irqsave(&iommu->register_lock, flags);
@@ -1125,10 +1123,11 @@ static int domain_context_mapping_one(
unmap_vtd_domain_page(context_entries);
/* Context entry was previously non-present (with domid 0). */
- iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT, 1);
- if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
+ if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT, 1) )
iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, 0, 1);
set_bit(iommu->index, &hd->iommu_bitmap);
spin_unlock_irqrestore(&iommu->lock, flags);
@@ -1308,8 +1307,12 @@ static int domain_context_unmap_one(
context_clear_present(*context);
context_clear_entry(*context);
iommu_flush_cache_entry(context);
- iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
- iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
+ if ( iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0) )
+ iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
unmap_vtd_domain_page(context_entries);
spin_unlock_irqrestore(&iommu->lock, flags);
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.h Thu Dec 04 11:01:53 2008 +0900
@@ -310,6 +310,10 @@ struct qinval_entry {
struct qinval_entry {
union {
struct {
+ u64 lo;
+ u64 hi;
+ }val;
+ struct {
struct {
u64 type : 4,
granu : 2,
diff -r 54e5d15af567 -r f4c5befcba8d xen/drivers/passthrough/vtd/qinval.c
--- a/xen/drivers/passthrough/vtd/qinval.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/drivers/passthrough/vtd/qinval.c Thu Dec 04 11:01:53 2008 +0900
@@ -34,13 +34,13 @@ static void print_qi_regs(struct iommu *
u64 val;
val = dmar_readq(iommu->reg, DMAR_IQA_REG);
- printk("DMAR_IAQ_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQA_REG = %"PRIx64"\n", val);
val = dmar_readq(iommu->reg, DMAR_IQH_REG);
- printk("DMAR_IAH_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQH_REG = %"PRIx64"\n", val);
val = dmar_readq(iommu->reg, DMAR_IQT_REG);
- printk("DMAR_IAT_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQT_REG = %"PRIx64"\n", val);
}
static int qinval_next_index(struct iommu *iommu)
@@ -252,14 +252,15 @@ static int gen_dev_iotlb_inv_dsc(struct
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
- qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
-
- unmap_vtd_domain_page(qinval_entries);
- spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
- return 0;
-}
-
-int queue_invalidate_device_iotlb(struct iommu *iommu,
+ qinval_entry->q.dev_iotlb_inv_dsc.hi.res_1 = 0;
+ qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;
+
+ unmap_vtd_domain_page(qinval_entries);
+ spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
+ return 0;
+}
+
+int qinval_device_iotlb(struct iommu *iommu,
u32 max_invs_pend, u16 sid, u16 size, u64 addr)
{
int ret = -1;
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/asm-x86/domain.h Thu Dec 04 11:01:53 2008 +0900
@@ -237,6 +237,10 @@ struct arch_domain
/* NB. protected by d->event_lock and by irq_desc[vector].lock */
int vector_pirq[NR_VECTORS];
s16 pirq_vector[NR_IRQS];
+
+ /* Shared page for notifying that explicit PIRQ EOI is required. */
+ unsigned long *pirq_eoi_map;
+ unsigned long pirq_eoi_map_mfn;
/* Pseudophysical e820 map (XENMEM_memory_map). */
struct e820entry e820[3];
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Thu Dec 04 11:01:53 2008 +0900
@@ -63,6 +63,7 @@ int amd_iommu_reserve_domain_unity_map(s
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
unsigned long phys_addr, unsigned long size, int iw, int ir);
int amd_iommu_sync_p2m(struct domain *d);
+void invalidate_all_iommu_pages(struct domain *d);
/* device table functions */
void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u64 intremap_ptr,
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/asm-x86/page.h Thu Dec 04 11:01:53 2008 +0900
@@ -228,7 +228,7 @@ void copy_page_sse2(void *, const void *
/* Convert between Xen-heap virtual addresses and machine frame numbers. */
#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT)
-#define mfn_to_virt(mfn) (maddr_to_virt(mfn << PAGE_SHIFT))
+#define mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
/* Convert between machine frame numbers and page-info structures. */
#define mfn_to_page(mfn) (frame_table + (mfn))
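
The page.h hunk above widens the frame number before shifting. Without
the cast, (mfn << PAGE_SHIFT) is evaluated at the width of mfn's type,
so on a 32-bit build any frame whose physical address lies at or above
4GB truncates. A standalone sketch, modelling the 32-bit case with
fixed-width types:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    typedef uint64_t paddr_t;

    int main(void)
    {
        /* Frame 2^20 corresponds to physical address 4GB. */
        uint32_t mfn = 0x100000u;  /* stands in for a 32-bit unsigned long */

        paddr_t truncated = (uint32_t)((uint64_t)mfn << PAGE_SHIFT); /* low 32 bits: 0 */
        paddr_t widened   = (paddr_t)mfn << PAGE_SHIFT;              /* 0x100000000 */

        printf("truncated=%#llx widened=%#llx\n",
               (unsigned long long)truncated, (unsigned long long)widened);
        return 0;
    }
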
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/public/physdev.h
--- a/xen/include/public/physdev.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/public/physdev.h Thu Dec 04 11:01:53 2008 +0900
@@ -41,6 +41,21 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest has used this function, in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn 17
+struct physdev_pirq_eoi_gmfn {
+ /* IN */
+ xen_pfn_t gmfn;
+};
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
+
+/*
* Query the status of an IRQ line.
* @arg == pointer to physdev_irq_status_query structure.
*/
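
To illustrate the new interface from the guest side, here is a hedged
sketch; the hypercall wrapper and bitmap helpers are stand-ins, not a
real guest API. The guest registers one page of bits, and thereafter
issues PHYSDEVOP_eoi only for a PIRQ whose bit Xen has set (once the
map is registered, the EOI also unmasks the bound event channel):

    #include <stdint.h>

    #define PAGE_SIZE               4096
    #define PHYSDEVOP_eoi           12
    #define PHYSDEVOP_pirq_eoi_gmfn 17  /* added by this changeset */

    typedef unsigned long xen_pfn_t;

    struct physdev_eoi           { uint32_t irq; };
    struct physdev_pirq_eoi_gmfn { xen_pfn_t gmfn; };

    /* Stand-in for the real hypercall mechanism. */
    static int hypercall_physdev_op(int cmd, void *arg)
    {
        (void)cmd; (void)arg;
        return 0;
    }

    /* One page, treated as a bit array indexed by Xen's PIRQ value. */
    static unsigned long pirq_eoi_map[PAGE_SIZE / sizeof(unsigned long)];

    static int pirq_needs_eoi(unsigned int pirq)
    {
        unsigned int bits = 8 * sizeof(unsigned long);
        return (pirq_eoi_map[pirq / bits] >> (pirq % bits)) & 1;
    }

    static void register_eoi_map(xen_pfn_t gmfn_of_map)
    {
        struct physdev_pirq_eoi_gmfn info = { .gmfn = gmfn_of_map };
        hypercall_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &info);
    }

    static void end_of_interrupt(unsigned int pirq)
    {
        if (pirq_needs_eoi(pirq)) {
            struct physdev_eoi eoi = { .irq = pirq };
            hypercall_physdev_op(PHYSDEVOP_eoi, &eoi);
        }
    }

    int main(void)
    {
        register_eoi_map(0x12345);  /* gmfn of the bitmap page (illustrative) */
        end_of_interrupt(9);
        return 0;
    }
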
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/xen/event.h
--- a/xen/include/xen/event.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/xen/event.h Thu Dec 04 11:01:53 2008 +0900
@@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign
/* Bind a local event-channel port to the specified VCPU. */
long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+/* Unmask a local event-channel port. */
+int evtchn_unmask(unsigned int port);
+
/* Allocate/free a Xen-attached event channel port. */
int alloc_unbound_xen_event_channel(
struct vcpu *local_vcpu, domid_t remote_domid);
diff -r 54e5d15af567 -r f4c5befcba8d xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/include/xen/irq.h Thu Dec 04 11:01:53 2008 +0900
@@ -22,6 +22,7 @@ struct irqaction
#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
+#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
/*
diff -r 54e5d15af567 -r f4c5befcba8d xen/tools/symbols.c
--- a/xen/tools/symbols.c Wed Dec 03 11:43:54 2008 +0900
+++ b/xen/tools/symbols.c Thu Dec 04 11:01:53 2008 +0900
@@ -81,7 +81,8 @@ static int read_symbol(FILE *in, struct
if (rc != 3) {
if (rc != EOF) {
/* skip line */
- fgets(str, 500, in);
+ if (fgets(str, 500, in) == NULL)
+ return -1; /* must check fgets result */
}
return -1;
}