# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1218633741 -32400
# Node ID da236d7f59b963585800e7471f8a0451b83ae569
# Parent fa8be8a6cb74976d5a96f830a9a2238abf622822
# Parent c6402709acc8122e3f8f92a885750afb4061ac61
merge with xen-unstable.hg
---
.hgtags | 1
docs/misc/kexec_and_kdump.txt | 213 ++++++++++++++++++++++++++++++++++++
extras/mini-os/include/lwipopts.h | 1
tools/Makefile | 7 -
tools/cross-install | 8 +
tools/ioemu/hw/pass-through.h | 1
tools/ioemu/hw/pt-msi.c | 24 +---
tools/libxc/xc_physdev.c | 10 -
tools/libxc/xenctrl.h | 2
tools/misc/xend | 16 +-
tools/python/xen/xend/XendAPI.py | 3
tools/python/xen/xend/XendConfig.py | 2
tools/python/xen/xend/XendPIF.py | 20 +++
xen/Makefile | 2
xen/arch/x86/cpu/mcheck/mce.h | 2
xen/arch/x86/mm/shadow/common.c | 40 +++++-
xen/arch/x86/mm/shadow/multi.c | 7 -
xen/arch/x86/mm/shadow/private.h | 9 -
xen/arch/x86/msi.c | 82 +++++--------
xen/arch/x86/oprofile/nmi_int.c | 40 ++++--
xen/arch/x86/physdev.c | 15 +-
xen/common/page_alloc.c | 13 ++
xen/drivers/passthrough/io.c | 3
xen/drivers/passthrough/vtd/iommu.c | 3
xen/include/asm-x86/event.h | 7 -
xen/include/asm-x86/msi.h | 10 +
xen/include/public/physdev.h | 11 +
27 files changed, 417 insertions(+), 135 deletions(-)
diff -r fa8be8a6cb74 -r da236d7f59b9 .hgtags
--- a/.hgtags Wed Aug 13 13:18:06 2008 +0900
+++ b/.hgtags Wed Aug 13 22:22:21 2008 +0900
@@ -28,3 +28,4 @@ c3494402098e26507fc61a6579832c0149351d6a
c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1
dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2
57fca3648f25dcc085ee380954342960a7979987 3.3.0-rc3
+96d0a48e87ee46ba7b73e8c906a7e2e0baf60e2e 3.3.0-rc4
diff -r fa8be8a6cb74 -r da236d7f59b9 docs/misc/kexec_and_kdump.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/misc/kexec_and_kdump.txt Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,213 @@
+
+=======================
+Kexec and Kdump for Xen
+=======================
+
+This is a brief guide to using Kexec and Kdump in conjunction with Xen.
+This functionality works at the level of the hypervisor and dom0 kernel,
+and will thus affect all guests running on a machine.
+
+At this stage it does not work in conjunction with domU kernels.
+
+This document should be read in conjunction with
+Documentation/kdump/kdump.txt from the Linux kernel source.
+Some of the information in this document has been
+sourced from that document.
+
+
+Kexec
+=====
+
+It is possible to kexec from Xen or Linux to either Xen or Linux.
+
+Pattern | Before Kexec | After Kexec
+---------------+--------------------+--------------------
+Xen -> Xen | first hypervisor & | second hypervisor &
+ | dom0 kernel | dom0 kernel
+---------------+--------------------+--------------------
+Xen -> Linux | first hypervisor & | second kernel
+ | dom0 kernel |
+---------------+--------------------+--------------------
+Linux -> Xen | first kernel | second hypervisor &
+ | | dom0 kernel
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel | second kernel
+
+If you are kexecing to Xen then you will also need to prepare the second
+hypervisor and dom0 kernel that will run after kexec. These may be the same
+as the first hypervisor and dom0 kernel that are used before kexec if you
+are kexecing from Xen to Xen.
+
+If you are kexecing to Linux then you will need to prepare the second Linux
+kernel that will run after kexec. In the case that you are kexecing from
+Linux, it may be the same as the first kernel image that runs before
+kexec.
+
+Regardless of which kexec pattern you wish to run, you will
+need to have kexec-tools installed. This provides the kexec command.
+
+1. Load
+-------
+
+Before kexecing, the second kernel or hypervisor & dom0 kernel
+need to be loaded into the running hypervisor or kernel using
+the kexec command.
+
+ a. To kexec to Xen (Xen->Xen or Linux->Xen)
+
+ kexec -l --append="XEN_ARGS -- DOM0_ARGS" \
+ --vmm="XEN_IMAGE" "DOM0_IMAGE" KEXEC_ARGS
+
+ where:
+ XEN_ARGS: command line arguments to the xen hypervisor
+ On x86 the no-real-mode argument should be included
+ DOM0_ARGS: command line arguments to the dom0 kernel
+ XEN_IMAGE: xen hypervisor image
+ DOM0_IMAGE: dom0 kernel image
+ KEXEC_ARGS: additional kexec-tools command line arguments
+
+ e.g. kexec -l --append "no-real-mode" --vmm="/boot/xen.gz" /boot/vmlinuz.gz
+
+ OR
+
+ b. To kexec to Linux (Xen->Linux or Linux->Linux)
+
+ kexec -l LINUX_IMAGE --append "$LINUX_ARGS" KEXEC_ARGS
+
+ where:
+ LINUX_IMAGE: the second linux kernel image
+ LINUX_ARGS: command line arguments to the second linux kernel
+ KEXEC_ARGS: additional kexec-tools command line arguments
+
+ e.g. kexec -l /boot/second-vmlinuz.gz
+
+2. Execute
+----------
+
+Once the second kernel is loaded, it can be executed at any time.
+If you don't see the second kernel booting within a second or so,
+you are in trouble :(
+
+ kexec -e
+
+Kdump
+=====
+
+It is possible to kdump from Xen or Linux to a Linux crash kernel.
+It is not possible to use Xen as a crash kernel.
+
+Pattern | Before Kexec | After Kexec
+---------------+--------------------+--------------------
+Xen -> Linux | first hypervisor & | crash kernel
+ | dom0 kernel |
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel | crash kernel
+
+Regardless of whether you are kdumping from Xen or Linux you will need to
+prepare a linux crash kernel. You will also need to have kexec-tools
+installed. This provides the kexec command.
+
+0. Set-Up The Crash Kernel Region
+---------------------------------
+
+In order to use kdump an area of memory has to be reserved at boot time.
+This is the area of memory that the crash kernel will use, thus allowing it
+to run without disrupting the memory used by the first kernel. This area is
+called the crash kernel region and is reserved using the crashkernel
+command line parameter to the Xen hypervisor. It has two forms:
+
+ i) crashkernel=size
+
+ This is the simplest and recommended way to reserve the crash kernel
+ region. Just specify how large the region should be and the hypervisor
+ will find a good location for it. A good size to start with is 128MB.
+
+ e.g.
+
+ crashkernel=128M
+
+ ii) crashkernel=size@base
+
+ In this form the base address is provided in addition to
+ the size. Use this if auto-placement doesn't work for some reason.
+ It is strongly recommended that the base address be aligned
+ to 64MB, else memory below the alignment point will not
+ be usable.
+
+ e.g. crashkernel=128M@256M
+
+ Regardless of which of the two forms of the crashkernel command line you
+ use, the crash kernel region should appear in /proc/iomem on x86 or
+ /proc/iomem_machine on ia64. If it doesn't then either the crashkernel
+ parameter is missing, or for some reason the region couldn't be placed -
+ for instance because it is too large.
+
+ # cat /proc/iomem
+ ...
+ 00100000-07feffff : System RAM
+ 00100000-00bfffff : Hypervisor code and data
+ 0533f000-0733efff : Crash kernel
+ ...
+
+
+1. Load
+-------
+
+Once you are running in a kexec-enabled hypervisor and dom0,
+you can prepare to kdump by loading the crash kernel into the
+running kernel.
+
+ kexec -p CRASH_KERNEL_IMAGE --append "$CRASH_KERNEL_ARGS" KEXEC_ARGS
+
+ where:
+ CRASH_KERNEL_IMAGE: the crash kernel image
+ CRASH_KERNEL_ARGS: command line arguments to the crash kernel
+ init 1 is strongly recommended
+ irqpoll is strongly recommended
+ maxcpus=1 is required if the crash kernel is SMP
+ reset_devices is strongly recommended
+ KEXEC_ARGS: additional kexec-tools command line arguments
+ On x86 --args-linux should be supplied if an uncompressed
+ vmlinux image is used as the crash kernel
+
+ e.g. kexec -p /boot/crash-vmlinuz \
+ --append "init 1 irqpoll maxcpus=1 reset_devices" --args-linux
+
+On x86 systems the crash kernel may be either
+- An uncompressed vmlinux image if the kernel is not relocatable
+- A compressed bzImage or vmlinuz image if the kernel is relocatable
+- Relocatability is controlled by the CONFIG_RELOCATABLE kernel
+ compile configuration parameter. This option may not be available
+ depending on the kernel version
+On ia64
+ Either a vmlinuz or vmlinux.gz image may be used
+
+
+2. Execute
+----------
+
+Once the second kernel is loaded, the crash kernel will be executed if the
+hypervisor panics. It will also be executed if dom0 panics or if dom0
+oopses and /proc/sys/kernel/panic_on_oops is set to a non-zero value:
+
+echo 1 > /proc/sys/kernel/panic_on_oops
+
+Kdump may also be triggered (for testing)
+
+ a. From Domain 0
+
+ echo c > /proc/sysrq-trigger
+
+ b. From Xen
+
+ Enter the xen console
+
+ ctrl^a ctrl^a (may be bound to a different key, this is the default)
+
+ Select C for "trigger a crashdump"
+
+ C
+
+If you don't see the crash kernel booting within a second or so,
+you are in trouble :(
+
diff -r fa8be8a6cb74 -r da236d7f59b9 extras/mini-os/include/lwipopts.h
--- a/extras/mini-os/include/lwipopts.h Wed Aug 13 13:18:06 2008 +0900
+++ b/extras/mini-os/include/lwipopts.h Wed Aug 13 22:22:21 2008 +0900
@@ -15,6 +15,7 @@
#define LWIP_DHCP 1
#define LWIP_COMPAT_SOCKETS 0
#define LWIP_IGMP 1
+#define LWIP_USE_HEAP_FROM_INTERRUPT 1
#define MEMP_NUM_SYS_TIMEOUT 10
#define TCP_SND_BUF 3000
#define TCP_MSS 1500
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/Makefile
--- a/tools/Makefile Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/Makefile Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,10 @@ endif
# For the sake of linking, set the sys-root
ifneq ($(CROSS_COMPILE),)
+CROSS_BIN_PATH ?= /usr/$(CROSS_COMPILE:-=)/bin
CROSS_SYS_ROOT ?= /usr/$(CROSS_COMPILE:-=)/sys-root
-export CROSS_SYS_ROOT
+export CROSS_SYS_ROOT # exported for check/funcs.sh
+export CROSS_BIN_PATH # exported for cross-install.sh
endif
.PHONY: all
@@ -57,7 +59,8 @@ ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_
ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \
--cross-prefix=$(CROSS_COMPILE) \
- --interp-prefix=$(CROSS_SYS_ROOT)
+ --interp-prefix=$(CROSS_SYS_ROOT) \
+ --install=$(CURDIR)/cross-install
endif
ioemu/config-host.mak:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/cross-install
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cross-install Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# prepend CROSS_BIN_PATH to find the right "strip"
+if [ -n "$CROSS_BIN_PATH" ]; then
+ PATH="$CROSS_BIN_PATH:$PATH"
+fi
+
+exec install "$@"
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h Wed Aug 13 22:22:21 2008 +0900
@@ -120,6 +120,7 @@ struct pt_msix_info {
int enabled;
int total_entries;
int bar_index;
+ uint64_t table_base;
uint32_t table_off;
uint64_t mmio_base_addr;
int mmio_index;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,8 @@ int pt_msi_setup(struct pt_dev *dev)
}
if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
- dev->pci_dev->dev << 3
| dev->pci_dev->func,
- dev->pci_dev->bus, 0,
1) )
+ dev->pci_dev->dev << 3 | dev->pci_dev->func,
+ dev->pci_dev->bus, 0, 0) )
{
PT_LOG("error map msi\n");
return -1;
@@ -121,7 +121,8 @@ static int pt_msix_update_one(struct pt_
{
ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
dev->pci_dev->dev << 3 | dev->pci_dev->func,
- dev->pci_dev->bus, entry_nr, 0);
+ dev->pci_dev->bus, entry_nr,
+ dev->msix->table_base);
if ( ret )
{
PT_LOG("error map msix entry %x\n", entry_nr);
@@ -183,7 +184,7 @@ static void pci_msix_writel(void *opaque
entry = &msix->msix_entry[entry_nr];
offset = ((addr - msix->mmio_base_addr) % 16) / 4;
- if ( offset != 3 && msix->enabled && entry->io_mem[3] & 0x1 )
+ if ( offset != 3 && msix->enabled && !(entry->io_mem[3] & 0x1) )
{
PT_LOG("can not update msix entry %d since MSI-X is already \
function now.\n", entry_nr);
@@ -196,7 +197,7 @@ static void pci_msix_writel(void *opaque
if ( offset == 3 )
{
- if ( !(val & 0x1) )
+ if ( msix->enabled && !(val & 0x1) )
pt_msix_update_one(dev, entry_nr);
mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
}
@@ -280,7 +281,6 @@ int pt_msix_init(struct pt_dev *dev, int
uint8_t id;
uint16_t control;
int i, total_entries, table_off, bar_index;
- uint64_t bar_base;
struct pci_dev *pd = dev->pci_dev;
id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
@@ -314,18 +314,14 @@ int pt_msix_init(struct pt_dev *dev, int
table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
table_off &= table_off & ~PCI_MSIX_BIR;
- bar_base = pci_read_long(pd, 0x10 + 4 * bar_index);
- if ( (bar_base & 0x6) == 0x4 )
- {
- bar_base &= ~0xf;
- bar_base += (uint64_t)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) <<
32;
- }
- PT_LOG("get MSI-X table bar base %lx\n", bar_base);
+ dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
+ PT_LOG("get MSI-X table bar base %llx\n",
+ (unsigned long long)dev->msix->table_base);
dev->msix->fd = open("/dev/mem", O_RDWR);
dev->msix->phys_iomem_base = mmap(0, total_entries * 16,
PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED,
- dev->msix->fd, bar_base + table_off);
+ dev->msix->fd, dev->msix->table_base + table_off);
PT_LOG("mapping physical MSI-X table to %lx\n",
(unsigned long)dev->msix->phys_iomem_base);
return 0;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xc_physdev.c
--- a/tools/libxc/xc_physdev.c Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xc_physdev.c Wed Aug 13 22:22:21 2008 +0900
@@ -51,7 +51,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
int devfn,
int bus,
int entry_nr,
- int msi_type)
+ uint64_t table_base)
{
int rc;
struct physdev_map_pirq map;
@@ -63,10 +63,10 @@ int xc_physdev_map_pirq_msi(int xc_handl
map.type = MAP_PIRQ_TYPE_MSI;
map.index = index;
map.pirq = *pirq;
- map.msi_info.devfn = devfn;
- map.msi_info.bus = bus;
- map.msi_info.entry_nr = entry_nr;
- map.msi_info.msi = msi_type;
+ map.bus = bus;
+ map.devfn = devfn;
+ map.entry_nr = entry_nr;
+ map.table_base = table_base;
rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map);
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xenctrl.h Wed Aug 13 22:22:21 2008 +0900
@@ -917,7 +917,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
int devfn,
int bus,
int entry_nr,
- int msi_type);
+ uint64_t table_base);
int xc_physdev_unmap_pirq(int xc_handle,
int domid,
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/misc/xend
--- a/tools/misc/xend Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/misc/xend Wed Aug 13 22:22:21 2008 +0900
@@ -77,6 +77,10 @@ def check_user():
hline()
raise CheckError("invalid user")
+def start_daemon(daemon, *args):
+ if os.fork() == 0:
+ os.execvp(daemon, (daemon,) + args)
+
def start_xenstored():
pidfname = "/var/run/xenstore.pid"
try:
@@ -102,13 +106,15 @@ def start_xenstored():
s,o = commands.getstatusoutput(cmd)
def start_consoled():
- if os.fork() == 0:
- os.execvp('xenconsoled', ['xenconsoled'])
+ XENCONSOLED_TRACE = os.getenv("XENCONSOLED_TRACE")
+ args = ""
+ if XENCONSOLED_TRACE:
+ args += "--log=" + XENCONSOLED_TRACE
+ start_daemon("xenconsoled", args)
def start_blktapctrl():
- if os.fork() == 0:
- os.execvp('blktapctrl', ['blktapctrl'])
-
+ start_daemon("blktapctrl", "")
+
def main():
try:
check_logging()
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py Wed Aug 13 22:22:21 2008 +0900
@@ -2265,7 +2265,8 @@ class XendAPI(object):
'type': image.type,
'sharable': image.sharable,
'read_only': image.read_only,
- 'other_config': image.other_config
+ 'other_config': image.other_config,
+ 'security_label' : image.get_security_label()
})
# Class Functions
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py Wed Aug 13 22:22:21 2008 +0900
@@ -448,7 +448,7 @@ class XendConfig(dict):
self['platform']['hpet'] = 0
if 'loader' not in self['platform']:
# Old configs may have hvmloader set as PV_kernel param
- if self.has_key('PV_kernel') and re.search('hvmloader',
self['PV_kernel']):
+ if self.has_key('PV_kernel') and self['PV_kernel'] != '':
self['platform']['loader'] = self['PV_kernel']
self['PV_kernel'] = ''
else:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendPIF.py
--- a/tools/python/xen/xend/XendPIF.py Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendPIF.py Wed Aug 13 22:22:21 2008 +0900
@@ -95,6 +95,22 @@ def linux_set_mtu(iface, mtu):
except ValueError:
return False
+def linux_get_mtu(device):
+    return _linux_get_pif_param(device, 'mtu')
+
+def linux_get_mac(device):
+    return _linux_get_pif_param(device, 'link/ether')
+
+def _linux_get_pif_param(device, param_name):
+    """Return the token after param_name in 'ip link show' output, or ''."""
+    rc, output = commands.getstatusoutput('ip link show %s' % device)
+    if rc == 0:
+        params = output.split(' ')
+        for i in xrange(len(params) - 1):
+            if params[i] == param_name:
+                return params[i+1]
+    return ''
+
def _create_VLAN(dev, vlan):
rc, _ = commands.getstatusoutput('vconfig add %s %d' %
(dev, vlan))
@@ -259,8 +275,8 @@ class XendPIF(XendBase):
# Create the record
record = {
"device": device,
- "MAC": '',
- "MTU": '',
+ "MAC": linux_get_mac('%s.%d' % (device, vlan)),
+ "MTU": linux_get_mtu('%s.%d' % (device, vlan)),
"network": network_uuid,
"VLAN": vlan
}
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/Makefile
--- a/xen/Makefile Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/Makefile Wed Aug 13 22:22:21 2008 +0900
@@ -2,7 +2,7 @@
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 3
export XEN_SUBVERSION = 3
-export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .0-rc5-pre$(XEN_VENDORVERSION)
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-include xen-version
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h Wed Aug 13 22:22:21 2008 +0900
@@ -26,5 +26,5 @@ void x86_mcinfo_dump(struct mc_info *mi)
void x86_mcinfo_dump(struct mc_info *mi);
/* Global variables */
-extern int mce_disabled __initdata;
+extern int mce_disabled;
extern unsigned int nr_mce_banks;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c Wed Aug 13 22:22:21 2008 +0900
@@ -3357,23 +3357,45 @@ shadow_write_p2m_entry(struct vcpu *v, u
}
}
- /* If we're removing a superpage mapping from the p2m, remove all the
- * MFNs covered by it from the shadows too. */
+ /* If we're removing a superpage mapping from the p2m, we need to check
+ * all the pages covered by it. If they're still there in the new
+ * scheme, that's OK, but otherwise they must be unshadowed. */
if ( level == 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) &&
(l1e_get_flags(*p) & _PAGE_PSE) )
{
unsigned int i;
- mfn_t mfn = _mfn(l1e_get_pfn(*p));
+ cpumask_t flushmask;
+ mfn_t omfn = _mfn(l1e_get_pfn(*p));
+ mfn_t nmfn = _mfn(l1e_get_pfn(new));
+ l1_pgentry_t *npte = NULL;
p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
- if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
- {
+ if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
+ {
+ cpus_clear(flushmask);
+
+ /* If we're replacing a superpage with a normal L1 page, map it */
+ if ( (l1e_get_flags(new) & _PAGE_PRESENT)
+ && !(l1e_get_flags(new) & _PAGE_PSE)
+ && mfn_valid(nmfn) )
+ npte = map_domain_page(mfn_x(nmfn));
+
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
- sh_remove_all_shadows_and_parents(v, mfn);
- if ( sh_remove_all_mappings(v, mfn) )
- flush_tlb_mask(d->domain_dirty_cpumask);
- mfn = _mfn(mfn_x(mfn) + 1);
+ if ( !npte
+ || !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i])))
+ || l1e_get_pfn(npte[i]) != mfn_x(omfn) )
+ {
+ /* This GFN->MFN mapping has gone away */
+ sh_remove_all_shadows_and_parents(v, omfn);
+ if ( sh_remove_all_mappings(v, omfn) )
+ cpus_or(flushmask, flushmask, d->domain_dirty_cpumask);
+ }
+ omfn = _mfn(mfn_x(omfn) + 1);
}
+ flush_tlb_mask(flushmask);
+
+ if ( npte )
+ unmap_domain_page(npte);
}
}
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c Wed Aug 13 22:22:21 2008 +0900
@@ -3181,14 +3181,9 @@ static int sh_page_fault(struct vcpu *v,
rc = guest_walk_tables(v, va, &gw, regs->error_code);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ regs->error_code &= ~PFEC_page_present;
if ( !(rc & _PAGE_PRESENT) )
regs->error_code |= PFEC_page_present;
- else if ( regs->error_code & PFEC_page_present )
- {
- SHADOW_ERROR("OOS paranoia: Something is wrong in guest TLB"
- " flushing. Have fun debugging it.\n");
- regs->error_code &= ~PFEC_page_present;
- }
#endif
if ( rc != 0 )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h Wed Aug 13 22:22:21 2008 +0900
@@ -213,15 +213,14 @@ struct shadow_page_info
};
};
-/* The structure above *must* be the same size as a struct page_info
+/* The structure above *must* be no larger than a struct page_info
* from mm.h, since we'll be using the same space in the frametable.
* Also, the mbz field must line up with the owner field of normal
* pages, so they look properly like anonymous/xen pages. */
static inline void shadow_check_page_struct_offsets(void) {
- BUILD_BUG_ON(sizeof (struct shadow_page_info)
- != sizeof (struct page_info));
- BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz)
- != offsetof(struct page_info, u.inuse._domain));
+ BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info));
+ BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
+ offsetof(struct page_info, u.inuse._domain));
};
/* Shadow type codes */
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/msi.c Wed Aug 13 22:22:21 2008 +0900
@@ -490,28 +490,6 @@ static int msi_capability_init(struct pc
return 0;
}
-static u64 pci_resource_start(struct pci_dev *dev, u8 bar_index)
-{
- u64 bar_base;
- u32 reg_val;
- u8 bus = dev->bus;
- u8 slot = PCI_SLOT(dev->devfn);
- u8 func = PCI_FUNC(dev->devfn);
-
- reg_val = pci_conf_read32(bus, slot, func,
- PCI_BASE_ADDRESS_0 + 4 * bar_index);
- bar_base = reg_val & PCI_BASE_ADDRESS_MEM_MASK;
- if ( ( reg_val & PCI_BASE_ADDRESS_MEM_TYPE_MASK ) ==
- PCI_BASE_ADDRESS_MEM_TYPE_64 )
- {
- reg_val = pci_conf_read32(bus, slot, func,
- PCI_BASE_ADDRESS_0 + 4 * (bar_index + 1));
- bar_base |= ((u64)reg_val) << 32;
- }
-
- return bar_base;
-}
-
/**
* msix_capability_init - configure device's MSI-X capability
* @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -522,7 +500,7 @@ static u64 pci_resource_start(struct pci
* single MSI-X irq. A return of zero indicates the successful setup of
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
-static int msix_capability_init(struct pci_dev *dev, int vector, int entry_nr)
+static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
{
struct msi_desc *entry;
int pos;
@@ -549,7 +527,7 @@ static int msix_capability_init(struct p
table_offset = pci_conf_read32(bus, slot, func,
msix_table_offset_reg(pos));
bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
- phys_addr = pci_resource_start(dev, bir) + table_offset;
+ phys_addr = msi->table_base + table_offset;
idx = msix_fixmap_alloc();
if ( idx < 0 )
{
@@ -561,11 +539,11 @@ static int msix_capability_init(struct p
entry->msi_attrib.type = PCI_CAP_ID_MSIX;
entry->msi_attrib.is_64 = 1;
- entry->msi_attrib.entry_nr = entry_nr;
+ entry->msi_attrib.entry_nr = msi->entry_nr;
entry->msi_attrib.maskbit = 1;
entry->msi_attrib.masked = 1;
entry->msi_attrib.pos = pos;
- entry->vector = vector;
+ entry->vector = msi->vector;
entry->dev = dev;
entry->mask_base = base;
@@ -589,24 +567,25 @@ static int msix_capability_init(struct p
* indicates the successful setup of an entry zero with the new MSI
* irq or non-zero for otherwise.
**/
-static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
+static int __pci_enable_msi(struct msi_info *msi)
{
int status;
struct pci_dev *pdev;
- pdev = pci_lock_pdev(bus, devfn);
+ pdev = pci_lock_pdev(msi->bus, msi->devfn);
if ( !pdev )
return -ENODEV;
- if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
+ if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
{
spin_unlock(&pdev->lock);
- dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device
\
- %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
+ "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
return 0;
}
- status = msi_capability_init(pdev, vector);
+ status = msi_capability_init(pdev, msi->vector);
spin_unlock(&pdev->lock);
return status;
}
@@ -659,37 +638,37 @@ static void __pci_disable_msi(int vector
* of irqs available. Driver should use the returned value to re-send
* its request.
**/
-static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
+static int __pci_enable_msix(struct msi_info *msi)
{
int status, pos, nr_entries;
struct pci_dev *pdev;
u16 control;
- u8 slot = PCI_SLOT(devfn);
- u8 func = PCI_FUNC(devfn);
-
- pdev = pci_lock_pdev(bus, devfn);
+ u8 slot = PCI_SLOT(msi->devfn);
+ u8 func = PCI_FUNC(msi->devfn);
+
+ pdev = pci_lock_pdev(msi->bus, msi->devfn);
if ( !pdev )
return -ENODEV;
- pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
- control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
+ pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
+ control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
nr_entries = multi_msix_capable(control);
- if (entry_nr > nr_entries)
+ if (msi->entry_nr > nr_entries)
{
spin_unlock(&pdev->lock);
return -EINVAL;
}
- if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
+ if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
{
spin_unlock(&pdev->lock);
- dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
- device %02x:%02x.%01x.\n", vector, bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
+ "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
return 0;
}
- status = msix_capability_init(pdev, vector, entry_nr);
+ status = msix_capability_init(pdev, msi);
spin_unlock(&pdev->lock);
return status;
}
@@ -727,13 +706,12 @@ static void __pci_disable_msix(int vecto
spin_unlock(&dev->lock);
}
-int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
-{
- ASSERT(spin_is_locked(&irq_desc[vector].lock));
- if ( msi )
- return __pci_enable_msi(bus, devfn, vector);
- else
- return __pci_enable_msix(bus, devfn, vector, entry_nr);
+int pci_enable_msi(struct msi_info *msi)
+{
+ ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
+
+ return msi->table_base ? __pci_enable_msix(msi) :
+ __pci_enable_msi(msi);
}
void pci_disable_msi(int vector)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/oprofile/nmi_int.c Wed Aug 13 22:22:21 2008 +0900
@@ -296,24 +296,40 @@ static int __init ppro_init(char ** cpu_
{
__u8 cpu_model = current_cpu_data.x86_model;
- if (cpu_model == 15 || cpu_model == 23) {
+ switch (cpu_model) {
+ case 0 ... 2:
+ *cpu_type = "i386/ppro";
+ break;
+ case 3 ... 5:
+ *cpu_type = "i386/pii";
+ break;
+ case 6 ... 8:
+ *cpu_type = "i386/piii";
+ break;
+ case 9:
+ *cpu_type = "i386/p6_mobile";
+ break;
+ case 10 ... 13:
+ *cpu_type = "i386/p6";
+ break;
+ case 14:
+ *cpu_type = "i386/core";
+ break;
+ case 15: case 23:
*cpu_type = "i386/core_2";
ppro_has_global_ctrl = 1;
- } else if (cpu_model == 14)
- *cpu_type = "i386/core";
- else if (cpu_model > 13) {
+ break;
+ case 26:
+ *cpu_type = "i386/core_2";
+ ppro_has_global_ctrl = 1;
+ break;
+ default:
+ /* Unknown */
printk("xenoprof: Initialization failed. "
"Intel processor model %d for P6 class family is not "
"supported\n", cpu_model);
return 0;
- } else if (cpu_model == 9)
- *cpu_type = "i386/p6_mobile";
- else if (cpu_model > 5)
- *cpu_type = "i386/piii";
- else if (cpu_model > 2)
- *cpu_type = "i386/pii";
- else
- *cpu_type = "i386/ppro";
+ }
model = &op_ppro_spec;
return 1;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/physdev.c Wed Aug 13 22:22:21 2008 +0900
@@ -66,6 +66,7 @@ static int map_domain_pirq(struct domain
{
int ret = 0;
int old_vector, old_pirq;
+ struct msi_info msi;
if ( d == NULL )
return -EINVAL;
@@ -115,10 +116,14 @@ static int map_domain_pirq(struct domain
vector);
desc->handler = &pci_msi_type;
- ret = pci_enable_msi(map->msi_info.bus,
- map->msi_info.devfn, vector,
- map->msi_info.entry_nr,
- map->msi_info.msi);
+ msi.bus = map->bus;
+ msi.devfn = map->devfn;
+ msi.entry_nr = map->entry_nr;
+ msi.table_base = map->table_base;
+ msi.vector = vector;
+
+ ret = pci_enable_msi(&msi);
+
spin_unlock_irqrestore(&desc->lock, flags);
if ( ret )
goto done;
@@ -139,7 +144,7 @@ static int unmap_domain_pirq(struct doma
int ret = 0;
int vector;
- if ( d == NULL || pirq < 0 || pirq > NR_PIRQS )
+ if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
return -EINVAL;
if ( !IS_PRIV(current->domain) )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/common/page_alloc.c Wed Aug 13 22:22:21 2008 +0900
@@ -950,6 +950,14 @@ static void page_scrub_softirq(void)
void *p;
int i;
s_time_t start = NOW();
+ static spinlock_t serialise_lock = SPIN_LOCK_UNLOCKED;
+
+ /* free_heap_pages() does not parallelise well. Serialise this function. */
+ if ( !spin_trylock(&serialise_lock) )
+ {
+ set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1));
+ return;
+ }
/* Aim to do 1ms of work every 10ms. */
do {
@@ -958,7 +966,7 @@ static void page_scrub_softirq(void)
if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
{
spin_unlock(&page_scrub_lock);
- return;
+ goto out;
}
/* Peel up to 16 pages from the list. */
@@ -989,6 +997,9 @@ static void page_scrub_softirq(void)
} while ( (NOW() - start) < MILLISECS(1) );
set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
+
+ out:
+ spin_unlock(&serialise_lock);
}
static void page_scrub_timer_fn(void *unused)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/io.c Wed Aug 13 22:22:21 2008 +0900
@@ -74,6 +74,9 @@ int pt_irq_create_bind_vtd(
if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
{
int pirq = pt_irq_bind->machine_irq;
+
+ if ( pirq < 0 || pirq >= NR_IRQS )
+ return -EINVAL;
if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID ) )
{
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c Wed Aug 13 22:22:21 2008 +0900
@@ -1789,7 +1789,8 @@ int intel_vtd_setup(void)
memset(domid_bitmap, 0, domid_bitmap_size / 8);
set_bit(0, domid_bitmap);
- init_vtd_hw();
+ if ( init_vtd_hw() )
+ goto error;
register_keyhandler('V', dump_iommu_info, "dump iommu info");
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/event.h Wed Aug 13 22:22:21 2008 +0900
@@ -69,12 +69,7 @@ static inline void local_event_delivery_
/* No arch specific virq definition now. Default to global. */
static inline int arch_virq_is_global(int virq)
{
- switch (virq) {
- case VIRQ_MCA:
- return 1;
- default:
- return 1;
- }
+ return 1;
}
#endif
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/msi.h Wed Aug 13 22:22:21 2008 +0900
@@ -53,6 +53,14 @@
#else
#define MAX_MSIX_PAGES 32
#endif
+
+struct msi_info {
+ int bus;
+ int devfn;
+ int vector;
+ int entry_nr;
+ uint64_t table_base;
+};
struct msi_msg {
u32 address_lo; /* low 32 bits of msi message address */
@@ -64,7 +72,7 @@ extern void mask_msi_irq(unsigned int ir
extern void mask_msi_irq(unsigned int irq);
extern void unmask_msi_irq(unsigned int irq);
extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
-extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
+extern int pci_enable_msi(struct msi_info *msi);
extern void pci_disable_msi(int vector);
extern void pci_cleanup_msi(struct pci_dev *pdev);
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/public/physdev.h Wed Aug 13 22:22:21 2008 +0900
@@ -136,10 +136,13 @@ struct physdev_map_pirq {
/* IN or OUT */
int pirq;
/* IN */
- struct {
- int bus, devfn, entry_nr;
- int msi; /* 0 - MSIX 1 - MSI */
- } msi_info;
+ int bus;
+ /* IN */
+ int devfn;
+ /* IN */
+ int entry_nr;
+ /* IN */
+ uint64_t table_base;
};
typedef struct physdev_map_pirq physdev_map_pirq_t;
DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|