WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] merge with xen-unstable.hg

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 13 Aug 2008 08:10:21 -0700
Delivery-date: Wed, 13 Aug 2008 08:11:25 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1218633741 -32400
# Node ID da236d7f59b963585800e7471f8a0451b83ae569
# Parent  fa8be8a6cb74976d5a96f830a9a2238abf622822
# Parent  c6402709acc8122e3f8f92a885750afb4061ac61
merge with xen-unstable.hg
---
 .hgtags                             |    1 
 docs/misc/kexec_and_kdump.txt       |  213 ++++++++++++++++++++++++++++++++++++
 extras/mini-os/include/lwipopts.h   |    1 
 tools/Makefile                      |    7 -
 tools/cross-install                 |    8 +
 tools/ioemu/hw/pass-through.h       |    1 
 tools/ioemu/hw/pt-msi.c             |   24 +---
 tools/libxc/xc_physdev.c            |   10 -
 tools/libxc/xenctrl.h               |    2 
 tools/misc/xend                     |   16 +-
 tools/python/xen/xend/XendAPI.py    |    3 
 tools/python/xen/xend/XendConfig.py |    2 
 tools/python/xen/xend/XendPIF.py    |   20 +++
 xen/Makefile                        |    2 
 xen/arch/x86/cpu/mcheck/mce.h       |    2 
 xen/arch/x86/mm/shadow/common.c     |   40 +++++-
 xen/arch/x86/mm/shadow/multi.c      |    7 -
 xen/arch/x86/mm/shadow/private.h    |    9 -
 xen/arch/x86/msi.c                  |   82 +++++--------
 xen/arch/x86/oprofile/nmi_int.c     |   40 ++++--
 xen/arch/x86/physdev.c              |   15 +-
 xen/common/page_alloc.c             |   13 ++
 xen/drivers/passthrough/io.c        |    3 
 xen/drivers/passthrough/vtd/iommu.c |    3 
 xen/include/asm-x86/event.h         |    7 -
 xen/include/asm-x86/msi.h           |   10 +
 xen/include/public/physdev.h        |   11 +
 27 files changed, 417 insertions(+), 135 deletions(-)

diff -r fa8be8a6cb74 -r da236d7f59b9 .hgtags
--- a/.hgtags   Wed Aug 13 13:18:06 2008 +0900
+++ b/.hgtags   Wed Aug 13 22:22:21 2008 +0900
@@ -28,3 +28,4 @@ c3494402098e26507fc61a6579832c0149351d6a
 c3494402098e26507fc61a6579832c0149351d6a 3.3.0-rc1
 dde12ff94c96331668fe38a7b09506fa94d03c34 3.3.0-rc2
 57fca3648f25dcc085ee380954342960a7979987 3.3.0-rc3
+96d0a48e87ee46ba7b73e8c906a7e2e0baf60e2e 3.3.0-rc4
diff -r fa8be8a6cb74 -r da236d7f59b9 docs/misc/kexec_and_kdump.txt
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/misc/kexec_and_kdump.txt     Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,213 @@
+
+=======================
+Kexec and Kdump for Xen
+=======================
+
+This is a brief guide to using Kexec and Kdump in conjunction with Xen.
+This functionality works at the level of the hypervisor and dom0 kernel,
+and will thus affect all guests running on a machine.
+
+At this stage it does not work in conjunction with domU kernels.
+
+This document should be read in conjunction with
+Documentation/kdump/kdump.txt from the Linux kernel source.
+Some of the information in this document has been
+sourced from that document.
+
+
+Kexec
+=====
+
+It is possible to kexec from Xen or Linux to either Xen or Linux.
+
+Pattern        | Before Kexec       | After Kexec
+---------------+--------------------+--------------------
+Xen -> Xen     | first hypervisor & | second hypervisor &
+               | dom0 kernel        | dom0 kernel
+---------------+--------------------+--------------------
+Xen   -> Linux | first hypervisor & | second kernel
+               | dom0 kernel        |
+---------------+--------------------+--------------------
+Linux -> Xen   | first kernel       | second hypervisor &
+               |                    | dom0 kernel
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel       | second kernel
+
+If you are kexecing to Xen then you will also need to prepare the second
+hypervisor and dom0 kernel that will run after kexec. These may be the same
+as the first hypervisor and dom0 kernel that are used before kexec if you
+are kexecing from Xen to Xen.
+
+If you are kexecing to Linux then you will need to prepare the second Linux
+kernel that will run after kexec. In the case that you are kexecing from
+Linux, it may be the same as the first kernel image that runs before
+kexec.
+
+Regardless of which kexec pattern you wish to run, you will
+need to have kexec-tools installed. This provides the kexec command.
+
+1. Load
+-------
+
+Before kexecing, the second kernel or hypervisor & dom0 kernel
+needs to be loaded into the running hypervisor or kernel using
+the kexec command.
+
+  a. To kexec to Xen (Xen->Xen or Linux->Xen)
+
+  kexec -l --append="XEN_ARGS -- DOM0_ARGS" \
+       --vmm="XEN_IMAGE" "DOM0_IMAGE" KEXEC_ARGS
+
+  where:
+    XEN_ARGS: command line arguments to the xen hypervisor
+              On x86 the no-real-mode argument should be included
+    DOM0_ARGS: command line arguments to the dom0 kernel
+    XEN_IMAGE: xen hypervisor image
+    DOM0_IMAGE: dom0 kernel image
+    KEXEC_ARGS: additional kexec-tools command line arguments
+
+  e.g. kexec -l --append "no-real-mode" --vmm="/boot/xen.gz" /boot/vmlinuz.gz
+
+  OR
+
+  b. To kexec to Linux (Xen->Linux or Linux->Linux)
+
+  kexec -l LINUX_IMAGE --append "$LINUX_ARGS" KEXEC_ARGS
+
+  where:
+    LINUX_IMAGE: the second linux kernel image
+    LINUX_ARGS: command line arguments to the second linux kernel
+    KEXEC_ARGS: additional kexec-tools command line arguments
+
+  e.g. kexec -l /boot/second-vmlinuz.gz
+
+2. Execute
+----------
+
+Once the second kernel is loaded, it can be executed at any time.
+If you don't see the second kernel booting within a second or so,
+you are in trouble :(
+
+   kexec -e
+
+Kdump
+=====
+
+It is possible to kdump from Xen or Linux to a Linux crash kernel.
+It is not possible to use xen as a crash kernel.
+
+Pattern        | Before Kexec       | After Kexec
+---------------+--------------------+--------------------
+Xen -> Linux   | first hypervisor & | crash kernel
+               | dom0 kernel        |
+---------------+--------------------+--------------------
+Linux -> Linux | first kernel       | crash kernel
+
+Regardless of whether you are kdumping from Xen or Linux you will need to
+prepare a linux crash kernel.  You will also need to have kexec-tools
+installed. This provides the kexec command.
+
+0. Set-Up The Crash Kernel Region
+---------------------------------
+
+In order to use kdump an area of memory has to be reserved at boot time.
+This is the area of memory that the crash kernel will use, thus allowing it
+to run without disrupting the memory used by the first kernel. This area is
+called the crash kernel region and is reserved using the crashkernel
+command line parameter to the Xen hypervisor. It has two forms:
+
+  i) crashkernel=size
+
+     This is the simplest and recommended way to reserve the crash kernel
+     region. Just specify how large the region should be and the hypervisor
+     will find a good location for it. A good size to start with is 128MB.
+
+     e.g.
+
+     crashkernel=128M
+
+  ii) crashkernel=size@base
+
+      In this form the base address is provided in addition to
+      the size. Use this if auto-placement doesn't work for some reason.
+      It is strongly recommended that the base address be aligned
+      to 64MB, else memory below the alignment point will not
+      be usable.
+
+      e.g. crashkernel=128M@256M
+
+   Regardless of which of the two forms of the crashkernel command line you
+   use, the crash kernel region should appear in /proc/iomem on x86 or
+   /proc/iomem_machine on ia64. If it doesn't then either the crashkernel
+   parameter is missing, or for some reason the region couldn't be placed -
+   for instance because it is too large.
+
+   # cat /proc/iomem
+   ...
+   00100000-07feffff : System RAM
+     00100000-00bfffff : Hypervisor code and data
+     0533f000-0733efff : Crash kernel
+   ...
+
+
+1. Load
+-------
+
+Once you are running in a kexec-enabled hypervisor and dom0,
+you can prepare to kdump by loading the crash kernel into the
+running kernel.
+
+  kexec -p CRASH_KERNEL_IMAGE --append "$CRASH_KERNEL_ARGS" KEXEC_ARGS
+
+  where:
+    CRASH_KERNEL_IMAGE: the crash kernel image
+    CRASH_KERNEL_ARGS: command line arguments to the crash kernel
+                      init 1 is strongly recommended
+                      irqpoll is strongly recommended
+                      maxcpus=1 is required if the crash kernel is SMP
+                      reset_devices is strongly recommended
+    KEXEC_ARGS: additional kexec-tools command line arguments
+                On x86 --args-linux should be supplied if an uncompressed
+               vmlinux image is used as the crash kernel
+
+  e.g. kexec -p /boot/crash-vmlinuz \
+        --append "init 1 irqpoll maxcpus=1 reset_devices" --args-linux
+
+On x86 systems the crash kernel may be either
+- An uncompressed vmlinux image if the kernel is not relocatable
+- A compressed bzImage or vmlinuz image if the kernel is relocatable
+- Relocatability is controlled by the CONFIG_RELOCATABLE kernel
+  compile configuration parameter. This option may not be available
+  depending on the kernel version
+On ia64
+  Either a vmlinuz or vmlinux.gz image may be used
+
+
+2. Execute
+----------
+
+Once the second kernel is loaded, the crash kernel will be executed if the
+hypervisor panics. It will also be executed if dom0 panics or if dom0
+oopses and /proc/sys/kernel/panic_on_oops is set to a non-zero value
+
+echo 1 > /proc/sys/kernel/panic_on_oops
+
+Kdump may also be triggered (for testing)
+
+  a. From Domain 0
+
+  echo c > /proc/sysrq-trigger
+
+  b. From Xen
+
+     Enter the xen console
+
+     ctrl^a ctrl^a  (may be bound to a different key, this is the default)
+
+     Select C for "trigger a crashdump"
+
+     C
+
+If you don't see the crash kernel booting within a second or so,
+you are in trouble :(
+
diff -r fa8be8a6cb74 -r da236d7f59b9 extras/mini-os/include/lwipopts.h
--- a/extras/mini-os/include/lwipopts.h Wed Aug 13 13:18:06 2008 +0900
+++ b/extras/mini-os/include/lwipopts.h Wed Aug 13 22:22:21 2008 +0900
@@ -15,6 +15,7 @@
 #define LWIP_DHCP 1
 #define LWIP_COMPAT_SOCKETS 0
 #define LWIP_IGMP 1
+#define LWIP_USE_HEAP_FROM_INTERRUPT 1
 #define MEMP_NUM_SYS_TIMEOUT 10
 #define TCP_SND_BUF 3000
 #define TCP_MSS 1500
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/Makefile
--- a/tools/Makefile    Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/Makefile    Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,10 @@ endif
 
 # For the sake of linking, set the sys-root
 ifneq ($(CROSS_COMPILE),)
+CROSS_BIN_PATH ?= /usr/$(CROSS_COMPILE:-=)/bin
 CROSS_SYS_ROOT ?= /usr/$(CROSS_COMPILE:-=)/sys-root
-export CROSS_SYS_ROOT
+export CROSS_SYS_ROOT # exported for check/funcs.sh
+export CROSS_BIN_PATH # exported for cross-install.sh
 endif
 
 .PHONY: all
@@ -57,7 +59,8 @@ ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_
 ifneq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
 IOEMU_CONFIGURE_CROSS ?= --cpu=$(XEN_TARGET_ARCH) \
                         --cross-prefix=$(CROSS_COMPILE) \
-                        --interp-prefix=$(CROSS_SYS_ROOT)
+                        --interp-prefix=$(CROSS_SYS_ROOT) \
+                        --install=$(CURDIR)/cross-install
 endif
 
 ioemu/config-host.mak:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/cross-install
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cross-install       Wed Aug 13 22:22:21 2008 +0900
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# prepend CROSS_BIN_PATH to find the right "strip"
+if [ -n "$CROSS_BIN_PATH" ]; then
+    PATH="$CROSS_BIN_PATH:$PATH"
+fi
+
+exec install "$@"
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pass-through.h
--- a/tools/ioemu/hw/pass-through.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pass-through.h     Wed Aug 13 22:22:21 2008 +0900
@@ -120,6 +120,7 @@ struct pt_msix_info {
     int enabled;
     int total_entries;
     int bar_index;
+    uint64_t table_base;
     uint32_t table_off;
     uint64_t mmio_base_addr;
     int mmio_index;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/ioemu/hw/pt-msi.c
--- a/tools/ioemu/hw/pt-msi.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/ioemu/hw/pt-msi.c   Wed Aug 13 22:22:21 2008 +0900
@@ -38,8 +38,8 @@ int pt_msi_setup(struct pt_dev *dev)
     }
 
     if ( xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
-                                                       dev->pci_dev->dev << 3 | dev->pci_dev->func,
-                                                       dev->pci_dev->bus, 0, 1) )
+                                 dev->pci_dev->dev << 3 | dev->pci_dev->func,
+                                 dev->pci_dev->bus, 0, 0) )
     {
         PT_LOG("error map msi\n");
         return -1;
@@ -121,7 +121,8 @@ static int pt_msix_update_one(struct pt_
     {
         ret = xc_physdev_map_pirq_msi(xc_handle, domid, AUTO_ASSIGN, &pirq,
                                 dev->pci_dev->dev << 3 | dev->pci_dev->func,
-                                dev->pci_dev->bus, entry_nr, 0);
+                                dev->pci_dev->bus, entry_nr,
+                                dev->msix->table_base);
         if ( ret )
         {
             PT_LOG("error map msix entry %x\n", entry_nr);
@@ -183,7 +184,7 @@ static void pci_msix_writel(void *opaque
     entry = &msix->msix_entry[entry_nr];
     offset = ((addr - msix->mmio_base_addr) % 16) / 4;
 
-    if ( offset != 3 && msix->enabled && entry->io_mem[3] & 0x1 )
+    if ( offset != 3 && msix->enabled && !(entry->io_mem[3] & 0x1) )
     {
         PT_LOG("can not update msix entry %d since MSI-X is already \
                 function now.\n", entry_nr);
@@ -196,7 +197,7 @@ static void pci_msix_writel(void *opaque
 
     if ( offset == 3 )
     {
-        if ( !(val & 0x1) )
+        if ( msix->enabled && !(val & 0x1) )
             pt_msix_update_one(dev, entry_nr);
         mask_physical_msix_entry(dev, entry_nr, entry->io_mem[3] & 0x1);
     }
@@ -280,7 +281,6 @@ int pt_msix_init(struct pt_dev *dev, int
     uint8_t id;
     uint16_t control;
     int i, total_entries, table_off, bar_index;
-    uint64_t bar_base;
     struct pci_dev *pd = dev->pci_dev;
 
     id = pci_read_byte(pd, pos + PCI_CAP_LIST_ID);
@@ -314,18 +314,14 @@ int pt_msix_init(struct pt_dev *dev, int
     table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
     bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
     table_off &= table_off & ~PCI_MSIX_BIR;
-    bar_base = pci_read_long(pd, 0x10 + 4 * bar_index);
-    if ( (bar_base & 0x6) == 0x4 )
-    {
-        bar_base &= ~0xf;
-        bar_base += (uint64_t)pci_read_long(pd, 0x10 + 4 * (bar_index + 1)) << 32;
-    }
-    PT_LOG("get MSI-X table bar base %lx\n", bar_base);
+    dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
+    PT_LOG("get MSI-X table bar base %llx\n",
+           (unsigned long long)dev->msix->table_base);
 
     dev->msix->fd = open("/dev/mem", O_RDWR);
     dev->msix->phys_iomem_base = mmap(0, total_entries * 16,
                           PROT_WRITE | PROT_READ, MAP_SHARED | MAP_LOCKED,
-                          dev->msix->fd, bar_base + table_off);
+                          dev->msix->fd, dev->msix->table_base + table_off);
     PT_LOG("mapping physical MSI-X table to %lx\n",
            (unsigned long)dev->msix->phys_iomem_base);
     return 0;
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xc_physdev.c
--- a/tools/libxc/xc_physdev.c  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xc_physdev.c  Wed Aug 13 22:22:21 2008 +0900
@@ -51,7 +51,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
                             int devfn,
                             int bus,
                             int entry_nr,
-                            int msi_type)
+                            uint64_t table_base)
 {
     int rc;
     struct physdev_map_pirq map;
@@ -63,10 +63,10 @@ int xc_physdev_map_pirq_msi(int xc_handl
     map.type = MAP_PIRQ_TYPE_MSI;
     map.index = index;
     map.pirq = *pirq;
-    map.msi_info.devfn = devfn;
-    map.msi_info.bus = bus;
-    map.msi_info.entry_nr = entry_nr;
-    map.msi_info.msi = msi_type;
+    map.bus = bus;
+    map.devfn = devfn;
+    map.entry_nr = entry_nr;
+    map.table_base = table_base;
 
     rc = do_physdev_op(xc_handle, PHYSDEVOP_map_pirq, &map);
 
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/libxc/xenctrl.h     Wed Aug 13 22:22:21 2008 +0900
@@ -917,7 +917,7 @@ int xc_physdev_map_pirq_msi(int xc_handl
                             int devfn,
                             int bus,
                             int entry_nr,
-                            int msi_type);
+                            uint64_t table_base);
 
 int xc_physdev_unmap_pirq(int xc_handle,
                           int domid,
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/misc/xend
--- a/tools/misc/xend   Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/misc/xend   Wed Aug 13 22:22:21 2008 +0900
@@ -77,6 +77,10 @@ def check_user():
         hline()
         raise CheckError("invalid user")
 
+def start_daemon(daemon, *args):
+    if os.fork() == 0:
+        os.execvp(daemon, (daemon,) + args)
+
 def start_xenstored():
     pidfname = "/var/run/xenstore.pid"
     try:
@@ -102,13 +106,15 @@ def start_xenstored():
     s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
-    if os.fork() == 0:
-        os.execvp('xenconsoled', ['xenconsoled'])
+    XENCONSOLED_TRACE = os.getenv("XENCONSOLED_TRACE")
+    args = ""
+    if XENCONSOLED_TRACE:
+        args += "--log=" + XENCONSOLED_TRACE
+    start_daemon("xenconsoled", args)
 
 def start_blktapctrl():
-    if os.fork() == 0:
-        os.execvp('blktapctrl', ['blktapctrl'])
-            
+    start_daemon("blktapctrl", "")
+
 def main():
     try:
         check_logging()
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendAPI.py  Wed Aug 13 22:22:21 2008 +0900
@@ -2265,7 +2265,8 @@ class XendAPI(object):
             'type': image.type,
             'sharable': image.sharable,
             'read_only': image.read_only,
-            'other_config': image.other_config
+            'other_config': image.other_config,
+            'security_label' : image.get_security_label()
             })
 
     # Class Functions    
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py       Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py       Wed Aug 13 22:22:21 2008 +0900
@@ -448,7 +448,7 @@ class XendConfig(dict):
                 self['platform']['hpet'] = 0
             if 'loader' not in self['platform']:
                 # Old configs may have hvmloader set as PV_kernel param
-                if self.has_key('PV_kernel') and re.search('hvmloader', self['PV_kernel']):
+                if self.has_key('PV_kernel') and self['PV_kernel'] != '':
                     self['platform']['loader'] = self['PV_kernel']
                     self['PV_kernel'] = ''
                 else:
diff -r fa8be8a6cb74 -r da236d7f59b9 tools/python/xen/xend/XendPIF.py
--- a/tools/python/xen/xend/XendPIF.py  Wed Aug 13 13:18:06 2008 +0900
+++ b/tools/python/xen/xend/XendPIF.py  Wed Aug 13 22:22:21 2008 +0900
@@ -95,6 +95,22 @@ def linux_set_mtu(iface, mtu):
     except ValueError:
         return False
 
+def linux_get_mtu(device):
+    return _linux_get_pif_param(device, 'mtu')
+
+def linux_get_mac(device):
+    return _linux_get_pif_param(device, 'link/ether')
+
+def _linux_get_pif_parm(device, param_name):
+    ip_get_dev_data = 'ip link show %s' % device
+    rc, output = commands.getstatusoutput(ip_get_dev_data)
+    if rc == 0:
+        params = output.split(' ')
+        for i in xrange(len(params)):
+            if params[i] == param_name:
+                return params[i+1]
+    return ''
+
 def _create_VLAN(dev, vlan):
     rc, _ = commands.getstatusoutput('vconfig add %s %d' %
                                      (dev, vlan))
@@ -259,8 +275,8 @@ class XendPIF(XendBase):
         # Create the record
         record = {
             "device":  device,
-            "MAC":     '',
-            "MTU":     '',
+            "MAC":     linux_get_mac('%s.%d' % (device, vlan)),
+            "MTU":     linux_get_mtu('%s.%d' % (device, vlan)),
             "network": network_uuid,
             "VLAN":    vlan
             }
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/Makefile
--- a/xen/Makefile      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/Makefile      Wed Aug 13 22:22:21 2008 +0900
@@ -2,7 +2,7 @@
 # All other places this is stored (eg. compile.h) should be autogenerated.
 export XEN_VERSION       = 3
 export XEN_SUBVERSION    = 3
-export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .0-rc5-pre$(XEN_VENDORVERSION)
 export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
 -include xen-version
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Wed Aug 13 22:22:21 2008 +0900
@@ -26,5 +26,5 @@ void x86_mcinfo_dump(struct mc_info *mi)
 void x86_mcinfo_dump(struct mc_info *mi);
 
 /* Global variables */
-extern int mce_disabled __initdata;
+extern int mce_disabled;
 extern unsigned int nr_mce_banks;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/common.c   Wed Aug 13 22:22:21 2008 +0900
@@ -3357,23 +3357,45 @@ shadow_write_p2m_entry(struct vcpu *v, u
         }
     }
 
-    /* If we're removing a superpage mapping from the p2m, remove all the
-     * MFNs covered by it from the shadows too. */
+    /* If we're removing a superpage mapping from the p2m, we need to check 
+     * all the pages covered by it.  If they're still there in the new 
+     * scheme, that's OK, but otherwise they must be unshadowed. */
     if ( level == 2 && (l1e_get_flags(*p) & _PAGE_PRESENT) &&
          (l1e_get_flags(*p) & _PAGE_PSE) )
     {
         unsigned int i;
-        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        cpumask_t flushmask;
+        mfn_t omfn = _mfn(l1e_get_pfn(*p));
+        mfn_t nmfn = _mfn(l1e_get_pfn(new));
+        l1_pgentry_t *npte = NULL;
         p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
-        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
-        {
+        if ( p2m_is_valid(p2mt) && mfn_valid(omfn) )
+        {
+            cpus_clear(flushmask);
+
+            /* If we're replacing a superpage with a normal L1 page, map it */
+            if ( (l1e_get_flags(new) & _PAGE_PRESENT)
+                 && !(l1e_get_flags(new) & _PAGE_PSE) 
+                 && mfn_valid(nmfn) )
+                npte = map_domain_page(mfn_x(nmfn));
+            
             for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
             {
-                sh_remove_all_shadows_and_parents(v, mfn);
-                if ( sh_remove_all_mappings(v, mfn) )
-                    flush_tlb_mask(d->domain_dirty_cpumask);
-                mfn = _mfn(mfn_x(mfn) + 1);
+                if ( !npte 
+                     || !p2m_is_ram(p2m_flags_to_type(l1e_get_flags(npte[i])))
+                     || l1e_get_pfn(npte[i]) != mfn_x(omfn) )
+                {
+                    /* This GFN->MFN mapping has gone away */
+                    sh_remove_all_shadows_and_parents(v, omfn);
+                    if ( sh_remove_all_mappings(v, omfn) )
+                        cpus_or(flushmask, flushmask, d->domain_dirty_cpumask);
+                }
+                omfn = _mfn(mfn_x(omfn) + 1);
             }
+            flush_tlb_mask(flushmask);
+            
+            if ( npte )
+                unmap_domain_page(npte);
         }
     }
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/multi.c    Wed Aug 13 22:22:21 2008 +0900
@@ -3181,14 +3181,9 @@ static int sh_page_fault(struct vcpu *v,
     rc = guest_walk_tables(v, va, &gw, regs->error_code);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+    regs->error_code &= ~PFEC_page_present;
     if ( !(rc & _PAGE_PRESENT) )
         regs->error_code |= PFEC_page_present;
-    else if ( regs->error_code & PFEC_page_present )
-    {
-            SHADOW_ERROR("OOS paranoia: Something is wrong in guest TLB"
-                         " flushing. Have fun debugging it.\n");
-            regs->error_code &= ~PFEC_page_present;
-    }
 #endif
 
     if ( rc != 0 )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h  Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/mm/shadow/private.h  Wed Aug 13 22:22:21 2008 +0900
@@ -213,15 +213,14 @@ struct shadow_page_info
     };
 };
 
-/* The structure above *must* be the same size as a struct page_info
+/* The structure above *must* be no larger than a struct page_info
  * from mm.h, since we'll be using the same space in the frametable. 
  * Also, the mbz field must line up with the owner field of normal 
  * pages, so they look properly like anonymous/xen pages. */
 static inline void shadow_check_page_struct_offsets(void) {
-    BUILD_BUG_ON(sizeof (struct shadow_page_info) 
-                 != sizeof (struct page_info));
-    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) 
-                 != offsetof(struct page_info, u.inuse._domain));
+    BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info));
+    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
+                 offsetof(struct page_info, u.inuse._domain));
 };
 
 /* Shadow type codes */
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c        Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/msi.c        Wed Aug 13 22:22:21 2008 +0900
@@ -490,28 +490,6 @@ static int msi_capability_init(struct pc
     return 0;
 }
 
-static u64 pci_resource_start(struct pci_dev *dev, u8 bar_index)
-{
-    u64 bar_base;
-    u32 reg_val;
-    u8 bus = dev->bus;
-    u8 slot = PCI_SLOT(dev->devfn);
-    u8 func = PCI_FUNC(dev->devfn);
-
-    reg_val = pci_conf_read32(bus, slot, func,
-                              PCI_BASE_ADDRESS_0 + 4 * bar_index);
-    bar_base = reg_val & PCI_BASE_ADDRESS_MEM_MASK;
-    if ( ( reg_val & PCI_BASE_ADDRESS_MEM_TYPE_MASK ) ==
-         PCI_BASE_ADDRESS_MEM_TYPE_64 )
-    {
-        reg_val = pci_conf_read32(bus, slot, func,
-                                  PCI_BASE_ADDRESS_0 + 4 * (bar_index + 1));
-        bar_base |= ((u64)reg_val) << 32;
-    }
-
-    return bar_base;
-}
-
 /**
  * msix_capability_init - configure device's MSI-X capability
  * @dev: pointer to the pci_dev data structure of MSI-X device function
@@ -522,7 +500,7 @@ static u64 pci_resource_start(struct pci
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev, int vector, int entry_nr)
+static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
 {
     struct msi_desc *entry;
     int pos;
@@ -549,7 +527,7 @@ static int msix_capability_init(struct p
     table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos));
     bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
     table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
-    phys_addr = pci_resource_start(dev, bir) + table_offset;
+    phys_addr = msi->table_base + table_offset;
     idx = msix_fixmap_alloc();
     if ( idx < 0 )
     {
@@ -561,11 +539,11 @@ static int msix_capability_init(struct p
 
     entry->msi_attrib.type = PCI_CAP_ID_MSIX;
     entry->msi_attrib.is_64 = 1;
-    entry->msi_attrib.entry_nr = entry_nr;
+    entry->msi_attrib.entry_nr = msi->entry_nr;
     entry->msi_attrib.maskbit = 1;
     entry->msi_attrib.masked = 1;
     entry->msi_attrib.pos = pos;
-    entry->vector = vector;
+    entry->vector = msi->vector;
     entry->dev = dev;
     entry->mask_base = base;
 
@@ -589,24 +567,25 @@ static int msix_capability_init(struct p
  * indicates the successful setup of an entry zero with the new MSI
  * irq or non-zero for otherwise.
  **/
-static int __pci_enable_msi(u8 bus, u8 devfn, int vector)
+static int __pci_enable_msi(struct msi_info *msi)
 {
     int status;
     struct pci_dev *pdev;
 
-    pdev = pci_lock_pdev(bus, devfn);
+    pdev = pci_lock_pdev(msi->bus, msi->devfn);
     if ( !pdev )
        return -ENODEV;
 
-    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSI) )
+    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
     {
        spin_unlock(&pdev->lock);
-        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on device \
-            %02x:%02x.%01x.\n", vector, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
+            "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+            PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msi_capability_init(pdev, vector);
+    status = msi_capability_init(pdev, msi->vector);
     spin_unlock(&pdev->lock);
     return status;
 }
@@ -659,37 +638,37 @@ static void __pci_disable_msi(int vector
  * of irqs available. Driver should use the returned value to re-send
  * its request.
  **/
-static int __pci_enable_msix(u8 bus, u8 devfn, int vector, int entry_nr)
+static int __pci_enable_msix(struct msi_info *msi)
 {
     int status, pos, nr_entries;
     struct pci_dev *pdev;
     u16 control;
-    u8 slot = PCI_SLOT(devfn);
-    u8 func = PCI_FUNC(devfn);
-
-    pdev = pci_lock_pdev(bus, devfn);
+    u8 slot = PCI_SLOT(msi->devfn);
+    u8 func = PCI_FUNC(msi->devfn);
+
+    pdev = pci_lock_pdev(msi->bus, msi->devfn);
     if ( !pdev )
        return -ENODEV;
 
-    pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
-    control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
+    pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
+    control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
     nr_entries = multi_msix_capable(control);
-    if (entry_nr > nr_entries)
+    if (msi->entry_nr > nr_entries)
     {
        spin_unlock(&pdev->lock);
         return -EINVAL;
     }
 
-    if ( find_msi_entry(pdev, vector, PCI_CAP_ID_MSIX) )
+    if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
     {
        spin_unlock(&pdev->lock);
-        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on \
-                device %02x:%02x.%01x.\n", vector, bus,
-                PCI_SLOT(devfn), PCI_FUNC(devfn));
+        dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
+                "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
         return 0;
     }
 
-    status = msix_capability_init(pdev, vector, entry_nr);
+    status = msix_capability_init(pdev, msi);
     spin_unlock(&pdev->lock);
     return status;
 }
@@ -727,13 +706,12 @@ static void __pci_disable_msix(int vecto
     spin_unlock(&dev->lock);
 }
 
-int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi)
-{
-    ASSERT(spin_is_locked(&irq_desc[vector].lock));
-    if ( msi )
-        return __pci_enable_msi(bus, devfn, vector);
-    else
-        return __pci_enable_msix(bus, devfn, vector, entry_nr);
+int pci_enable_msi(struct msi_info *msi)
+{
+    ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
+
+    return  msi->table_base ? __pci_enable_msix(msi) :
+                              __pci_enable_msi(msi);
 }
 
 void pci_disable_msi(int vector)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/oprofile/nmi_int.c   Wed Aug 13 22:22:21 2008 +0900
@@ -296,24 +296,40 @@ static int __init ppro_init(char ** cpu_
 {
        __u8 cpu_model = current_cpu_data.x86_model;
 
-       if (cpu_model == 15 || cpu_model == 23) {
+       switch (cpu_model) {
+       case 0 ... 2:
+               *cpu_type = "i386/ppro";
+               break;
+       case 3 ... 5:
+               *cpu_type = "i386/pii";
+               break;
+       case 6 ... 8:
+               *cpu_type = "i386/piii";
+               break;
+       case 9:
+               *cpu_type = "i386/p6_mobile";
+               break;
+       case 10 ... 13:
+               *cpu_type = "i386/p6";
+               break;
+       case 14:
+               *cpu_type = "i386/core";
+               break;
+       case 15: case 23:
                *cpu_type = "i386/core_2";
                ppro_has_global_ctrl = 1;
-       } else if (cpu_model == 14)
-               *cpu_type = "i386/core";
-       else if (cpu_model > 13) {
+               break;
+       case 26:
+               *cpu_type = "i386/core_2";
+               ppro_has_global_ctrl = 1;
+               break;
+       default:
+               /* Unknown */
                printk("xenoprof: Initialization failed. "
                       "Intel processor model %d for P6 class family is not "
                       "supported\n", cpu_model);
                return 0;
-       } else if (cpu_model == 9)
-               *cpu_type = "i386/p6_mobile";
-       else if (cpu_model > 5)
-               *cpu_type = "i386/piii";
-       else if (cpu_model > 2)
-               *cpu_type = "i386/pii";
-       else
-               *cpu_type = "i386/ppro";
+       }
 
        model = &op_ppro_spec;
        return 1;
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/arch/x86/physdev.c
--- a/xen/arch/x86/physdev.c    Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/arch/x86/physdev.c    Wed Aug 13 22:22:21 2008 +0900
@@ -66,6 +66,7 @@ static int map_domain_pirq(struct domain
 {
     int ret = 0;
     int old_vector, old_pirq;
+    struct msi_info msi;
 
     if ( d == NULL )
         return -EINVAL;
@@ -115,10 +116,14 @@ static int map_domain_pirq(struct domain
                      vector);
         desc->handler = &pci_msi_type;
 
-        ret = pci_enable_msi(map->msi_info.bus,
-                                    map->msi_info.devfn, vector,
-                                                        map->msi_info.entry_nr,
-                                                        map->msi_info.msi);
+        msi.bus = map->bus;
+        msi.devfn = map->devfn;
+        msi.entry_nr = map->entry_nr;
+        msi.table_base = map->table_base;
+        msi.vector = vector;
+
+        ret = pci_enable_msi(&msi);
+
         spin_unlock_irqrestore(&desc->lock, flags);
         if ( ret )
             goto done;
@@ -139,7 +144,7 @@ static int unmap_domain_pirq(struct doma
     int ret = 0;
     int vector;
 
-    if ( d == NULL || pirq < 0 || pirq > NR_PIRQS )
+    if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
         return -EINVAL;
 
     if ( !IS_PRIV(current->domain) )
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/common/page_alloc.c   Wed Aug 13 22:22:21 2008 +0900
@@ -950,6 +950,14 @@ static void page_scrub_softirq(void)
     void             *p;
     int               i;
     s_time_t          start = NOW();
+    static spinlock_t serialise_lock = SPIN_LOCK_UNLOCKED;
+
+    /* free_heap_pages() does not parallelise well. Serialise this function. */
+    if ( !spin_trylock(&serialise_lock) )
+    {
+        set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(1));
+        return;
+    }
 
     /* Aim to do 1ms of work every 10ms. */
     do {
@@ -958,7 +966,7 @@ static void page_scrub_softirq(void)
         if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
         {
             spin_unlock(&page_scrub_lock);
-            return;
+            goto out;
         }
         
         /* Peel up to 16 pages from the list. */
@@ -989,6 +997,9 @@ static void page_scrub_softirq(void)
     } while ( (NOW() - start) < MILLISECS(1) );
 
     set_timer(&this_cpu(page_scrub_timer), NOW() + MILLISECS(10));
+
+ out:
+    spin_unlock(&serialise_lock);
 }
 
 static void page_scrub_timer_fn(void *unused)
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/io.c
--- a/xen/drivers/passthrough/io.c      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/io.c      Wed Aug 13 22:22:21 2008 +0900
@@ -74,6 +74,9 @@ int pt_irq_create_bind_vtd(
     if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
     {
         int pirq = pt_irq_bind->machine_irq;
+
+        if ( pirq < 0 || pirq >= NR_IRQS )
+            return -EINVAL;
 
         if ( !(hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_VALID ) )
         {
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/drivers/passthrough/vtd/iommu.c       Wed Aug 13 22:22:21 2008 +0900
@@ -1789,7 +1789,8 @@ int intel_vtd_setup(void)
     memset(domid_bitmap, 0, domid_bitmap_size / 8);
     set_bit(0, domid_bitmap);
 
-    init_vtd_hw();
+    if ( init_vtd_hw() )
+        goto error;
 
     register_keyhandler('V', dump_iommu_info, "dump iommu info");
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h       Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/event.h       Wed Aug 13 22:22:21 2008 +0900
@@ -69,12 +69,7 @@ static inline void local_event_delivery_
 /* No arch specific virq definition now. Default to global. */
 static inline int arch_virq_is_global(int virq)
 {
-    switch (virq) {
-    case VIRQ_MCA:
-        return 1;
-    default:
-        return 1;
-    }
+    return 1;
 }
 
 #endif
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/asm-x86/msi.h
--- a/xen/include/asm-x86/msi.h Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/asm-x86/msi.h Wed Aug 13 22:22:21 2008 +0900
@@ -53,6 +53,14 @@
 #else
 #define MAX_MSIX_PAGES              32
 #endif
+
+struct msi_info {
+    int bus;
+    int devfn;
+    int vector;
+    int entry_nr;
+    uint64_t table_base;
+};
 
 struct msi_msg {
        u32     address_lo;     /* low 32 bits of msi message address */
@@ -64,7 +72,7 @@ extern void mask_msi_irq(unsigned int ir
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
 extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
-extern int pci_enable_msi(u8 bus, u8 devfn, int vector, int entry_nr, int msi);
+extern int pci_enable_msi(struct msi_info *msi);
 extern void pci_disable_msi(int vector);
 extern void pci_cleanup_msi(struct pci_dev *pdev);
 
diff -r fa8be8a6cb74 -r da236d7f59b9 xen/include/public/physdev.h
--- a/xen/include/public/physdev.h      Wed Aug 13 13:18:06 2008 +0900
+++ b/xen/include/public/physdev.h      Wed Aug 13 22:22:21 2008 +0900
@@ -136,10 +136,13 @@ struct physdev_map_pirq {
     /* IN or OUT */
     int pirq;
     /* IN */
-    struct {
-        int bus, devfn, entry_nr;
-               int msi;  /* 0 - MSIX    1 - MSI */
-    } msi_info;
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
 };
 typedef struct physdev_map_pirq physdev_map_pirq_t;
 DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>