# HG changeset patch
# User Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
# Date 1225770199 -32400
# Node ID e75cb35c798beabee0b0ed4025ef82a39c702279
# Parent 10f0e1bb8e5e9a28e1ebe3fbb9291fb8114ef4bc
# Parent 43a079fd50fdab01cd2be443bfef011b3b0495ae
merge with xen-unstable.hg
---
xen/common/xmalloc.c | 286 --------------
.hgignore | 1
extras/mini-os/include/sched.h | 3
extras/mini-os/include/wait.h | 10
extras/mini-os/minios.mk | 3
tools/Makefile | 1
tools/blktap/drivers/block-qcow.c | 24 -
tools/firmware/hvmloader/acpi/static_tables.c | 2
tools/firmware/rombios/rombios.c | 4
tools/flask/policy/policy/modules/xen/xen.te | 3
tools/python/xen/util/diagnose.py | 4
tools/python/xen/xend/XendConfig.py | 17
tools/python/xen/xend/XendDomainInfo.py | 73 ++-
tools/python/xen/xend/server/DevConstants.py | 45 ++
tools/python/xen/xend/server/DevController.py | 31 -
tools/python/xen/xend/server/iopif.py | 20 -
tools/python/xen/xend/server/irqif.py | 19
tools/python/xen/xend/server/pciif.py | 3
tools/python/xen/xend/server/vscsiif.py | 15
tools/python/xen/xm/create.py | 14
tools/python/xen/xm/main.py | 5
tools/xenpmd/Makefile | 20 +
tools/xenpmd/xenpmd.c | 520 ++++++++++++++++++++++++++
xen/arch/ia64/xen/cpufreq/cpufreq.c | 15
xen/arch/ia64/xen/irq.c | 2
xen/arch/x86/acpi/cpu_idle.c | 103 ++---
xen/arch/x86/acpi/cpufreq/cpufreq.c | 14
xen/arch/x86/acpi/cpufreq/powernow.c | 14
xen/arch/x86/acpi/cpuidle_menu.c | 14
xen/arch/x86/domain.c | 116 ++++-
xen/arch/x86/domain_build.c | 34 +
xen/arch/x86/hpet.c | 7
xen/arch/x86/hvm/emulate.c | 30 +
xen/arch/x86/hvm/hpet.c | 339 +++++++++-------
xen/arch/x86/hvm/hvm.c | 1
xen/arch/x86/hvm/i8254.c | 4
xen/arch/x86/hvm/rtc.c | 4
xen/arch/x86/hvm/svm/entry.S | 3
xen/arch/x86/hvm/vlapic.c | 10
xen/arch/x86/hvm/vmx/entry.S | 6
xen/arch/x86/hvm/vmx/vmx.c | 81 ++--
xen/arch/x86/hvm/vmx/vpmu_core2.c | 20 +
xen/arch/x86/hvm/vpt.c | 18
xen/arch/x86/irq.c | 6
xen/arch/x86/mm.c | 251 +++++++++---
xen/arch/x86/mm/hap/p2m-ept.c | 8
xen/arch/x86/mm/p2m.c | 17
xen/arch/x86/msi.c | 69 +--
xen/arch/x86/oprofile/nmi_int.c | 51 ++
xen/arch/x86/oprofile/op_model_ppro.c | 103 +++++
xen/arch/x86/oprofile/op_x86_model.h | 5
xen/arch/x86/setup.c | 1
xen/arch/x86/smpboot.c | 14
xen/arch/x86/time.c | 4
xen/arch/x86/traps.c | 29 -
xen/arch/x86/x86_32/domain_page.c | 10
xen/arch/x86/x86_64/compat/mm.c | 5
xen/arch/x86/x86_64/cpufreq.c | 33 -
xen/common/event_channel.c | 2
xen/common/kernel.c | 3
xen/common/keyhandler.c | 4
xen/common/spinlock.c | 69 +++
xen/common/timer.c | 125 +++---
xen/common/xenoprof.c | 2
xen/drivers/char/serial.c | 7
xen/drivers/cpufreq/cpufreq.c | 149 ++++++-
xen/include/asm-x86/config.h | 8
xen/include/asm-x86/event.h | 32 -
xen/include/asm-x86/fixmap.h | 1
xen/include/asm-x86/hvm/vmx/vpmu.h | 2
xen/include/asm-x86/hvm/vmx/vpmu_core2.h | 22 -
xen/include/asm-x86/hvm/vpt.h | 70 +--
xen/include/asm-x86/mm.h | 30 +
xen/include/asm-x86/page.h | 3
xen/include/asm-x86/softirq.h | 3
xen/include/asm-x86/x86_32/page.h | 3
xen/include/asm-x86/x86_64/page.h | 5
xen/include/asm-x86/xenoprof.h | 3
xen/include/public/features.h | 3
xen/include/public/trace.h | 2
xen/include/public/xen.h | 14
xen/include/xen/cpuidle.h | 8
xen/include/xen/domain_page.h | 6
xen/include/xen/spinlock.h | 23 +
xen/include/xen/time.h | 1
xen/include/xen/timer.h | 3
xen/include/xlat.lst | 2
87 files changed, 2085 insertions(+), 1084 deletions(-)
diff -r 10f0e1bb8e5e -r e75cb35c798b .hgignore
--- a/.hgignore Tue Nov 04 12:07:22 2008 +0900
+++ b/.hgignore Tue Nov 04 12:43:19 2008 +0900
@@ -211,6 +211,7 @@
^tools/xenfb/vncfb$
^tools/xenmon/xentrace_setmask$
^tools/xenmon/xenbaked$
+^tools/xenpmd/xenpmd$
^tools/xenstat/xentop/xentop$
^tools/xenstore/testsuite/tmp/.*$
^tools/xenstore/xen$
diff -r 10f0e1bb8e5e -r e75cb35c798b extras/mini-os/include/sched.h
--- a/extras/mini-os/include/sched.h Tue Nov 04 12:07:22 2008 +0900
+++ b/extras/mini-os/include/sched.h Tue Nov 04 12:43:19 2008 +0900
@@ -48,8 +48,9 @@ void exit_thread(void) __attribute__((no
void exit_thread(void) __attribute__((noreturn));
void schedule(void);
+#ifdef __INSIDE_MINIOS__
#define current get_current()
-
+#endif
void wake(struct thread *thread);
void block(struct thread *thread);
diff -r 10f0e1bb8e5e -r e75cb35c798b extras/mini-os/include/wait.h
--- a/extras/mini-os/include/wait.h Tue Nov 04 12:07:22 2008 +0900
+++ b/extras/mini-os/include/wait.h Tue Nov 04 12:43:19 2008 +0900
@@ -7,7 +7,7 @@
#define DEFINE_WAIT(name) \
struct wait_queue name = { \
- .thread = current, \
+ .thread = get_current(), \
.thread_list = MINIOS_LIST_HEAD_INIT((name).thread_list), \
}
@@ -53,7 +53,7 @@ static inline void wake_up(struct wait_q
unsigned long flags; \
local_irq_save(flags); \
add_wait_queue(&wq, &w); \
- block(current); \
+ block(get_current()); \
local_irq_restore(flags); \
} while (0)
@@ -74,8 +74,8 @@ static inline void wake_up(struct wait_q
/* protect the list */ \
local_irq_save(flags); \
add_wait_queue(&wq, &__wait); \
- current->wakeup_time = deadline; \
- clear_runnable(current); \
+ get_current()->wakeup_time = deadline; \
+ clear_runnable(get_current()); \
local_irq_restore(flags); \
if((condition) || (deadline && NOW() >= deadline)) \
break; \
@@ -83,7 +83,7 @@ static inline void wake_up(struct wait_q
} \
local_irq_save(flags); \
/* need to wake up */ \
- wake(current); \
+ wake(get_current()); \
remove_wait_queue(&__wait); \
local_irq_restore(flags); \
} while(0)
diff -r 10f0e1bb8e5e -r e75cb35c798b extras/mini-os/minios.mk
--- a/extras/mini-os/minios.mk Tue Nov 04 12:07:22 2008 +0900
+++ b/extras/mini-os/minios.mk Tue Nov 04 12:43:19 2008 +0900
@@ -25,6 +25,9 @@ else
else
DEF_CFLAGS += -O3
endif
+
+# Make the headers define our internal stuff
+DEF_CFLAGS += -D__INSIDE_MINIOS__
# Build the CFLAGS and ASFLAGS for compiling and assembling.
# DEF_... flags are the common mini-os flags,
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/Makefile
--- a/tools/Makefile Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/Makefile Tue Nov 04 12:43:19 2008 +0900
@@ -24,6 +24,7 @@ SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
SUBDIRS-y += fs-back
SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir
+SUBDIRS-y += xenpmd
# These don't cross-compile
ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/blktap/drivers/block-qcow.c Tue Nov 04 12:43:19 2008 +0900
@@ -722,11 +722,11 @@ static inline void init_fds(struct disk_
/* Open the disk file and initialize qcow state. */
static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
{
- int fd, len, i, shift, ret, size, l1_table_size, o_flags;
+ int fd, len, i, shift, ret, size, l1_table_size, o_flags, l1_table_block;
int max_aio_reqs;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
- char *buf;
+ char *buf, *buf2;
QCowHeader *header;
QCowHeader_ext *exthdr;
uint32_t cksum;
@@ -734,8 +734,8 @@ static int tdqcow_open (struct disk_driv
DPRINTF("QCOW: Opening %s\n",name);
- /* Since we don't handle O_DIRECT correctly, don't use it */
- o_flags = O_LARGEFILE | ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
+ o_flags = O_DIRECT | O_LARGEFILE |
+ ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
fd = open(name, o_flags);
if (fd < 0) {
DPRINTF("Unable to open %s (%d)\n",name,0 - errno);
@@ -819,9 +819,14 @@ static int tdqcow_open (struct disk_driv
(int) (s->l1_size * sizeof(uint64_t)),
l1_table_size);
- lseek(fd, s->l1_table_offset, SEEK_SET);
- if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
+ lseek(fd, 0, SEEK_SET);
+ l1_table_block = l1_table_size + s->l1_table_offset;
+ l1_table_block = l1_table_block + 512 - (l1_table_block % 512);
+ ret = posix_memalign((void **)&buf2, 4096, l1_table_block);
+ if (ret != 0) goto fail;
+ if (read(fd, buf2, l1_table_block) != l1_table_block)
goto fail;
+ memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size);
for(i = 0; i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
@@ -871,8 +876,9 @@ static int tdqcow_open (struct disk_driv
DPRINTF("qcow: Converting image to big endian L1
table\n");
- lseek(fd, s->l1_table_offset, SEEK_SET);
- if (write(fd, s->l1_table, l1_table_size) !=
l1_table_size) {
+ memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size);
+ lseek(fd, 0, SEEK_SET);
+ if (write(fd, buf2, l1_table_block) != l1_table_block) {
DPRINTF("qcow: Failed to write new L1 table\n");
goto fail;
}
@@ -917,7 +923,7 @@ static int tdqcow_open (struct disk_driv
init_fds(dd);
if (!final_cluster)
- s->fd_end = s->l1_table_offset + l1_table_size;
+ s->fd_end = l1_table_block;
else {
s->fd_end = lseek(fd, 0, SEEK_END);
if (s->fd_end == (off_t)-1)
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/firmware/hvmloader/acpi/static_tables.c
--- a/tools/firmware/hvmloader/acpi/static_tables.c Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/firmware/hvmloader/acpi/static_tables.c Tue Nov 04 12:43:19 2008 +0900
@@ -67,7 +67,7 @@ struct acpi_20_fadt Fadt = {
.p_lvl2_lat = 0x0fff, /* >100, means we do not support C2 state */
.p_lvl3_lat = 0x0fff, /* >1000, means we do not support C3 state */
- .iapc_boot_arch = ACPI_LEGACY_DEVICES | ACPI_8042,
+ .iapc_boot_arch = ACPI_8042,
.flags = (ACPI_PROC_C1 | ACPI_SLP_BUTTON |
ACPI_WBINVD | ACPI_PWR_BUTTON |
ACPI_FIX_RTC | ACPI_TMR_VAL_EXT),
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/firmware/rombios/rombios.c Tue Nov 04 12:43:19 2008 +0900
@@ -7216,7 +7216,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
outb(0x03f5, head);
outb(0x03f5, sector);
outb(0x03f5, 2); // 512 byte sector size
- outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, sector + num_sectors - 1); // last sector to read on track
outb(0x03f5, 0); // Gap length
outb(0x03f5, 0xff); // Gap length
@@ -7364,7 +7364,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
outb(0x03f5, head);
outb(0x03f5, sector);
outb(0x03f5, 2); // 512 byte sector size
- outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, sector + num_sectors - 1); // last sector to write on track
outb(0x03f5, 0); // Gap length
outb(0x03f5, 0xff); // Gap length
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/flask/policy/policy/modules/xen/xen.te
--- a/tools/flask/policy/policy/modules/xen/xen.te Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/flask/policy/policy/modules/xen/xen.te Tue Nov 04 12:43:19 2008 +0900
@@ -74,7 +74,7 @@ allow dom0_t pirq_t:event {vector};
allow dom0_t pirq_t:event {vector};
allow dom0_t xen_t:mmu {memorymap};
-allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
+allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust updatemp};
allow dom0_t dom0_t:grant {query setup};
allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo getvcpuaffinity};
@@ -112,6 +112,7 @@ allow domU_t evchnU-0_t:event {send};
allow dom0_t dom0_t:event {send};
allow dom0_t domU_t:grant {copy};
+allow domU_t domU_t:grant {copy};
manage_domain(dom0_t, domU_t)
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/util/diagnose.py
--- a/tools/python/xen/util/diagnose.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/util/diagnose.py Tue Nov 04 12:43:19 2008 +0900
@@ -23,7 +23,7 @@ from xen.xend.XendClient import server
from xen.xend.XendClient import server
from xen.xend.XendError import XendError
from xen.xend.xenstore.xstransact import xstransact
-from xen.xend.server import DevController
+from xen.xend.server import DevConstants
import xen.xend.XendProtocol
@@ -169,7 +169,7 @@ def diagnose_hotplugging():
def stateString(state):
- return state and DevController.xenbusState[int(state)] or '<None>'
+ return state and DevConstants.xenbusState[int(state)] or '<None>'
def main(argv = None):
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/XendConfig.py Tue Nov 04 12:43:19 2008 +0900
@@ -1602,21 +1602,21 @@ class XendConfig(dict):
# [vscsi,
# [dev,
# [devid, 0], [p-devname, sdb], [p-dev, 1:0:0:1],
- # [v-dev, 0:0:0:0], [state, Initialising]
+ # [v-dev, 0:0:0:0], [state, 1]
# ],
# [dev,
# [devid, 0], [p-devname, sdc], [p-dev, 1:0:0:2],
- # [v-dev, 0:0:0:1], [satet, Initialising]
+ # [v-dev, 0:0:0:1], [satet, 1]
# ]
# ],
# [vscsi,
# [dev,
# [devid, 1], [p-devname, sdg], [p-dev, 2:0:0:0],
- # [v-dev, 1:0:0:0], [state, Initialising]
+ # [v-dev, 1:0:0:0], [state, 1]
# ],
# [dev,
# [devid, 1], [p-devname, sdh], [p-dev, 2:0:0:1],
- # [v-dev, 1:0:0:1], [satet, Initialising]
+ # [v-dev, 1:0:0:1], [satet, 1]
# ]
# ]
# ]
@@ -1632,18 +1632,19 @@ class XendConfig(dict):
# [vscsi,
# [dev,
# [devid, 0], [p-devname, sdd], [p-dev, 1:0:0:3],
- # [v-dev, 0:0:0:2], [state, Initialising]
+ # [v-dev, 0:0:0:2], [state, 1]
# ]
# ]
# ]
#
- # state 'Initialising' indicates that the device is being attached,
- # while state 'Closing' indicates that the device is being detached.
+ # state xenbusState['Initialising'] indicates that the device is
+ # being attached, while state xenbusState['Closing'] indicates
+ # that the device is being detached.
#
# The Dict looks like this:
#
# { devs: [ {devid: 0, p-devname: sdd, p-dev: 1:0:0:3,
- # v-dev: 0:0:0:2, state: Initialising} ] }
+ # v-dev: 0:0:0:2, state: 1} ] }
dev_config = {}
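
The comment above only shows the new shape in SXP form; as a minimal sketch (illustrative values mirroring that comment, not code from the changeset), the numeric state in the resulting dict is simply the DevConstants.xenbusState value:

    # Sketch only: with xenbusState['Initialising'] == 1, the parsed
    # vscsi config now carries numeric xenbus states instead of names.
    dev_config = {
        'devs': [
            {'devid': 0, 'p-devname': 'sdd', 'p-dev': '1:0:0:3',
             'v-dev': '0:0:0:2', 'state': 1},  # 1 == Initialising
        ]
    }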
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/XendDomainInfo.py Tue Nov 04 12:43:19 2008 +0900
@@ -52,6 +52,7 @@ from xen.xend.xenstore.xswatch import xs
from xen.xend.xenstore.xswatch import xswatch
from xen.xend.XendConstants import *
from xen.xend.XendAPIConstants import *
+from xen.xend.server.DevConstants import xenbusState
from xen.xend.XendVMMetrics import XendVMMetrics
@@ -797,7 +798,7 @@ class XendDomainInfo:
existing_dev_info = self._getDeviceInfo_vscsi(req_devid, dev['v-dev'])
state = dev['state']
- if state == 'Initialising':
+ if state == xenbusState['Initialising']:
# new create
# If request devid does not exist, create and exit.
if existing_dev_info is None:
@@ -806,25 +807,48 @@ class XendDomainInfo:
elif existing_dev_info == "exists":
raise XendError("The virtual device %s is already defined" %
dev['v-dev'])
- elif state == 'Closing':
+ elif state == xenbusState['Closing']:
if existing_dev_info is None:
raise XendError("Cannot detach vscsi device does not exist")
- # use DevController.reconfigureDevice to change device config
- dev_control = self.getDeviceController(dev_class)
- dev_uuid = dev_control.reconfigureDevice(req_devid, dev_config)
- dev_control.waitForDevice_reconfigure(req_devid)
- num_devs = dev_control.cleanupDevice(req_devid)
-
- # update XendConfig with new device info
- if dev_uuid:
- new_dev_sxp = dev_control.configuration(req_devid)
+ if self.domid is not None:
+ # use DevController.reconfigureDevice to change device config
+ dev_control = self.getDeviceController(dev_class)
+ dev_uuid = dev_control.reconfigureDevice(req_devid, dev_config)
+ dev_control.waitForDevice_reconfigure(req_devid)
+ num_devs = dev_control.cleanupDevice(req_devid)
+
+ # update XendConfig with new device info
+ if dev_uuid:
+ new_dev_sxp = dev_control.configuration(req_devid)
+ self.info.device_update(dev_uuid, new_dev_sxp)
+
+ # If there is no device left, destroy vscsi and remove config.
+ if num_devs == 0:
+ self.destroyDevice('vscsi', req_devid)
+ del self.info['devices'][dev_uuid]
+
+ else:
+ cur_dev_sxp = self._getDeviceInfo_vscsi(req_devid, None)
+ new_dev_sxp = ['vscsi']
+ for cur_dev in sxp.children(cur_dev_sxp, 'dev'):
+ if state == xenbusState['Closing']:
+ cur_dev_vdev = sxp.child_value(cur_dev, 'v-dev')
+ if cur_dev_vdev == dev['v-dev']:
+ continue
+ new_dev_sxp.append(cur_dev)
+
+ if state == xenbusState['Initialising']:
+ new_dev_sxp.append(sxp.child0(dev_sxp, 'dev'))
+
+ dev_uuid = sxp.child_value(cur_dev_sxp, 'uuid')
self.info.device_update(dev_uuid, new_dev_sxp)
- # If there is no device left, destroy vscsi and remove config.
- if num_devs == 0:
- self.destroyDevice('vscsi', req_devid)
- del self.info['devices'][dev_uuid]
+ # If there is only 'vscsi' in new_dev_sxp, remove the config.
+ if len(sxp.children(new_dev_sxp, 'dev')) == 0:
+ del self.info['devices'][dev_uuid]
+
+ xen.xend.XendDomain.instance().managed_config_save(self)
return True
@@ -986,7 +1010,17 @@ class XendDomainInfo:
sxprs = []
dev_num = 0
for dev_type, dev_info in self.info.all_devices_sxpr():
- if dev_type == deviceClass:
+ if dev_type != deviceClass:
+ continue
+
+ if deviceClass == 'vscsi':
+ vscsi_devs = ['devs', []]
+ for vscsi_dev in sxp.children(dev_info, 'dev'):
+ vscsi_dev.append(['frontstate', None])
+ vscsi_devs[1].append(vscsi_dev)
+ dev_num = int(sxp.child_value(vscsi_dev, 'devid'))
+ sxprs.append([dev_num, [vscsi_devs]])
+ else:
sxprs.append([dev_num, dev_info])
dev_num += 1
return sxprs
@@ -2380,11 +2414,10 @@ class XendDomainInfo:
time.sleep(2)
for paths in plist:
if paths.find('backend') != -1:
- from xen.xend.server import DevController
# Modify online status /before/ updating state (latter is watched by
# drivers, so this ordering avoids a race).
xstransact.Write(paths, 'online', "0")
- xstransact.Write(paths, 'state', str(DevController.xenbusState['Closing']))
+ xstransact.Write(paths, 'state', str(xenbusState['Closing']))
# force
xstransact.Remove(paths)
@@ -3439,7 +3472,7 @@ class XendDomainInfo:
['p-devname', pscsi.get_dev_name()],
['p-dev', pscsi.get_physical_HCTL()],
['v-dev', xenapi_dscsi.get('virtual_HCTL')],
- ['state', 'Initialising'],
+ ['state', xenbusState['Initialising']],
['uuid', dscsi_uuid]
]
]
@@ -3558,7 +3591,7 @@ class XendDomainInfo:
if target_dev is None:
raise XendError('Failed to destroy device')
- target_dev.append(['state', 'Closing'])
+ target_dev.append(['state', xenbusState['Closing']])
target_vscsi_sxp = ['vscsi', target_dev]
if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING:
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/DevConstants.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/server/DevConstants.py Tue Nov 04 12:43:19 2008 +0900
@@ -0,0 +1,45 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@xxxxxx>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+DEVICE_CREATE_TIMEOUT = 100
+DEVICE_DESTROY_TIMEOUT = 100
+HOTPLUG_STATUS_NODE = "hotplug-status"
+HOTPLUG_ERROR_NODE = "hotplug-error"
+HOTPLUG_STATUS_ERROR = "error"
+HOTPLUG_STATUS_BUSY = "busy"
+
+Connected = 1
+Error = 2
+Missing = 3
+Timeout = 4
+Busy = 5
+Disconnected = 6
+
+xenbusState = {
+ 'Unknown' : 0,
+ 'Initialising' : 1,
+ 'InitWait' : 2,
+ 'Initialised' : 3,
+ 'Connected' : 4,
+ 'Closing' : 5,
+ 'Closed' : 6,
+ 'Reconfiguring' : 7,
+ 'Reconfigured' : 8,
+ }
+xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
+
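
The final update() call above merges the inverse mapping into the same dict, making xenbusState usable in both directions. A minimal sketch of the resulting behaviour (illustrative, not part of the changeset):

    # Sketch only: after the update() call, lookups work by name or number.
    xenbusState['Closing']   # -> 5
    xenbusState[5]           # -> 'Closing'
    # This two-way table is what lets vscsiif.py format a readable
    # error message from a numeric state via xenbusState[state].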
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/server/DevController.py Tue Nov 04 12:43:19 2008 +0900
@@ -23,41 +23,14 @@ from xen.xend.XendError import VmError
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
import xen.xend.XendConfig
+from xen.xend.server.DevConstants import *
from xen.xend.xenstore.xstransact import xstransact, complete
from xen.xend.xenstore.xswatch import xswatch
import os
-DEVICE_CREATE_TIMEOUT = 100
-DEVICE_DESTROY_TIMEOUT = 100
-HOTPLUG_STATUS_NODE = "hotplug-status"
-HOTPLUG_ERROR_NODE = "hotplug-error"
-HOTPLUG_STATUS_ERROR = "error"
-HOTPLUG_STATUS_BUSY = "busy"
-
-Connected = 1
-Error = 2
-Missing = 3
-Timeout = 4
-Busy = 5
-Disconnected = 6
-
-xenbusState = {
- 'Unknown' : 0,
- 'Initialising' : 1,
- 'InitWait' : 2,
- 'Initialised' : 3,
- 'Connected' : 4,
- 'Closing' : 5,
- 'Closed' : 6,
- 'Reconfiguring': 7,
- 'Reconfigured' : 8,
- }
-
xoptions = XendOptions.instance()
-
-xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
class DevController:
@@ -569,7 +542,7 @@ class DevController:
xswatch(statusPath, hotplugStatusCallback, ev, result)
ev.wait(DEVICE_CREATE_TIMEOUT)
err = xstransact.Read(statusPath, HOTPLUG_ERROR_NODE)
- if result['status'] != 'Connected':
+ if result['status'] != Connected:
return (result['status'], err)
backpath = self.readVm(devid, "backend")
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/iopif.py
--- a/tools/python/xen/xend/server/iopif.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/server/iopif.py Tue Nov 04 12:43:19 2008 +0900
@@ -45,8 +45,21 @@ def parse_ioport(val):
class IOPortsController(DevController):
+ valid_cfg = ['to', 'from', 'uuid']
+
def __init__(self, vm):
DevController.__init__(self, vm)
+
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
@@ -81,4 +94,9 @@ class IOPortsController(DevController):
'ioports: Failed to configure legacy i/o range: %s - %s' %
(io_from, io_to))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
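
A minimal sketch (hypothetical values, not from the patch) of the zip-and-filter idiom that the new getDeviceConfiguration() above uses to fold backend reads into a config dict while dropping keys the backend does not have:

    valid_cfg = ['to', 'from', 'uuid']
    devinfo = ('0x3ff', '0x2f8', None)   # hypothetical readBackend() result
    config = dict(zip(valid_cfg, devinfo))
    config = dict([(key, val) for key, val in config.items() if val != None])
    # -> {'to': '0x3ff', 'from': '0x2f8'}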
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/irqif.py
--- a/tools/python/xen/xend/server/irqif.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/server/irqif.py Tue Nov 04 12:43:19 2008 +0900
@@ -39,6 +39,18 @@ class IRQController(DevController):
def __init__(self, vm):
DevController.__init__(self, vm)
+ valid_cfg = ['irq', 'uuid']
+
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
@@ -75,4 +87,9 @@ class IRQController(DevController):
if rc < 0:
raise VmError(
'irq: Failed to map irq %x' % (pirq))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/server/pciif.py Tue Nov 04 12:43:19 2008 +0900
@@ -25,7 +25,8 @@ from xen.xend.XendError import VmError
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
-from xen.xend.server.DevController import DevController, xenbusState
+from xen.xend.server.DevController import DevController
+from xen.xend.server.DevConstants import xenbusState
import xen.lowlevel.xc
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xend/server/vscsiif.py
--- a/tools/python/xen/xend/server/vscsiif.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xend/server/vscsiif.py Tue Nov 04 12:43:19 2008 +0900
@@ -28,7 +28,8 @@ from xen.xend.XendError import VmError
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
-from xen.xend.server.DevController import DevController, xenbusState
+from xen.xend.server.DevController import DevController
+from xen.xend.server.DevConstants import xenbusState
from xen.xend.xenstore.xstransact import xstransact
class VSCSIController(DevController):
@@ -92,8 +93,8 @@ class VSCSIController(DevController):
back[devpath + '/p-devname'] = pdevname
vdev = vscsi_config.get('v-dev', '')
back[devpath + '/v-dev'] = vdev
- state = vscsi_config.get('state', '')
- back[devpath + '/state'] = str(xenbusState[state])
+ state = vscsi_config.get('state', xenbusState['Unknown'])
+ back[devpath + '/state'] = str(state)
devid = vscsi_config.get('devid', '')
back[devpath + '/devid'] = str(devid)
@@ -168,17 +169,17 @@ class VSCSIController(DevController):
(devid, back, front) = self.getDeviceDetails(config)
devid = int(devid)
vscsi_config = config['devs'][0]
- state = vscsi_config.get('state', '')
+ state = vscsi_config.get('state', xenbusState['Unknown'])
driver_state = self.readBackend(devid, 'state')
if str(xenbusState['Connected']) != driver_state:
raise VmError("Driver status is not connected")
uuid = self.readBackend(devid, 'uuid')
- if state == 'Initialising':
+ if state == xenbusState['Initialising']:
back['uuid'] = uuid
self.writeBackend(devid, back)
- elif state == 'Closing':
+ elif state == xenbusState['Closing']:
found = False
devs = self.readBackendList(devid, "vscsi-devs")
vscsipath = "vscsi-devs/"
@@ -198,7 +199,7 @@ class VSCSIController(DevController):
else:
raise XendError("Error configuring device invalid "
- "state '%s'" % state)
+ "state '%s'" % xenbusState[state])
self.writeBackend(devid, 'state', str(xenbusState['Reconfiguring']))
return self.readBackend(devid, 'uuid')
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xm/create.py Tue Nov 04 12:43:19 2008 +0900
@@ -32,6 +32,7 @@ from xen.xend import osdep
from xen.xend import osdep
import xen.xend.XendClient
from xen.xend.XendBootloader import bootloader
+from xen.xend.server.DevConstants import xenbusState
from xen.util import blkif
from xen.util import vscsi_util
import xen.util.xsm.xsm as security
@@ -707,7 +708,7 @@ def configure_vscsis(config_devs, vals):
vscsi_util.vscsi_get_hctl_and_devname_by(p_dev, scsi_devices)
if p_hctl == None:
- raise ValueError("Cannot find device \"%s\"" % p_dev)
+ raise ValueError('Cannot find device "%s"' % p_dev)
for config in config_scsi:
dev = vscsi_convert_sxp_to_dict(config)
@@ -717,7 +718,7 @@ def configure_vscsis(config_devs, vals):
v_hctl = v_dev.split(':')
devid = int(v_hctl[0])
config_scsi.append(['dev', \
- ['state', 'Initialising'], \
+ ['state', xenbusState['Initialising']], \
['devid', devid], \
['p-dev', p_hctl], \
['p-devname', devname], \
@@ -1035,6 +1036,14 @@ def preprocess_ioports(vals):
ioports.append(hexd)
vals.ioports = ioports
+def preprocess_irq(vals):
+ if not vals.irq: return
+ irq = []
+ for v in vals.irq:
+ d = repr(v)
+ irq.append(d)
+ vals.irq = irq
+
def preprocess_vtpm(vals):
if not vals.vtpm: return
vtpms = []
@@ -1133,6 +1142,7 @@ def preprocess(vals):
preprocess_vscsi(vals)
preprocess_ioports(vals)
preprocess_ip(vals)
+ preprocess_irq(vals)
preprocess_nfs(vals)
preprocess_vtpm(vals)
preprocess_access_control(vals)
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Tue Nov 04 12:07:22 2008 +0900
+++ b/tools/python/xen/xm/main.py Tue Nov 04 12:43:19 2008 +0900
@@ -47,6 +47,7 @@ from xen.xend import sxp
from xen.xend import sxp
from xen.xend import XendClient
from xen.xend.XendConstants import *
+from xen.xend.server.DevConstants import xenbusState
from xen.xm.opts import OptionError, Opts, wrap, set_true
from xen.xm import console
@@ -2515,7 +2516,7 @@ def xm_scsi_attach(args):
dom = args[0]
p_scsi = args[1]
v_hctl = args[2]
- scsi = parse_scsi_configuration(p_scsi, v_hctl, 'Initialising')
+ scsi = parse_scsi_configuration(p_scsi, v_hctl, xenbusState['Initialising'])
if serverType == SERVER_XEN_API:
@@ -2635,7 +2636,7 @@ def xm_scsi_detach(args):
arg_check(args, 'scsi-detach', 2)
dom = args[0]
v_hctl = args[1]
- scsi = parse_scsi_configuration(None, v_hctl, 'Closing')
+ scsi = parse_scsi_configuration(None, v_hctl, xenbusState['Closing'])
if serverType == SERVER_XEN_API:
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/xenpmd/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpmd/Makefile Tue Nov 04 12:43:19 2008 +0900
@@ -0,0 +1,20 @@
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS += -Werror
+CFLAGS += $(CFLAGS_libxenstore)
+LDFLAGS += $(LDFLAGS_libxenstore)
+
+BIN = xenpmd
+
+.PHONY: all
+all: $(BIN)
+
+.PHONY: install
+install: all
+ $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_PROG) $(BIN) $(DESTDIR)$(SBINDIR)
+
+.PHONY: clean
+clean:
+ $(RM) -f $(BIN)
diff -r 10f0e1bb8e5e -r e75cb35c798b tools/xenpmd/xenpmd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpmd/xenpmd.c Tue Nov 04 12:43:19 2008 +0900
@@ -0,0 +1,520 @@
+/*
+ * xenpmd.c
+ *
+ * xen power management daemon - Facilitates power management
+ * functionality within xen guests.
+ *
+ * Copyright (c) 2008 Kamala Narasimhan
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Xen extended power management support provides HVM guest power management
+ * features beyond S3, S4, S5. For example, it helps expose system-level
+ * battery status and battery meter information, and in the future will be
+ * extended to include more power management support. This extended power
+ * management support is enabled by setting xen_extended_power_mgmt to 1 or 2
+ * in the HVM config file. When set to 2, non-pass-through mode is enabled,
+ * which relies heavily on this power management daemon to glean battery
+ * information from dom0 and store it in xenstore, where it is then queried
+ * by qemu and passed to the guest when the battery ports are read/written.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <xs.h>
+
+/* #define RUN_STANDALONE */
+#define RUN_IN_SIMULATE_MODE
+
+enum BATTERY_INFO_TYPE {
+ BIF,
+ BST
+};
+
+enum BATTERY_PRESENT {
+ NO,
+ YES
+};
+
+enum BATTERY_TECHNOLOGY {
+ NON_RECHARGEABLE,
+ RECHARGEABLE
+};
+
+struct battery_info {
+ enum BATTERY_PRESENT present;
+ unsigned long design_capacity;
+ unsigned long last_full_capacity;
+ enum BATTERY_TECHNOLOGY battery_technology;
+ unsigned long design_voltage;
+ unsigned long design_capacity_warning;
+ unsigned long design_capacity_low;
+ unsigned long capacity_granularity_1;
+ unsigned long capacity_granularity_2;
+ char model_number[32];
+ char serial_number[32];
+ char battery_type[32];
+ char oem_info[32];
+};
+
+struct battery_status {
+ enum BATTERY_PRESENT present;
+ unsigned long state;
+ unsigned long present_rate;
+ unsigned long remaining_capacity;
+ unsigned long present_voltage;
+};
+
+static struct xs_handle *xs;
+
+#ifdef RUN_IN_SIMULATE_MODE
+ #define BATTERY_DIR_PATH "/tmp/battery"
+ #define BATTERY_INFO_FILE_PATH "/tmp/battery/%s/info"
+ #define BATTERY_STATE_FILE_PATH "/tmp/battery/%s/state"
+#else
+ #define BATTERY_DIR_PATH "/proc/acpi/battery"
+ #define BATTERY_INFO_FILE_PATH "/proc/acpi/battery/%s/info"
+ #define BATTERY_STATE_FILE_PATH "/proc/acpi/battery/%s/state"
+#endif
+
+FILE *get_next_battery_file(DIR *battery_dir,
+ enum BATTERY_INFO_TYPE battery_info_type)
+{
+ FILE *file = 0;
+ struct dirent *dir_entries;
+ char file_name[32];
+
+ do
+ {
+ dir_entries = readdir(battery_dir);
+ if ( !dir_entries )
+ return 0;
+ if ( strlen(dir_entries->d_name) < 4 )
+ continue;
+ if ( battery_info_type == BIF )
+ snprintf(file_name, 32, BATTERY_INFO_FILE_PATH,
+ dir_entries->d_name);
+ else
+ snprintf(file_name, 32, BATTERY_STATE_FILE_PATH,
+ dir_entries->d_name);
+ file = fopen(file_name, "r");
+ } while ( !file );
+
+ return file;
+}
+
+void set_attribute_battery_info(char *attrib_name,
+ char *attrib_value,
+ struct battery_info *info)
+{
+ if ( strstr(attrib_name, "present") )
+ {
+ if ( strstr(attrib_value, "yes") )
+ info->present = YES;
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity warning") )
+ {
+ info->design_capacity_warning = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity low") )
+ {
+ info->design_capacity_low = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity") )
+ {
+ info->design_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "last full capacity") )
+ {
+ info->last_full_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design voltage") )
+ {
+ info->design_voltage = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "capacity granularity 1") )
+ {
+ info->capacity_granularity_1 = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "capacity granularity 2") )
+ {
+ info->capacity_granularity_2 = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "battery technology") )
+ {
+ if ( strncmp(attrib_value, "rechargeable",
+ strlen("rechargeable")) == 0 )
+ info->battery_technology = RECHARGEABLE;
+ else
+ info->battery_technology = NON_RECHARGEABLE;
+ return;
+ }
+
+ if ( strstr(attrib_name, "model number") )
+ {
+ strncpy(info->model_number, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "serial number") )
+ {
+ strncpy(info->serial_number, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "battery type") )
+ {
+ strncpy(info->battery_type, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "OEM info") )
+ {
+ strncpy(info->oem_info, attrib_value, 32);
+ return;
+ }
+
+ return;
+}
+
+void set_attribute_battery_status(char *attrib_name,
+ char *attrib_value,
+ struct battery_status *status)
+{
+ if ( strstr(attrib_name, "charging state") )
+ {
+ /* Check this, below is half baked */
+ if ( strstr(attrib_value, "charged") )
+ status->state = 0;
+ else
+ status->state = 1;
+ return;
+ }
+
+ if ( strstr(attrib_name, "present rate") )
+ {
+ status->present_rate = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "remaining capacity") )
+ {
+ status->remaining_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "present voltage") )
+ {
+ status->present_voltage = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "present") )
+ {
+ if ( strstr(attrib_value, "yes") )
+ status->present = YES;
+ return;
+ }
+}
+
+void parse_battery_info_or_status(char *line_info,
+ enum BATTERY_INFO_TYPE type,
+ void *info_or_status)
+{
+ char attrib_name[128];
+ char attrib_value[64];
+ char *delimiter;
+ unsigned long length;
+
+ length = strlen(line_info);
+ delimiter = (char *) strchr( line_info, ':');
+ if ( (!delimiter) || (delimiter == line_info) ||
+ (delimiter == line_info + length) )
+ return;
+
+ strncpy(attrib_name, line_info, delimiter-line_info);
+ while ( *(delimiter+1) == ' ' )
+ {
+ delimiter++;
+ if ( delimiter+1 == line_info + length)
+ return;
+ }
+ strncpy(attrib_value, delimiter+1,
+ (unsigned long)line_info + length -(unsigned long)delimiter);
+
+ if ( type == BIF )
+ set_attribute_battery_info(attrib_name, attrib_value,
+ (struct battery_info *)info_or_status);
+ else
+ set_attribute_battery_status(attrib_name, attrib_value,
+ (struct battery_status *)info_or_status);
+
+ return;
+}
+
+int get_next_battery_info_or_status(DIR *battery_dir,
+ enum BATTERY_INFO_TYPE type,
+ void *info_or_status)
+{
+ FILE *file;
+ char line_info[256];
+
+ if ( !info_or_status )
+ return 0;
+
+ memset(line_info, 0, 256);
+ if (type == BIF)
+ memset(info_or_status, 0, sizeof(struct battery_info));
+ else
+ memset(info_or_status, 0, sizeof(struct battery_status));
+
+ file = get_next_battery_file(battery_dir, type);
+ if ( !file )
+ return 0;
+
+ while ( fgets(line_info, sizeof(line_info), file) != NULL )
+ {
+ parse_battery_info_or_status(line_info, type, info_or_status);
+ memset(line_info, 0, 256);
+ }
+
+ fclose(file);
+ return 1;
+}
+
+#ifdef RUN_STANDALONE
+void print_battery_info(struct battery_info *info)
+{
+ printf("present: %d\n", info->present);
+ printf("design capacity: %d\n", info->design_capacity);
+ printf("last full capacity: %d\n", info->last_full_capacity);
+ printf("battery technology: %d\n", info->battery_technology);
+ printf("design voltage: %d\n", info->design_voltage);
+ printf("design capacity warning:%d\n", info->design_capacity_warning);
+ printf("design capacity low: %d\n", info->design_capacity_low);
+ printf("capacity granularity 1: %d\n", info->capacity_granularity_1);
+ printf("capacity granularity 2: %d\n", info->capacity_granularity_2);
+ printf("model number: %s\n", info->model_number);
+ printf("serial number: %s\n", info->serial_number);
+ printf("battery type: %s\n", info->battery_type);
+ printf("OEM info: %s\n", info->oem_info);
+}
+#endif /*RUN_STANDALONE*/
+
+void write_ulong_lsb_first(char *temp_val, unsigned long val)
+{
+ snprintf(temp_val, 9, "%02x%02x%02x%02x", (unsigned int)val & 0xff,
+ (unsigned int)(val & 0xff00) >> 8, (unsigned int)(val & 0xff0000) >> 16,
+ (unsigned int)(val & 0xff000000) >> 24);
+}
+
+void write_battery_info_to_xenstore(struct battery_info *info)
+{
+ char val[1024], string_info[256];
+
+ xs_mkdir(xs, XBT_NULL, "/pm");
+
+ memset(val, 0, 1024);
+ memset(string_info, 0, 256);
+ /* write 9 dwords (so 9*4) + length of 4 strings + 4 null terminators */
+ snprintf(val, 3, "%02x",
+ (unsigned int)(9*4 +
+ strlen(info->model_number) +
+ strlen(info->serial_number) +
+ strlen(info->battery_type) +
+ strlen(info->oem_info) + 4));
+ write_ulong_lsb_first(val+2, info->present);
+ write_ulong_lsb_first(val+10, info->design_capacity);
+ write_ulong_lsb_first(val+18, info->last_full_capacity);
+ write_ulong_lsb_first(val+26, info->battery_technology);
+ write_ulong_lsb_first(val+34, info->design_voltage);
+ write_ulong_lsb_first(val+42, info->design_capacity_warning);
+ write_ulong_lsb_first(val+50, info->design_capacity_low);
+ write_ulong_lsb_first(val+58, info->capacity_granularity_1);
+ write_ulong_lsb_first(val+66, info->capacity_granularity_2);
+
+ snprintf(string_info, 256, "%02x%s%02x%s%02x%s%02x%s",
+ (unsigned int)strlen(info->model_number), info->model_number,
+ (unsigned int)strlen(info->serial_number), info->serial_number,
+ (unsigned int)strlen(info->battery_type), info->battery_type,
+ (unsigned int)strlen(info->oem_info), info->oem_info);
+ strncat(val+73, string_info, 1024);
+ xs_write(xs, XBT_NULL, "/pm/bif",
+ val, 73+8+strlen(info->model_number)+strlen(info->serial_number)+
+ strlen(info->battery_type)+strlen(info->oem_info)+1);
+}
+
+int write_one_time_battery_info(void)
+{
+ DIR *dir;
+ int ret = 0;
+ struct battery_info info;
+
+ dir = opendir(BATTERY_DIR_PATH);
+ if ( !dir )
+ return 0;
+
+ while ( get_next_battery_info_or_status(dir, BIF, (void *)&info) )
+ {
+#ifdef RUN_STANDALONE
+ print_battery_info(&info);
+#endif
+ if ( info.present == YES )
+ {
+ write_battery_info_to_xenstore(&info);
+ ret = 1;
+ break; /* rethink this... */
+ }
+ }
+
+ closedir(dir);
+ return ret;
+}
+
+#ifdef RUN_STANDALONE
+void print_battery_status(struct battery_status *status)
+{
+ printf("present: %d\n", status->present);
+ printf("Battery state %d\n", status->state);
+ printf("Battery present rate %d\n", status->present_rate);
+ printf("Battery remining capacity %d\n", status->remaining_capacity);
+ printf("Battery present voltage %d\n", status->present_voltage);
+}
+#endif /*RUN_STANDALONE*/
+
+void write_battery_status_to_xenstore(struct battery_status *status)
+{
+ char val[35];
+
+ xs_mkdir(xs, XBT_NULL, "/pm");
+
+ memset(val, 0, 35);
+ snprintf(val, 3, "%02x", 16);
+ write_ulong_lsb_first(val+2, status->state);
+ write_ulong_lsb_first(val+10, status->present_rate);
+ write_ulong_lsb_first(val+18, status->remaining_capacity);
+ write_ulong_lsb_first(val+26, status->present_voltage);
+
+ xs_write(xs, XBT_NULL, "/pm/bst", val, 35);
+}
+
+int wait_for_and_update_battery_status_request(void)
+{
+ DIR *dir;
+ int ret = 0;
+ unsigned int count;
+ struct battery_status status;
+
+ while ( true )
+ {
+ /* KN:@TODO - It is rather inefficient to not cache the file handle.
+ * Switch to caching file handle.
+ */
+ dir = opendir(BATTERY_DIR_PATH);
+ if ( !dir )
+ return 0;
+
+ while ( get_next_battery_info_or_status(dir, BST, (void *)&status) )
+ {
+#ifdef RUN_STANDALONE
+ print_battery_status(&status);
+#endif
+ if ( status.present == YES )
+ {
+ write_battery_status_to_xenstore(&status);
+ ret = 1;
+ /* rethink this; though I have never seen one, there might be
+ * systems out there with more than one battery device
+ * present
+ */
+ break;
+ }
+ }
+ closedir(dir);
+ xs_watch(xs, "/pm/events", "refreshbatterystatus");
+ xs_read_watch(xs, &count);
+ }
+
+ return ret;
+}
+
+/* Borrowed daemonize from xenstored - Initially written by Stevens. */
+static void daemonize(void)
+{
+ pid_t pid;
+
+ if ( (pid = fork()) < 0 )
+ exit(1);
+
+ if ( pid != 0 )
+ exit(0);
+
+ setsid();
+
+ if ( (pid = fork()) < 0 )
+ exit(1);
+
+ if ( pid != 0 )
+ exit(0);
+
+ if ( chdir("/") == -1 )
+ exit(1);
+
+ umask(0);
+}
+
+int main(int argc, char *argv[])
+{
+#ifndef RUN_STANDALONE
+ daemonize();
+#endif
+ xs = (struct xs_handle *)xs_daemon_open();
+ if ( xs == NULL )
+ return -1;
+
+ if ( write_one_time_battery_info() == 0 )
+ {
+ xs_daemon_close(xs);
+ return -1;
+ }
+
+ wait_for_and_update_battery_status_request();
+ xs_daemon_close(xs);
+ return 0;
+}
+
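
For reference, a minimal Python sketch (illustrative, not part of the daemon) of how a consumer could decode the /pm/bst record written by write_battery_status_to_xenstore() above: a 2-hex-digit length (16) followed by four dwords, each emitted by write_ulong_lsb_first() as eight hex digits with the least significant byte first:

    def read_lsb_dword(s, off):
        # Reassemble a dword stored as hex byte pairs, least significant first.
        return sum(int(s[off + 2*i : off + 2*i + 2], 16) << (8 * i)
                   for i in range(4))

    # Hypothetical record: state 1, rate 100000, capacity 10000, voltage 12008.
    val = "10" + "01000000" + "a0860100" + "10270000" + "e82e0000"
    state              = read_lsb_dword(val, 2)    # 1
    present_rate       = read_lsb_dword(val, 10)   # 100000
    remaining_capacity = read_lsb_dword(val, 18)   # 10000
    present_voltage    = read_lsb_dword(val, 26)   # 12008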
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/ia64/xen/cpufreq/cpufreq.c
--- a/xen/arch/ia64/xen/cpufreq/cpufreq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/ia64/xen/cpufreq/cpufreq.c Tue Nov 04 12:43:19 2008 +0900
@@ -210,21 +210,6 @@ acpi_cpufreq_cpu_init (struct cpufreq_po
data->acpi_data = &processor_pminfo[cpu]->perf;
- /* capability check */
- if (data->acpi_data->state_count <= 1) {
- printk(KERN_WARNING "P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if ((data->acpi_data->control_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data->status_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- result = -ENODEV;
- goto err_unreg;
- }
-
data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
(data->acpi_data->state_count + 1));
if (!data->freq_table) {
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/ia64/xen/irq.c Tue Nov 04 12:43:19 2008 +0900
@@ -74,7 +74,7 @@ unsigned int __ia64_local_vector_to_irq
/*
* Controller mappings for all interrupt sources:
*/
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
+irq_desc_t irq_desc[NR_IRQS] = {
[0 ... NR_IRQS-1] = {
.status = IRQ_DISABLED,
.handler = &no_irq_type,
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/acpi/cpu_idle.c Tue Nov 04 12:43:19 2008 +0900
@@ -75,13 +75,14 @@ static void print_acpi_power(uint32_t cp
printk("==cpu%d==\n", cpu);
printk("active state:\t\tC%d\n",
- power->last_state ? (int)(power->last_state - power->states) : -1);
+ power->last_state ? power->last_state->idx : -1);
printk("max_cstate:\t\tC%d\n", max_cstate);
printk("states:\n");
for ( i = 1; i < power->count; i++ )
{
- printk((power->last_state == &power->states[i]) ? " *" : " ");
+ printk((power->last_state && power->last_state->idx == i) ?
+ " *" : " ");
printk("C%d:\t", i);
printk("type[C%d] ", power->states[i].type);
printk("latency[%03d] ", power->states[i].latency);
@@ -139,20 +140,26 @@ static void acpi_processor_ffh_cstate_en
static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
- if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
- {
+ int unused;
+
+ switch ( cx->entry_method )
+ {
+ case ACPI_CSTATE_EM_FFH:
/* Call into architectural FFH based C-state */
acpi_processor_ffh_cstate_enter(cx);
- }
- else
- {
- int unused;
+ return;
+ case ACPI_CSTATE_EM_SYSIO:
/* IO port based C-state */
inb(cx->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
unused = inl(pmtmr_ioport);
+ return;
+ case ACPI_CSTATE_EM_HALT:
+ acpi_safe_halt();
+ local_irq_disable();
+ return;
}
}
@@ -222,7 +229,7 @@ static void acpi_processor_idle(void)
if ( power->flags.bm_check && acpi_idle_bm_check()
&& cx->type == ACPI_STATE_C3 )
cx = power->safe_state;
- if ( cx - &power->states[0] > max_cstate )
+ if ( cx->idx > max_cstate )
cx = &power->states[max_cstate];
}
if ( !cx )
@@ -252,35 +259,11 @@ static void acpi_processor_idle(void)
switch ( cx->type )
{
case ACPI_STATE_C1:
- /* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, 1);
-
- /*
- * Invoke C1.
- * Use the appropriate idle routine, the one that would
- * be used without acpi C-states.
- */
- if ( pm_idle_save )
- pm_idle_save();
- else
- acpi_safe_halt();
-
- /* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, 1);
-
- /*
- * TBD: Can't get time duration while in C1, as resumes
- * go to an ISR rather than here. Need to instrument
- * base interrupt handler.
- */
- sleep_ticks = 0xFFFFFFFF;
- break;
-
case ACPI_STATE_C2:
- if ( local_apic_timer_c2_ok )
+ if ( cx->type == ACPI_STATE_C1 || local_apic_timer_c2_ok )
{
/* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, 2);
+ TRACE_1D(TRC_PM_IDLE_ENTRY, cx->idx);
/* Get start time (ticks) */
t1 = inl(pmtmr_ioport);
/* Invoke C2 */
@@ -288,7 +271,7 @@ static void acpi_processor_idle(void)
/* Get end time (ticks) */
t2 = inl(pmtmr_ioport);
/* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, 2);
+ TRACE_1D(TRC_PM_IDLE_EXIT, cx->idx);
/* Re-enable interrupts */
local_irq_enable();
@@ -328,7 +311,7 @@ static void acpi_processor_idle(void)
}
/* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, cx - &power->states[0]);
+ TRACE_1D(TRC_PM_IDLE_ENTRY, cx->idx);
/*
* Before invoking C3, be aware that TSC/APIC timer may be
* stopped by H/W. Without carefully handling of TSC/APIC stop issues,
@@ -349,7 +332,7 @@ static void acpi_processor_idle(void)
/* recovering TSC */
cstate_restore_tsc();
/* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, cx - &power->states[0]);
+ TRACE_1D(TRC_PM_IDLE_EXIT, cx->idx);
if ( power->flags.bm_check && power->flags.bm_control )
{
@@ -387,9 +370,15 @@ static void acpi_processor_idle(void)
static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
{
+ int i;
+
memset(acpi_power, 0, sizeof(*acpi_power));
+ for ( i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+ acpi_power->states[i].idx = i;
+
acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+ acpi_power->states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_EM_HALT;
acpi_power->states[ACPI_STATE_C0].valid = 1;
acpi_power->states[ACPI_STATE_C1].valid = 1;
@@ -486,16 +475,13 @@ static int check_cx(struct acpi_processo
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
- if ( cx->type > ACPI_STATE_C1 )
- {
- if ( cx->reg.bit_width != VENDOR_INTEL ||
- cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
- return -EINVAL;
-
- /* assume all logical cpu has the same support for mwait */
- if ( acpi_processor_ffh_cstate_probe(cx) )
- return -EINVAL;
- }
+ if ( cx->reg.bit_width != VENDOR_INTEL ||
+ cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
+ return -EINVAL;
+
+ /* assume all logical cpu has the same support for mwait */
+ if ( acpi_processor_ffh_cstate_probe(cx) )
+ return -EINVAL;
break;
default:
@@ -599,7 +585,23 @@ static void set_cx(
cx->valid = 1;
cx->type = xen_cx->type;
cx->address = xen_cx->reg.address;
- cx->space_id = xen_cx->reg.space_id;
+
+ switch ( xen_cx->reg.space_id )
+ {
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+ if ( xen_cx->reg.bit_width == VENDOR_INTEL &&
+ xen_cx->reg.bit_offset == NATIVE_CSTATE_BEYOND_HALT )
+ cx->entry_method = ACPI_CSTATE_EM_FFH;
+ else
+ cx->entry_method = ACPI_CSTATE_EM_HALT;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ cx->entry_method = ACPI_CSTATE_EM_SYSIO;
+ break;
+ default:
+ cx->entry_method = ACPI_CSTATE_EM_NONE;
+ }
+
cx->latency = xen_cx->latency;
cx->power = xen_cx->power;
@@ -761,8 +763,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
return 0;
}
- stat->last = (power->last_state) ?
- (int)(power->last_state - &power->states[0]) : 0;
+ stat->last = power->last_state ? power->last_state->idx : 0;
stat->nr = power->count;
stat->idle_time = v->runstate.time[RUNSTATE_running];
if ( v->is_running )
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Tue Nov 04 12:43:19 2008 +0900
@@ -370,7 +370,7 @@ static int acpi_cpufreq_target(struct cp
if (!check_freqs(cmd.mask, freqs.new, data))
return -EAGAIN;
- for_each_cpu_mask(j, cmd.mask)
+ for_each_cpu_mask(j, online_policy_cpus)
cpufreq_statistic_update(j, perf->state, next_perf_state);
perf->state = next_perf_state;
@@ -447,18 +447,6 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
perf = data->acpi_data;
policy->shared_type = perf->shared_type;
- /* capability check */
- if (perf->state_count <= 1) {
- printk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if (perf->control_register.space_id != perf->status_register.space_id) {
- result = -ENODEV;
- goto err_unreg;
- }
-
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/acpi/cpufreq/powernow.c
--- a/xen/arch/x86/acpi/cpufreq/powernow.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/acpi/cpufreq/powernow.c Tue Nov 04 12:43:19 2008 +0900
@@ -229,9 +229,23 @@ err_unreg:
return result;
}
+static int powernow_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ struct powernow_cpufreq_data *data = drv_data[policy->cpu];
+
+ if (data) {
+ drv_data[policy->cpu] = NULL;
+ xfree(data->freq_table);
+ xfree(data);
+ }
+
+ return 0;
+}
+
static struct cpufreq_driver powernow_cpufreq_driver = {
.target = powernow_cpufreq_target,
.init = powernow_cpufreq_cpu_init,
+ .exit = powernow_cpufreq_cpu_exit
};
int powernow_cpufreq_init(void)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/acpi/cpuidle_menu.c
--- a/xen/arch/x86/acpi/cpuidle_menu.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/acpi/cpuidle_menu.c Tue Nov 04 12:43:19 2008 +0900
@@ -59,7 +59,7 @@ static int menu_select(struct acpi_proce
data->expected_us = (u32) get_sleep_length_ns() / 1000;
/* find the deepest idle state that satisfies our constraints */
- for ( i = 1; i < power->count; i++ )
+ for ( i = 2; i < power->count; i++ )
{
struct acpi_processor_cx *s = &power->states[i];
@@ -81,17 +81,7 @@ static void menu_reflect(struct acpi_pro
unsigned int last_residency;
unsigned int measured_us;
- /*
- * Ugh, this idle state doesn't support residency measurements, so we
- * are basically lost in the dark. As a compromise, assume we slept
- * for one full standard timer tick. However, be aware that this
- * could potentially result in a suboptimal state transition.
- */
- if ( target->type == ACPI_STATE_C1 )
- last_residency = USEC_PER_SEC / HZ;
- else
- last_residency = power->last_residency;
-
+ last_residency = power->last_residency;
measured_us = last_residency + data->elapsed_us;
/* if wrapping, set to max uint (-1) */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/domain.c Tue Nov 04 12:43:19 2008 +0900
@@ -174,9 +174,10 @@ void free_vcpu_struct(struct vcpu *v)
static int setup_compat_l4(struct vcpu *v)
{
- struct page_info *pg = alloc_domheap_page(NULL, 0);
+ struct page_info *pg;
l4_pgentry_t *l4tab;
+ pg = alloc_domheap_page(NULL, MEMF_node(vcpu_to_node(v)));
if ( pg == NULL )
return -ENOMEM;
@@ -1639,31 +1640,22 @@ static int relinquish_memory(
}
if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
- put_page_and_type(page);
+ ret = put_page_and_type_preemptible(page, 1);
+ switch ( ret )
+ {
+ case 0:
+ break;
+ case -EAGAIN:
+ case -EINTR:
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ put_page(page);
+ goto out;
+ default:
+ BUG();
+ }
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
-
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- /*
- * Forcibly drop reference counts of page tables above top most (which
- * were skipped to prevent long latencies due to deep recursion - see
- * the special treatment in free_lX_table()).
- */
- y = page->u.inuse.type_info;
- if ( (type < PGT_root_page_table) &&
- unlikely(((y + PGT_type_mask) &
- (PGT_type_mask|PGT_validated)) == type) )
- {
- BUG_ON((y & PGT_count_mask) >=
- (page->count_info & PGC_count_mask));
- while ( y & PGT_count_mask )
- {
- put_page_and_type(page);
- y = page->u.inuse.type_info;
- }
- }
-#endif
/*
* Forcibly invalidate top-most, still valid page tables at this point
@@ -1685,8 +1677,31 @@ static int relinquish_memory(
x & ~(PGT_validated|PGT_partial));
if ( likely(y == x) )
{
- if ( free_page_type(page, x, 0) != 0 )
+ /* No need for atomic update of type_info here: no one else updates it. */
+ switch ( ret = free_page_type(page, x, 1) )
+ {
+ case 0:
+ break;
+ case -EINTR:
+ page->u.inuse.type_info |= PGT_validated;
+ if ( x & PGT_partial )
+ put_page(page);
+ put_page(page);
+ ret = -EAGAIN;
+ goto out;
+ case -EAGAIN:
+ page->u.inuse.type_info |= PGT_partial;
+ if ( x & PGT_partial )
+ put_page(page);
+ goto out;
+ default:
BUG();
+ }
+ if ( x & PGT_partial )
+ {
+ page->u.inuse.type_info--;
+ put_page(page);
+ }
break;
}
}
@@ -1831,11 +1846,6 @@ int domain_relinquish_resources(struct d
/* fallthrough */
case RELMEM_done:
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- ret = relinquish_memory(d, &d->page_list, PGT_l1_page_table);
- if ( ret )
- return ret;
-#endif
break;
default:
@@ -1891,6 +1901,54 @@ void domain_cpuid(
*eax = *ebx = *ecx = *edx = 0;
}
+
+void vcpu_kick(struct vcpu *v)
+{
+ /*
+ * NB1. 'pause_flags' and 'processor' must be checked /after/ update of
+ * pending flag. These values may fluctuate (after all, we hold no
+ * locks) but the key insight is that each change will cause
+ * evtchn_upcall_pending to be polled.
+ *
+ * NB2. We save the running flag across the unblock to avoid a needless
+ * IPI for domains that we IPI'd to unblock.
+ */
+ bool_t running = v->is_running;
+ vcpu_unblock(v);
+ if ( running && (in_irq() || (v != current)) )
+ cpu_raise_softirq(v->processor, VCPU_KICK_SOFTIRQ);
+}
+
+void vcpu_mark_events_pending(struct vcpu *v)
+{
+ int already_pending = test_and_set_bit(
+ 0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));
+
+ if ( already_pending )
+ return;
+
+ if ( is_hvm_vcpu(v) )
+ hvm_assert_evtchn_irq(v);
+ else
+ vcpu_kick(v);
+}
+
+static void vcpu_kick_softirq(void)
+{
+ /*
+ * Nothing to do here: we merely prevent notifiers from racing with checks
+ * executed on return to guest context with interrupts enabled. See, for
+ * example, xxx_intr_assist() executed on return to HVM guest context.
+ */
+}
+
+static int __init init_vcpu_kick_softirq(void)
+{
+ open_softirq(VCPU_KICK_SOFTIRQ, vcpu_kick_softirq);
+ return 0;
+}
+__initcall(init_vcpu_kick_softirq);
+
/*
* Local variables:
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/domain_build.c Tue Nov 04 12:43:19 2008 +0900
@@ -194,6 +194,30 @@ static void __init process_dom0_ioports_
}
}
+/* We run on dom0's page tables for the final part of the build process. */
+static void dom0_pt_enter(struct vcpu *v)
+{
+ struct desc_ptr gdt_desc = {
+ .limit = LAST_RESERVED_GDT_BYTE,
+ .base = (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY)
+ };
+
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+ write_ptbase(v);
+}
+
+/* Return to idle domain's page tables. */
+static void dom0_pt_exit(void)
+{
+ struct desc_ptr gdt_desc = {
+ .limit = LAST_RESERVED_GDT_BYTE,
+ .base = GDT_VIRT_START(current)
+ };
+
+ write_ptbase(current);
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+}
+
int __init construct_dom0(
struct domain *d,
unsigned long _image_start, unsigned long image_len,
@@ -700,14 +724,12 @@ int __init construct_dom0(
(void)alloc_vcpu(d, i, i % num_online_cpus());
/* Set up CR3 value for write_ptbase */
- if ( paging_mode_enabled(v->domain) )
+ if ( paging_mode_enabled(d) )
paging_update_paging_modes(v);
else
update_cr3(v);
- /* Install the new page tables. */
- local_irq_disable();
- write_ptbase(v);
+ dom0_pt_enter(v);
/* Copy the OS image and free temporary buffer. */
elf.dest = (void*)vkern_start;
@@ -804,9 +826,7 @@ int __init construct_dom0(
xlat_start_info(si, XLAT_start_info_console_dom0);
#endif
- /* Reinstate the caller's page tables. */
- write_ptbase(current);
- local_irq_enable();
+ dom0_pt_exit();
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hpet.c Tue Nov 04 12:43:19 2008 +0900
@@ -14,8 +14,6 @@
#include <asm/div64.h>
#include <asm/hpet.h>
-#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
-
#define MAX_DELTA_NS MILLISECS(10*1000)
#define MIN_DELTA_NS MICROSECS(20)
@@ -146,7 +144,7 @@ static void handle_hpet_broadcast(struct
s_time_t now, next_event;
int cpu;
- spin_lock(&ch->lock);
+ spin_lock_irq(&ch->lock);
again:
ch->next_event = STIME_MAX;
@@ -171,7 +169,7 @@ again:
if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) )
goto again;
}
- spin_unlock(&ch->lock);
+ spin_unlock_irq(&ch->lock);
}
void hpet_broadcast_init(void)
@@ -213,6 +211,7 @@ void hpet_broadcast_enter(void)
{
struct hpet_event_channel *ch = &hpet_event;
+ ASSERT(!local_irq_is_enabled());
spin_lock(&ch->lock);
disable_APIC_timer();
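
The locking change above is subtle: handle_hpet_broadcast() can be reached with interrupts enabled, so it must disable them around the channel lock, while hpet_broadcast_enter() is called from the idle path with interrupts already off and merely asserts that precondition. A hedged sketch of the convention, assuming a Xen build environment (the channel struct and header are illustrative):

#include <xen/spinlock.h>   /* illustrative; exact headers depend on the tree */

static struct { spinlock_t lock; } hpet_chan;

/* Timer interrupt path: IRQs may be on, so disable them across the lock.
 * Otherwise an interrupt taken while the lock is held on this CPU could
 * re-enter and deadlock on the same lock. */
static void broadcast_handler(void)
{
    spin_lock_irq(&hpet_chan.lock);
    /* ... scan CPUs, reprogram the channel ... */
    spin_unlock_irq(&hpet_chan.lock);
}

/* Idle entry path: the caller has already disabled interrupts, so a
 * plain spin_lock() is safe; the ASSERT documents the precondition. */
static void broadcast_enter(void)
{
    ASSERT(!local_irq_is_enabled());
    spin_lock(&hpet_chan.lock);
    /* ... disable_APIC_timer(), join the broadcast mask ... */
    spin_unlock(&hpet_chan.lock);
}
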
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/emulate.c Tue Nov 04 12:43:19 2008 +0900
@@ -14,10 +14,38 @@
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/paging.h>
+#include <xen/trace.h>
#include <asm/event.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
+
+#define HVMTRACE_IO_ASSIST_WRITE 0x200
+static void hvmtrace_io_assist(int is_mmio, ioreq_t *p)
+{
+ unsigned int size, event;
+ unsigned char buffer[12];
+
+ if ( likely(!tb_init_done) )
+ return;
+
+ event = is_mmio ? TRC_HVM_MMIO_ASSIST : TRC_HVM_IO_ASSIST;
+ if ( !p->dir )
+ event |= HVMTRACE_IO_ASSIST_WRITE;
+
+ *(uint64_t *)buffer = p->addr;
+ size = (p->addr != (u32)p->addr) ? 8 : 4;
+ if ( size == 8 )
+ event |= TRC_64_FLAG;
+
+ if ( !p->data_is_ptr )
+ {
+ *(uint32_t *)&buffer[size] = p->data;
+ size += 4;
+ }
+
+ trace_var(event, 0/*!cycles*/, size, buffer);
+}
static int hvmemul_do_io(
int is_mmio, paddr_t addr, unsigned long *reps, int size,
@@ -110,6 +138,8 @@ static int hvmemul_do_io(
p->df = df;
p->data = value;
p->io_count++;
+
+ hvmtrace_io_assist(is_mmio, p);
if ( is_mmio )
{
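
hvmtrace_io_assist() packs a variable-length record: 4 or 8 bytes of address (8 when the address does not fit in 32 bits, flagged by TRC_64_FLAG in the event id), optionally followed by 4 bytes of inline data when the request is not data_is_ptr. A hedged sketch of a matching host-side decoder (the constants and function are illustrative, not an existing tool):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TRC_64_FLAG              0x100 /* illustrative stand-in */
#define HVMTRACE_IO_ASSIST_WRITE 0x200 /* matches the producer above */

static void decode_io_assist(uint32_t event, const uint8_t *buf, size_t len)
{
    uint64_t addr = 0;
    size_t asz = (event & TRC_64_FLAG) ? 8 : 4;

    memcpy(&addr, buf, asz);           /* little-endian producer assumed */
    printf("%s addr=%#llx", (event & HVMTRACE_IO_ASSIST_WRITE) ? "write"
                                                               : "read",
           (unsigned long long)addr);
    if (len >= asz + 4)                /* inline data present */
    {
        uint32_t data;
        memcpy(&data, buf + asz, 4);
        printf(" data=%#x", data);
    }
    printf("\n");
}
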
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/hpet.c Tue Nov 04 12:43:19 2008 +0900
@@ -76,6 +76,7 @@
~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
#define timer_config(h, n) (h->hpet.timers[n].config)
+#define timer_enabled(h, n) (timer_config(h, n) & HPET_TN_ENABLE)
#define timer_is_periodic(h, n) (timer_config(h, n) & HPET_TN_PERIODIC)
#define timer_is_32bit(h, n) (timer_config(h, n) & HPET_TN_32BIT)
#define hpet_enabled(h) (h->hpet.config & HPET_CFG_ENABLE)
@@ -88,9 +89,40 @@
((timer_config(h, n) & HPET_TN_INT_ROUTE_CAP_MASK) \
>> HPET_TN_INT_ROUTE_CAP_SHIFT)
-#define hpet_time_after(a, b) ((int32_t)(b) - (int32_t)(a) < 0)
-#define hpet_time_after64(a, b) ((int64_t)(b) - (int64_t)(a) < 0)
-
+static inline uint64_t hpet_read_maincounter(HPETState *h)
+{
+ ASSERT(spin_is_locked(&h->lock));
+
+ if ( hpet_enabled(h) )
+ return guest_time_hpet(h->vcpu) + h->mc_offset;
+ else
+ return h->hpet.mc64;
+}
+
+static uint64_t hpet_get_comparator(HPETState *h, unsigned int tn)
+{
+ uint64_t comparator;
+ uint64_t elapsed;
+
+ comparator = h->hpet.comparator64[tn];
+ if ( timer_is_periodic(h, tn) )
+ {
+ /* update comparator by number of periods elapsed since last update */
+ uint64_t period = h->hpet.period[tn];
+ if (period)
+ {
+ elapsed = hpet_read_maincounter(h) + period - 1 - comparator;
+ comparator += (elapsed / period) * period;
+ h->hpet.comparator64[tn] = comparator;
+ }
+ }
+
+ /* truncate if timer is in 32 bit mode */
+ if ( timer_is_32bit(h, tn) )
+ comparator = (uint32_t)comparator;
+ h->hpet.timers[tn].cmp = comparator;
+ return comparator;
+}
static inline uint64_t hpet_read64(HPETState *h, unsigned long addr)
{
addr &= ~7;
@@ -104,7 +136,7 @@ static inline uint64_t hpet_read64(HPETS
case HPET_STATUS:
return h->hpet.isr;
case HPET_COUNTER:
- return h->hpet.mc64;
+ return hpet_read_maincounter(h);
case HPET_T0_CFG:
case HPET_T1_CFG:
case HPET_T2_CFG:
@@ -112,7 +144,7 @@ static inline uint64_t hpet_read64(HPETS
case HPET_T0_CMP:
case HPET_T1_CMP:
case HPET_T2_CMP:
- return h->hpet.timers[(addr - HPET_T0_CMP) >> 5].cmp;
+ return hpet_get_comparator(h, (addr - HPET_T0_CMP) >> 5);
case HPET_T0_ROUTE:
case HPET_T1_ROUTE:
case HPET_T2_ROUTE:
@@ -140,16 +172,6 @@ static inline int hpet_check_access_leng
return 0;
}
-static inline uint64_t hpet_read_maincounter(HPETState *h)
-{
- ASSERT(spin_is_locked(&h->lock));
-
- if ( hpet_enabled(h) )
- return guest_time_hpet(h->vcpu) + h->mc_offset;
- else
- return h->hpet.mc64;
-}
-
static int hpet_read(
struct vcpu *v, unsigned long addr, unsigned long length,
unsigned long *pval)
@@ -169,8 +191,6 @@ static int hpet_read(
spin_lock(&h->lock);
val = hpet_read64(h, addr);
- if ( (addr & ~7) == HPET_COUNTER )
- val = hpet_read_maincounter(h);
result = val;
if ( length != 8 )
@@ -187,7 +207,10 @@ static void hpet_stop_timer(HPETState *h
{
ASSERT(tn < HPET_TIMER_NUM);
ASSERT(spin_is_locked(&h->lock));
- stop_timer(&h->timers[tn]);
+ destroy_periodic_time(&h->pt[tn]);
+ /* Read the comparator to force an update, so that a read while the
+ * timer is stopped returns the expected value. */
+ hpet_get_comparator(h, tn);
}
/* the number of HPET tick that stands for
@@ -197,6 +220,8 @@ static void hpet_set_timer(HPETState *h,
static void hpet_set_timer(HPETState *h, unsigned int tn)
{
uint64_t tn_cmp, cur_tick, diff;
+ unsigned int irq;
+ unsigned int oneshot;
ASSERT(tn < HPET_TIMER_NUM);
ASSERT(spin_is_locked(&h->lock));
@@ -209,7 +234,10 @@ static void hpet_set_timer(HPETState *h,
pit_stop_channel0_irq(pit);
}
- tn_cmp = h->hpet.timers[tn].cmp;
+ if ( !timer_enabled(h, tn) )
+ return;
+
+ tn_cmp = hpet_get_comparator(h, tn);
cur_tick = hpet_read_maincounter(h);
if ( timer_is_32bit(h, tn) )
{
@@ -229,7 +257,25 @@ static void hpet_set_timer(HPETState *h,
diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
? (uint32_t)diff : 0;
- set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+ if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
+ /* if LegacyReplacementRoute bit is set, HPET specification requires
+ timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
+ timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
+ irq = (tn == 0) ? 0 : 8;
+ else
+ irq = timer_int_route(h, tn);
+
+ /*
+ * diff is the time from now until the timer should fire; for a periodic
+ * timer we also need the period, which may differ from diff because time
+ * may have elapsed between the comparator being written and the timer
+ * being enabled (now).
+ */
+ oneshot = !timer_is_periodic(h, tn);
+ create_periodic_time(h->vcpu, &h->pt[tn],
+ hpet_tick_to_ns(h, diff),
+ oneshot ? 0 : hpet_tick_to_ns(h, h->hpet.period[tn]),
+ irq, NULL, NULL);
}
static inline uint64_t hpet_fixup_reg(
@@ -248,6 +294,13 @@ static int hpet_write(
uint64_t old_val, new_val;
int tn, i;
+ /* Accumulate a bit mask of timers whose state is changed by this write. */
+ unsigned long start_timers = 0;
+ unsigned long stop_timers = 0;
+#define set_stop_timer(n) (__set_bit((n), &stop_timers))
+#define set_start_timer(n) (__set_bit((n), &start_timers))
+#define set_restart_timer(n) (set_stop_timer(n),set_start_timer(n))
+
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
@@ -256,9 +309,6 @@ static int hpet_write(
spin_lock(&h->lock);
old_val = hpet_read64(h, addr);
- if ( (addr & ~7) == HPET_COUNTER )
- old_val = hpet_read_maincounter(h);
-
new_val = val;
if ( length != 8 )
new_val = hpet_fixup_reg(
@@ -275,22 +325,35 @@ static int hpet_write(
/* Enable main counter and interrupt generation. */
h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
for ( i = 0; i < HPET_TIMER_NUM; i++ )
- hpet_set_timer(h, i);
+ {
+ h->hpet.comparator64[i] =
+ h->hpet.timers[i].config & HPET_TN_32BIT ?
+ (uint32_t)h->hpet.timers[i].cmp :
+ h->hpet.timers[i].cmp;
+ if ( timer_enabled(h, i) )
+ set_start_timer(i);
+ }
}
else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
{
/* Halt main counter and disable interrupt generation. */
h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
for ( i = 0; i < HPET_TIMER_NUM; i++ )
- hpet_stop_timer(h, i);
+ if ( timer_enabled(h, i) )
+ set_stop_timer(i);
}
break;
case HPET_COUNTER:
+ h->hpet.mc64 = new_val;
if ( hpet_enabled(h) )
+ {
gdprintk(XENLOG_WARNING,
"HPET: writing main counter but it's not halted!\n");
- h->hpet.mc64 = new_val;
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(h, i) )
+ set_restart_timer(i);
+ }
break;
case HPET_T0_CFG:
@@ -313,7 +376,28 @@ static int hpet_write(
h->hpet.timers[tn].cmp = (uint32_t)h->hpet.timers[tn].cmp;
h->hpet.period[tn] = (uint32_t)h->hpet.period[tn];
}
-
+ if ( hpet_enabled(h) )
+ {
+ if ( new_val & HPET_TN_ENABLE )
+ {
+ if ( (new_val ^ old_val) & HPET_TN_PERIODIC )
+ /* The timer remains enabled but is switching between periodic
+ * and one-shot mode; stop and restart the vpt timer to put it
+ * in the right mode. */
+ set_restart_timer(tn);
+ else if ( (new_val & HPET_TN_32BIT) &&
+ !(old_val & HPET_TN_32BIT) )
+ /* Switching from 64 bit to 32 bit mode could cause the timer's
+ * next fire time, or period, to change. */
+ set_restart_timer(tn);
+ else if ( !(old_val & HPET_TN_ENABLE) )
+ /* transition from timer disabled to timer enabled. */
+ set_start_timer(tn);
+ }
+ else if ( old_val & HPET_TN_ENABLE )
+ /* transition from timer enabled to timer disabled. */
+ set_stop_timer(tn);
+ }
break;
case HPET_T0_CMP:
@@ -322,24 +406,32 @@ static int hpet_write(
tn = (addr - HPET_T0_CMP) >> 5;
if ( timer_is_32bit(h, tn) )
new_val = (uint32_t)new_val;
- if ( !timer_is_periodic(h, tn) ||
- (h->hpet.timers[tn].config & HPET_TN_SETVAL) )
- h->hpet.timers[tn].cmp = new_val;
- else
+ h->hpet.timers[tn].cmp = new_val;
+ if ( h->hpet.timers[tn].config & HPET_TN_SETVAL )
+ /*
+ * When SETVAL is one, software is able to "directly set a periodic
+ * timer's accumulator." That is, set the comparator without
+ * adjusting the period. Much the same as just setting the
+ * comparator on an enabled one-shot timer.
+ *
+ * This configuration bit clears when the comparator is written.
+ */
+ h->hpet.timers[tn].config &= ~HPET_TN_SETVAL;
+ else if ( timer_is_periodic(h, tn) )
{
/*
* Clamp period to reasonable min/max values:
- * - minimum is 900us, same as timers controlled by vpt.c
+ * - minimum is 100us, same as timers controlled by vpt.c
* - maximum is to prevent overflow in time_after() calculations
*/
- if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
- new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+ if ( hpet_tick_to_ns(h, new_val) < MICROSECS(100) )
+ new_val = (MICROSECS(100) << 10) / h->hpet_to_ns_scale;
new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
h->hpet.period[tn] = new_val;
}
- h->hpet.timers[tn].config &= ~HPET_TN_SETVAL;
- if ( hpet_enabled(h) )
- hpet_set_timer(h, tn);
+ h->hpet.comparator64[tn] = new_val;
+ if ( hpet_enabled(h) && timer_enabled(h, tn) )
+ set_restart_timer(tn);
break;
case HPET_T0_ROUTE:
@@ -354,6 +446,25 @@ static int hpet_write(
break;
}
+ /* Stop/start timers whose state was changed by this write. */
+ while (stop_timers)
+ {
+ i = find_first_set_bit(stop_timers);
+ __clear_bit(i, &stop_timers);
+ hpet_stop_timer(h, i);
+ }
+
+ while (start_timers)
+ {
+ i = find_first_set_bit(start_timers);
+ __clear_bit(i, &start_timers);
+ hpet_set_timer(h, i);
+ }
+
+#undef set_stop_timer
+#undef set_start_timer
+#undef set_restart_timer
+
spin_unlock(&h->lock);
out:
@@ -373,86 +484,6 @@ struct hvm_mmio_handler hpet_mmio_handle
.write_handler = hpet_write
};
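
hpet_write() above accumulates start/stop decisions in bitmasks and only acts on them once the register update is complete, so a single write that affects several timers stops them all before restarting any. A standalone sketch of that drain pattern (find_first_set_bit() is modeled with a compiler builtin; names are illustrative):

#include <stdio.h>

/* Act on each set bit of *mask exactly once, lowest bit first. */
static void drain(unsigned long *mask, void (*act)(int tn))
{
    while (*mask)
    {
        int tn = __builtin_ctzl(*mask);  /* stand-in for find_first_set_bit */
        *mask &= ~(1UL << tn);
        act(tn);
    }
}

static void stop_one(int tn)  { printf("stop timer %d\n", tn); }
static void start_one(int tn) { printf("start timer %d\n", tn); }

int main(void)
{
    unsigned long stop_timers = 0, start_timers = 0;

    /* A "restart" sets the timer in both masks, like set_restart_timer(). */
    stop_timers  |= 1UL << 1;
    start_timers |= 1UL << 1;
    start_timers |= 1UL << 2;

    drain(&stop_timers, stop_one);    /* all stops happen before any start */
    drain(&start_timers, start_one);
    return 0;
}
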
-static void hpet_route_interrupt(HPETState *h, unsigned int tn)
-{
- unsigned int tn_int_route = timer_int_route(h, tn);
- struct domain *d = h->vcpu->domain;
-
- ASSERT(spin_is_locked(&h->lock));
-
- if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
- {
- /* if LegacyReplacementRoute bit is set, HPET specification requires
- timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
- timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
- int isa_irq = (tn == 0) ? 0 : 8;
- hvm_isa_irq_deassert(d, isa_irq);
- hvm_isa_irq_assert(d, isa_irq);
- return;
- }
-
- if ( !(timer_int_route_cap(h, tn) & (1U << tn_int_route)) )
- {
- gdprintk(XENLOG_ERR,
- "HPET: timer%u: invalid interrupt route config\n", tn);
- domain_crash(d);
- return;
- }
-
- /* We support only edge-triggered interrupt. */
- spin_lock(&d->arch.hvm_domain.irq_lock);
- vioapic_irq_positive_edge(d, tn_int_route);
- spin_unlock(&d->arch.hvm_domain.irq_lock);
-}
-
-static void hpet_timer_fn(void *opaque)
-{
- struct HPET_timer_fn_info *htfi = opaque;
- HPETState *h = htfi->hs;
- unsigned int tn = htfi->tn;
-
- spin_lock(&h->lock);
-
- if ( !hpet_enabled(h) )
- {
- spin_unlock(&h->lock);
- return;
- }
-
- if ( timer_config(h, tn) & HPET_TN_ENABLE )
- hpet_route_interrupt(h, tn);
-
- if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
- {
- uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
- if ( timer_is_32bit(h, tn) )
- {
- while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp = (uint32_t)(
- h->hpet.timers[tn].cmp + period);
- }
- else
- {
- while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp += period;
- }
- set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
- }
-
- spin_unlock(&h->lock);
-}
-
-void hpet_migrate_timers(struct vcpu *v)
-{
- struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
- int i;
-
- if ( v != h->vcpu )
- return;
-
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- migrate_timer(&h->timers[i], v->processor);
-}
static int hpet_save(struct domain *d, hvm_domain_context_t *h)
{
@@ -477,18 +508,20 @@ static int hpet_save(struct domain *d, h
C(isr);
C(mc64);
C(timers[0].config);
- C(timers[0].cmp);
C(timers[0].fsb);
C(timers[1].config);
- C(timers[1].cmp);
C(timers[1].fsb);
C(timers[2].config);
- C(timers[2].cmp);
C(timers[2].fsb);
C(period[0]);
C(period[1]);
C(period[2]);
#undef C
+ /* save the 64 bit comparator in the 64 bit timer[n].cmp field
+ * regardless of whether or not the timer is in 32 bit mode. */
+ rec->timers[0].cmp = hp->hpet.comparator64[0];
+ rec->timers[1].cmp = hp->hpet.comparator64[1];
+ rec->timers[2].cmp = hp->hpet.comparator64[2];
}
spin_unlock(&hp->lock);
@@ -500,6 +533,7 @@ static int hpet_load(struct domain *d, h
{
HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
struct hvm_hw_hpet *rec;
+ uint64_t cmp;
int i;
spin_lock(&hp->lock);
@@ -515,32 +549,38 @@ static int hpet_load(struct domain *d, h
h->cur += HVM_SAVE_LENGTH(HPET);
#define C(x) hp->hpet.x = rec->x
- C(capability);
- C(config);
- C(isr);
- C(mc64);
- C(timers[0].config);
- C(timers[0].cmp);
- C(timers[0].fsb);
- C(timers[1].config);
- C(timers[1].cmp);
- C(timers[1].fsb);
- C(timers[2].config);
- C(timers[2].cmp);
- C(timers[2].fsb);
- C(period[0]);
- C(period[1]);
- C(period[2]);
+ C(capability);
+ C(config);
+ C(isr);
+ C(mc64);
+ /* The following define will generate a compiler error if HPET_TIMER_NUM
+ * changes. This indicates an incompatibility with previous saved state. */
+#define HPET_TIMER_NUM 3
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ {
+ C(timers[i].config);
+ C(timers[i].fsb);
+ C(period[i]);
+ /* restore the hidden 64 bit comparator and truncate the timer's
+ * visible comparator field if in 32 bit mode. */
+ cmp = rec->timers[i].cmp;
+ hp->hpet.comparator64[i] = cmp;
+ if ( timer_is_32bit(hp, i) )
+ cmp = (uint32_t)cmp;
+ hp->hpet.timers[i].cmp = cmp;
+ }
#undef C
/* Recalculate the offset between the main counter and guest time */
hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-
- /* Restart the timers */
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- if ( hpet_enabled(hp) )
- hpet_set_timer(hp, i);
-
+
+ /* restart all timers */
+
+ if ( hpet_enabled(hp) )
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(hp, i) )
+ hpet_set_timer(hp, i);
+
spin_unlock(&hp->lock);
return 0;
@@ -575,10 +615,7 @@ void hpet_init(struct vcpu *v)
h->hpet.timers[i].config =
HPET_TN_INT_ROUTE_CAP | HPET_TN_SIZE_CAP | HPET_TN_PERIODIC_CAP;
h->hpet.timers[i].cmp = ~0ULL;
- h->timer_fn_info[i].hs = h;
- h->timer_fn_info[i].tn = i;
- init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i],
- v->processor);
+ h->pt[i].source = PTSRC_isa;
}
}
@@ -587,8 +624,14 @@ void hpet_deinit(struct domain *d)
int i;
HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- kill_timer(&h->timers[i]);
+ spin_lock(&h->lock);
+
+ if ( hpet_enabled(h) )
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(h, i) )
+ hpet_stop_timer(h, i);
+
+ spin_unlock(&h->lock);
}
void hpet_reset(struct domain *d)
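
The catch-up logic in hpet_get_comparator() rounds the distance from the stale comparator up to a whole number of periods, landing on the first tick at or after the current main counter. A standalone worked example of that arithmetic (made-up numbers, not Xen code):

#include <assert.h>
#include <stdint.h>

/* Same catch-up arithmetic as hpet_get_comparator(), outside Xen. */
static uint64_t catch_up(uint64_t comparator, uint64_t period, uint64_t mc)
{
    /* Elapsed distance, biased by period - 1 so the division rounds the
     * result up to the first deadline at or after mc. */
    uint64_t elapsed = mc + period - 1 - comparator;
    return comparator + (elapsed / period) * period;
}

int main(void)
{
    /* comparator 100, period 50, counter now at 275:
     * elapsed = 275 + 50 - 1 - 100 = 224; 224 / 50 = 4 whole periods;
     * new comparator = 100 + 4 * 50 = 300, the first tick >= 275. */
    assert(catch_up(100, 50, 275) == 300);
    /* If the counter sits exactly on a tick, that tick is kept. */
    assert(catch_up(100, 50, 300) == 300);
    return 0;
}
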
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/hvm.c Tue Nov 04 12:43:19 2008 +0900
@@ -163,7 +163,6 @@ void hvm_migrate_timers(struct vcpu *v)
void hvm_migrate_timers(struct vcpu *v)
{
rtc_migrate_timers(v);
- hpet_migrate_timers(v);
pt_migrate(v);
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/i8254.c
--- a/xen/arch/x86/hvm/i8254.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/i8254.c Tue Nov 04 12:43:19 2008 +0900
@@ -213,13 +213,13 @@ static void pit_load_count(PITState *pit
case 2:
case 3:
/* Periodic timer. */
- create_periodic_time(v, &pit->pt0, period, 0, 0, pit_time_fired,
+ create_periodic_time(v, &pit->pt0, period, period, 0, pit_time_fired,
&pit->count_load_time[channel]);
break;
case 1:
case 4:
/* One-shot timer. */
- create_periodic_time(v, &pit->pt0, period, 0, 1, pit_time_fired,
+ create_periodic_time(v, &pit->pt0, period, 0, 0, pit_time_fired,
&pit->count_load_time[channel]);
break;
default:
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/rtc.c
--- a/xen/arch/x86/hvm/rtc.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/rtc.c Tue Nov 04 12:43:19 2008 +0900
@@ -59,8 +59,8 @@ static void rtc_timer_update(RTCState *s
period = 1 << (period_code - 1); /* period in 32 Khz cycles */
period = DIV_ROUND((period * 1000000000ULL), 32768); /* period in ns */
- create_periodic_time(v, &s->pt, period, RTC_IRQ,
- 0, rtc_periodic_cb, s);
+ create_periodic_time(v, &s->pt, period, period, RTC_IRQ,
+ rtc_periodic_cb, s);
}
else
{
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/svm/entry.S
--- a/xen/arch/x86/hvm/svm/entry.S Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/svm/entry.S Tue Nov 04 12:43:19 2008 +0900
@@ -57,6 +57,8 @@
#endif
ENTRY(svm_asm_do_resume)
+ call svm_intr_assist
+
get_current(bx)
CLGI
@@ -67,7 +69,6 @@ ENTRY(svm_asm_do_resume)
jnz .Lsvm_process_softirqs
call svm_asid_handle_vmrun
- call svm_intr_assist
cmpb $0,addr_of(tb_init_done)
jnz .Lsvm_trace
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/vlapic.c Tue Nov 04 12:43:19 2008 +0900
@@ -701,8 +701,9 @@ static int vlapic_write(struct vcpu *v,
(uint32_t)val * vlapic->hw.timer_divisor;
vlapic_set_reg(vlapic, APIC_TMICT, val);
- create_periodic_time(current, &vlapic->pt, period, vlapic->pt.irq,
- !vlapic_lvtt_period(vlapic), vlapic_pt_cb,
+ create_periodic_time(current, &vlapic->pt, period,
+ vlapic_lvtt_period(vlapic) ? period : 0,
+ vlapic->pt.irq, vlapic_pt_cb,
&vlapic->timer_last_update);
vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
@@ -861,8 +862,9 @@ static void lapic_rearm(struct vlapic *s
period = ((uint64_t)APIC_BUS_CYCLE_NS *
(uint32_t)tmict * s->hw.timer_divisor);
s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;
- create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
- !vlapic_lvtt_period(s), vlapic_pt_cb,
+ create_periodic_time(vlapic_vcpu(s), &s->pt, period,
+ vlapic_lvtt_period(s) ? period : 0,
+ s->pt.irq, vlapic_pt_cb,
&s->timer_last_update);
s->timer_last_update = s->pt.last_plt_gtime;
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/vmx/entry.S
--- a/xen/arch/x86/hvm/vmx/entry.S Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/entry.S Tue Nov 04 12:43:19 2008 +0900
@@ -122,6 +122,8 @@ vmx_asm_vmexit_handler:
.globl vmx_asm_do_vmentry
vmx_asm_do_vmentry:
+ call vmx_intr_assist
+
get_current(bx)
cli
@@ -130,8 +132,6 @@ vmx_asm_do_vmentry:
lea addr_of(irq_stat),r(dx)
cmpl $0,(r(dx),r(ax),1)
jnz .Lvmx_process_softirqs
-
- call vmx_intr_assist
testb $0xff,VCPU_vmx_emul(r(bx))
jnz .Lvmx_goto_realmode
@@ -179,11 +179,13 @@ vmx_asm_do_vmentry:
/*.Lvmx_resume:*/
VMRESUME
+ sti
call vm_resume_fail
ud2
.Lvmx_launch:
VMLAUNCH
+ sti
call vm_launch_fail
ud2
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vmx.c Tue Nov 04 12:43:19 2008 +0900
@@ -49,6 +49,7 @@
#include <asm/hvm/vpt.h>
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+#include <asm/xenoprof.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
@@ -132,6 +133,7 @@ static void vmx_vcpu_destroy(struct vcpu
{
vmx_destroy_vmcs(v);
vpmu_destroy(v);
+ passive_domain_destroy(v);
}
#ifdef __x86_64__
@@ -1666,6 +1668,8 @@ static int vmx_msr_read_intercept(struct
default:
if ( vpmu_do_rdmsr(regs) )
goto done;
+ if ( passive_domain_do_rdmsr(regs) )
+ goto done;
switch ( long_mode_do_msr_read(regs) )
{
case HNDL_unhandled:
@@ -1860,6 +1864,8 @@ static int vmx_msr_write_intercept(struc
goto gp_fault;
default:
if ( vpmu_do_wrmsr(regs) )
+ return X86EMUL_OKAY;
+ if ( passive_domain_do_wrmsr(regs) )
return X86EMUL_OKAY;
if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
@@ -1964,27 +1970,25 @@ static void ept_handle_violation(unsigne
{
unsigned long gla_validity = qualification & EPT_GLA_VALIDITY_MASK;
struct domain *d = current->domain;
- unsigned long gfn = gpa >> PAGE_SHIFT;
+ unsigned long gla, gfn = gpa >> PAGE_SHIFT;
mfn_t mfn;
p2m_type_t t;
- if ( unlikely(qualification & EPT_GAW_VIOLATION) )
- {
- gdprintk(XENLOG_ERR, "EPT violation: guest physical address %"PRIpaddr
- " exceeded its width limit.\n", gpa);
- goto crash;
- }
-
- if ( unlikely(gla_validity == EPT_GLA_VALIDITY_RSVD) ||
- unlikely(gla_validity == EPT_GLA_VALIDITY_PDPTR_LOAD) )
- {
- gdprintk(XENLOG_ERR, "EPT violation: reserved bit or "
- "pdptr load violation.\n");
- goto crash;
- }
-
mfn = gfn_to_mfn(d, gfn, &t);
- if ( (t != p2m_ram_ro) && p2m_is_ram(t) && paging_mode_log_dirty(d) )
+
+ /* There are two legitimate reasons for taking an EPT violation.
+ * One is a guest access to MMIO space. */
+ if ( gla_validity == EPT_GLA_VALIDITY_MATCH && p2m_is_mmio(t) )
+ {
+ handle_mmio();
+ return;
+ }
+
+ /* The other is log-dirty mode, writing to a read-only page */
+ if ( paging_mode_log_dirty(d)
+ && (gla_validity == EPT_GLA_VALIDITY_MATCH
+ || gla_validity == EPT_GLA_VALIDITY_GPT_WALK)
+ && p2m_is_ram(t) && (t != p2m_ram_ro) )
{
paging_mark_dirty(d, mfn_x(mfn));
p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
@@ -1992,16 +1996,39 @@ static void ept_handle_violation(unsigne
return;
}
- /* This can only happen in log-dirty mode, writing back A/D bits. */
- if ( unlikely(gla_validity == EPT_GLA_VALIDITY_GPT_WALK) )
- goto crash;
-
- ASSERT(gla_validity == EPT_GLA_VALIDITY_MATCH);
- handle_mmio();
-
- return;
-
- crash:
+ /* Everything else is an error. */
+ gla = __vmread(GUEST_LINEAR_ADDRESS);
+ gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
+ "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
+ qualification,
+ (qualification & EPT_READ_VIOLATION) ? 'r' : '-',
+ (qualification & EPT_WRITE_VIOLATION) ? 'w' : '-',
+ (qualification & EPT_EXEC_VIOLATION) ? 'x' : '-',
+ (qualification & EPT_EFFECTIVE_READ) ? 'r' : '-',
+ (qualification & EPT_EFFECTIVE_WRITE) ? 'w' : '-',
+ (qualification & EPT_EFFECTIVE_EXEC) ? 'x' : '-',
+ gpa, mfn_x(mfn), t);
+
+ if ( qualification & EPT_GAW_VIOLATION )
+ gdprintk(XENLOG_ERR, " --- GPA too wide (max %u bits)\n",
+ 9 * (unsigned) d->arch.hvm_domain.vmx.ept_control.gaw + 21);
+
+ switch ( gla_validity )
+ {
+ case EPT_GLA_VALIDITY_PDPTR_LOAD:
+ gdprintk(XENLOG_ERR, " --- PDPTR load failed\n");
+ break;
+ case EPT_GLA_VALIDITY_GPT_WALK:
+ gdprintk(XENLOG_ERR, " --- guest PT walk to %#lx failed\n", gla);
+ break;
+ case EPT_GLA_VALIDITY_RSVD:
+ gdprintk(XENLOG_ERR, " --- GLA_validity 2 (reserved)\n");
+ break;
+ case EPT_GLA_VALIDITY_MATCH:
+ gdprintk(XENLOG_ERR, " --- guest access to %#lx failed\n", gla);
+ break;
+ }
+
domain_crash(d);
}
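
The new diagnostic prints the exit qualification as two r/w/x triples: the access that faulted and the permissions the EPT entry actually grants. A hedged standalone sketch of that formatting (the bit positions are illustrative stand-ins for the EPT_*_VIOLATION and EPT_EFFECTIVE_* masks):

#include <stdio.h>

#define RD  (1u << 0)   /* read violation */
#define WR  (1u << 1)   /* write violation */
#define EX  (1u << 2)   /* exec violation */
#define ERD (1u << 3)   /* entry allows read */
#define EWR (1u << 4)   /* entry allows write */
#define EEX (1u << 5)   /* entry allows exec */

static void fmt_qual(unsigned long q, char out[8])
{
    out[0] = (q & RD)  ? 'r' : '-';
    out[1] = (q & WR)  ? 'w' : '-';
    out[2] = (q & EX)  ? 'x' : '-';
    out[3] = '/';
    out[4] = (q & ERD) ? 'r' : '-';
    out[5] = (q & EWR) ? 'w' : '-';
    out[6] = (q & EEX) ? 'x' : '-';
    out[7] = '\0';
}

int main(void)
{
    char buf[8];
    fmt_qual(WR | ERD | EWR, buf);
    printf("EPT violation (%s)\n", buf);   /* prints: -w-/rw- */
    return 0;
}
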
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/vmx/vpmu_core2.c
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c Tue Nov 04 12:43:19 2008 +0900
@@ -35,6 +35,26 @@
#include <asm/hvm/vmx/vpmu.h>
#include <asm/hvm/vmx/vpmu_core2.h>
+u32 core2_counters_msr[] = {
+ MSR_CORE_PERF_FIXED_CTR0,
+ MSR_CORE_PERF_FIXED_CTR1,
+ MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectural Performance Control MSRs. */
+u32 core2_ctrls_msr[] = {
+ MSR_CORE_PERF_FIXED_CTR_CTRL,
+ MSR_IA32_PEBS_ENABLE,
+ MSR_IA32_DS_AREA};
+
+struct pmumsr core2_counters = {
+ 3,
+ core2_counters_msr
+};
+
+struct pmumsr core2_ctrls = {
+ 3,
+ core2_ctrls_msr
+};
static int arch_pmc_cnt;
static int core2_get_pmc_count(void)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/hvm/vpt.c Tue Nov 04 12:43:19 2008 +0900
@@ -355,8 +355,8 @@ void pt_migrate(struct vcpu *v)
}
void create_periodic_time(
- struct vcpu *v, struct periodic_time *pt, uint64_t period,
- uint8_t irq, char one_shot, time_cb *cb, void *data)
+ struct vcpu *v, struct periodic_time *pt, uint64_t delta,
+ uint64_t period, uint8_t irq, time_cb *cb, void *data)
{
ASSERT(pt->source != 0);
@@ -368,13 +368,13 @@ void create_periodic_time(
pt->do_not_freeze = 0;
pt->irq_issued = 0;
- /* Periodic timer must be at least 0.9ms. */
- if ( (period < 900000) && !one_shot )
+ /* Periodic timer must be at least 0.1ms. */
+ if ( (period < 100000) && period )
{
if ( !test_and_set_bool(pt->warned_timeout_too_short) )
gdprintk(XENLOG_WARNING, "HVM_PlatformTime: program too "
"small period %"PRIu64"\n", period);
- period = 900000;
+ period = 100000;
}
pt->period = period;
@@ -382,15 +382,15 @@ void create_periodic_time(
pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
pt->irq = irq;
pt->period_cycles = (u64)period;
- pt->one_shot = one_shot;
- pt->scheduled = NOW() + period;
+ pt->one_shot = !period;
+ pt->scheduled = NOW() + delta;
/*
* Offset LAPIC ticks from other timer ticks. Otherwise guests which use
* LAPIC ticks for process accounting can see long sequences of process
* ticks incorrectly accounted to interrupt processing.
*/
- if ( pt->source == PTSRC_lapic )
- pt->scheduled += period >> 1;
+ if ( !pt->one_shot && (pt->source == PTSRC_lapic) )
+ pt->scheduled += delta >> 1;
pt->cb = cb;
pt->priv = data;
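
The signature change replaces the one_shot flag with an explicit (delta, period) pair: delta is the time until first expiry and period == 0 now means one-shot, as the updated i8254.c, rtc.c and vlapic.c callers show. A hedged usage sketch assuming the types from Xen's vpt.h:

#include <asm/hvm/vpt.h>   /* Xen build environment assumed */

/* Sketch only: 'v', 'pt', 'irq', 'cb' and 'data' stand for whatever the
 * real caller has in scope; the two calls are alternatives, not a
 * sequence to run back to back. */
static void vpt_examples(struct vcpu *v, struct periodic_time *pt,
                         uint64_t delta, uint64_t period, uint8_t irq,
                         time_cb *cb, void *data)
{
    /* Periodic: first expiry after 'period' ns, then every 'period' ns. */
    create_periodic_time(v, pt, period, period, irq, cb, data);

    /* One-shot: fire once after 'delta' ns; period == 0 selects one-shot. */
    create_periodic_time(v, pt, delta, 0, irq, cb, data);
}
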
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/irq.c Tue Nov 04 12:43:19 2008 +0900
@@ -793,6 +793,10 @@ int map_domain_pirq(
ASSERT(spin_is_locked(&d->event_lock));
+ /* XXX Until pcidev and msi locking is fixed. */
+ if ( type == MAP_PIRQ_TYPE_MSI )
+ return -EINVAL;
+
if ( !IS_PRIV(current->domain) )
return -EPERM;
@@ -840,7 +844,7 @@ int map_domain_pirq(
d->arch.pirq_vector[pirq] = vector;
d->arch.vector_pirq[vector] = pirq;
-done:
+ done:
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/mm.c Tue Nov 04 12:43:19 2008 +0900
@@ -566,19 +566,21 @@ static int get_page_and_type_from_pagenr
static int get_page_and_type_from_pagenr(unsigned long page_nr,
unsigned long type,
struct domain *d,
+ int partial,
int preemptible)
{
struct page_info *page = mfn_to_page(page_nr);
int rc;
- if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
+ if ( likely(partial >= 0) &&
+ unlikely(!get_page_from_pagenr(page_nr, d)) )
return -EINVAL;
rc = (preemptible ?
get_page_type_preemptible(page, type) :
(get_page_type(page, type) ? 0 : -EINVAL));
- if ( rc )
+ if ( unlikely(rc) && partial >= 0 )
put_page(page);
return rc;
@@ -761,7 +763,7 @@ get_page_from_l2e(
}
rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
+ l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
rc = 0;
@@ -772,7 +774,7 @@ define_get_linear_pagetable(l3);
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
@@ -786,7 +788,7 @@ get_page_from_l3e(
}
rc = get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
rc = 0;
@@ -797,7 +799,7 @@ define_get_linear_pagetable(l4);
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
@@ -811,7 +813,7 @@ get_page_from_l4e(
}
rc = get_page_and_type_from_pagenr(
- l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
rc = 0;
@@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_
return 1;
}
+static int __put_page_type(struct page_info *, int preemptible);
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
(l3e_get_pfn(l3e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l3e_get_page(l3e), preemptible);
return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+ }
return 1;
}
#if CONFIG_PAGING_LEVELS >= 4
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l4e_get_page(l4e), preemptible);
return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+ }
return 1;
}
#endif
@@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
unsigned int i;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
#if CONFIG_PAGING_LEVELS == 3
/*
@@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in
if ( is_pv_32on64_domain(d) )
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
- for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( is_pv_32bit_domain(d) && (i == 3) )
{
@@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in
rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
PGT_l2_page_table |
PGT_pae_xen_l2,
- d, preemptible);
+ d, partial, preemptible);
}
else if ( !is_guest_l3_slot(i) ||
- (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l3e(pl3e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR && i )
{
@@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in
if ( !is_guest_l3_slot(i) )
continue;
unadjust_guest_l3e(pl3e[i], d);
- put_page_from_l3e(pl3e[i], pfn, 0);
+ put_page_from_l3e(pl3e[i], pfn, 0, 0);
}
}
@@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
unsigned int i;
- int rc = 0;
-
- for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+ int rc = 0, partial = page->partial_pte;
+
+ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( !is_guest_l4_slot(d, i) ||
- (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l4e(pl4e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR )
{
@@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in
MEM_LOG("Failure in alloc_l4_table: entry %d", i);
while ( i-- > 0 )
if ( is_guest_l4_slot(d, i) )
- put_page_from_l4e(pl4e[i], pfn, 0);
+ put_page_from_l4e(pl4e[i], pfn, 0, 0);
}
if ( rc < 0 )
return rc;
@@ -1377,24 +1392,20 @@ static int free_l3_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
-
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l3 )
- return 0;
-#endif
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
pl3e = map_domain_page(pfn);
do {
if ( is_guest_l3_slot(i) )
{
- rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+ rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
if ( rc > 0 )
continue;
- if ( rc )
- break;
unadjust_guest_l3e(pl3e[i], d);
}
} while ( i-- );
@@ -1404,7 +1415,7 @@ static int free_l3_table(struct page_inf
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
{
@@ -1421,23 +1432,21 @@ static int free_l4_table(struct page_inf
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
-
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l4 )
- return 0;
-#endif
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
do {
if ( is_guest_l4_slot(d, i) )
- rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
- } while ( rc >= 0 && i-- );
+ rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
+ } while ( i-- );
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
{
@@ -1713,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+ rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
return page_unlock(l3pg), rc;
rc = 0;
@@ -1742,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
}
page_unlock(l3pg);
- put_page_from_l3e(ol3e, pfn, 0);
+ put_page_from_l3e(ol3e, pfn, 0, 0);
return rc;
}
@@ -1791,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+ rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
return page_unlock(l4pg), rc;
rc = 0;
@@ -1812,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
}
page_unlock(l4pg);
- put_page_from_l4e(ol4e, pfn, 0);
+ put_page_from_l4e(ol4e, pfn, 0, 0);
return rc;
}
@@ -1847,7 +1856,8 @@ int get_page(struct page_info *page, str
nx = x + 1;
d = nd;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
- unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
+ /* Keep one spare reference to be acquired by get_page_light(). */
+ unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
unlikely(d != _domain) ) /* Wrong owner? */
{
if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
@@ -1867,6 +1877,28 @@ int get_page(struct page_info *page, str
while ( unlikely(nd != d) || unlikely(y != x) );
return 1;
+}
+
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ * acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+ u32 x, nx, y = page->count_info;
+
+ do {
+ x = y;
+ nx = x + 1;
+ BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+ BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+ y = cmpxchg(&page->count_info, x, nx);
+ }
+ while ( unlikely(y != x) );
}
@@ -1909,6 +1941,7 @@ static int alloc_page_type(struct page_i
wmb();
if ( rc == -EAGAIN )
{
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
}
else if ( rc == -EINTR )
@@ -1973,6 +2006,7 @@ int free_page_type(struct page_info *pag
page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
page->partial_pte = 0;
}
+
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
@@ -1998,6 +2032,15 @@ int free_page_type(struct page_info *pag
BUG();
}
+ return rc;
+}
+
+
+static int __put_final_page_type(
+ struct page_info *page, unsigned long type, int preemptible)
+{
+ int rc = free_page_type(page, type, preemptible);
+
/* No need for atomic update of type_info here: no one else updates it. */
if ( rc == 0 )
{
@@ -2016,8 +2059,8 @@ int free_page_type(struct page_info *pag
}
else if ( rc == -EINTR )
{
- ASSERT(!(page->u.inuse.type_info &
- (PGT_count_mask|PGT_validated|PGT_partial)));
+ ASSERT((page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
if ( !(shadow_mode_enabled(page_get_owner(page)) &&
(page->count_info & PGC_page_table)) )
page->tlbflush_timestamp = tlbflush_current_time();
@@ -2028,6 +2071,7 @@ int free_page_type(struct page_info *pag
{
BUG_ON(rc != -EAGAIN);
wmb();
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
}
@@ -2039,6 +2083,7 @@ static int __put_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
for ( ; ; )
{
@@ -2062,7 +2107,10 @@ static int __put_page_type(struct page_i
x, nx)) != x) )
continue;
/* We cleared the 'valid bit' so we do the clean up. */
- return free_page_type(page, x, preemptible);
+ rc = __put_final_page_type(page, x, preemptible);
+ if ( x & PGT_partial )
+ put_page(page);
+ break;
}
/*
@@ -2084,7 +2132,7 @@ static int __put_page_type(struct page_i
return -EINTR;
}
- return 0;
+ return rc;
}
@@ -2092,6 +2140,7 @@ static int __get_page_type(struct page_i
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
@@ -2214,10 +2263,13 @@ static int __get_page_type(struct page_i
page->nr_validated_ptes = 0;
page->partial_pte = 0;
}
- return alloc_page_type(page, type, preemptible);
- }
-
- return 0;
+ rc = alloc_page_type(page, type, preemptible);
+ }
+
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
+
+ return rc;
}
void put_page_type(struct page_info *page)
@@ -2296,7 +2348,7 @@ int new_guest_cr3(unsigned long mfn)
#endif
okay = paging_mode_refcounts(d)
? get_page_from_pagenr(mfn, d)
- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
+ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new baseptr %lx", mfn);
@@ -2431,6 +2483,29 @@ static inline cpumask_t vcpumask_to_pcpu
return pmask;
}
+#ifdef __i386__
+static inline void *fixmap_domain_page(unsigned long mfn)
+{
+ unsigned int cpu = smp_processor_id();
+ void *ptr = (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+
+ l1e_write(fix_pae_highmem_pl1e - cpu,
+ l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+ flush_tlb_one_local(ptr);
+ return ptr;
+}
+static inline void fixunmap_domain_page(const void *ptr)
+{
+ unsigned int cpu = virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM_0;
+
+ l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty());
+ this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
+}
+#else
+#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
+#define fixunmap_domain_page(ptr) ((void)(ptr))
+#endif
+
int do_mmuext_op(
XEN_GUEST_HANDLE(mmuext_op_t) uops,
unsigned int count,
@@ -2517,7 +2592,7 @@ int do_mmuext_op(
if ( paging_mode_refcounts(FOREIGNDOM) )
break;
- rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
okay = !rc;
if ( unlikely(!okay) )
{
@@ -2598,7 +2673,7 @@ int do_mmuext_op(
okay = get_page_from_pagenr(mfn, d);
else
okay = !get_page_and_type_from_pagenr(
- mfn, PGT_root_page_table, d, 0);
+ mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new mfn %lx", mfn);
@@ -2697,6 +2772,66 @@ int do_mmuext_op(
if ( ents != 0 )
this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
}
+ break;
+ }
+
+ case MMUEXT_CLEAR_PAGE:
+ {
+ unsigned char *ptr;
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0, 0);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while clearing mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being cleared. */
+ paging_mark_dirty(d, mfn);
+
+ ptr = fixmap_domain_page(mfn);
+ clear_page(ptr);
+ fixunmap_domain_page(ptr);
+
+ put_page_and_type(page);
+ break;
+ }
+
+ case MMUEXT_COPY_PAGE:
+ {
+ const unsigned char *src;
+ unsigned char *dst;
+ unsigned long src_mfn;
+
+ src_mfn = gmfn_to_mfn(FOREIGNDOM, op.arg2.src_mfn);
+ okay = get_page_from_pagenr(src_mfn, FOREIGNDOM);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while copying from mfn %lx", src_mfn);
+ break;
+ }
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0, 0);
+ if ( unlikely(!okay) )
+ {
+ put_page(mfn_to_page(src_mfn));
+ MEM_LOG("Error while copying to mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being copied to. */
+ paging_mark_dirty(d, mfn);
+
+ src = map_domain_page(src_mfn);
+ dst = fixmap_domain_page(mfn);
+ copy_page(dst, src);
+ fixunmap_domain_page(dst);
+ unmap_domain_page(src);
+
+ put_page_and_type(page);
+ put_page(mfn_to_page(src_mfn));
break;
}
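
get_page_light() relies on get_page() above keeping one spare reference below the overflow limit, so the light-weight increment can treat both a zero count and an overflow as bugs rather than recoverable errors. A portable sketch of the same pattern using C11 atomics in place of Xen's cmpxchg() (illustrative, not Xen code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define COUNT_MASK 0x7fffffffu   /* stand-in for PGC_count_mask */

static void get_ref_light(_Atomic uint32_t *count_info)
{
    uint32_t x = atomic_load(count_info), nx;

    do {
        nx = x + 1;
        assert(x & COUNT_MASK);   /* not allocated? cannot happen here */
        assert(nx & COUNT_MASK);  /* overflow? get_page() reserved room */
        /* on failure the CAS reloads x and we recompute nx */
    } while (!atomic_compare_exchange_weak(count_info, &x, nx));
}
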
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Tue Nov 04 12:43:19 2008 +0900
@@ -157,9 +157,6 @@ ept_set_entry(struct domain *d, unsigned
{
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
{
- /* Track the highest gfn for which we have ever had a valid mapping */
- if ( gfn > d->arch.p2m->max_mapped_pfn )
- d->arch.p2m->max_mapped_pfn = gfn;
ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
ept_entry->sp_avail = walk_level ? 1 : 0;
@@ -233,6 +230,11 @@ ept_set_entry(struct domain *d, unsigned
unmap_domain_page(split_table);
}
+
+ /* Track the highest gfn for which we have ever had a valid mapping */
+ if ( mfn_valid(mfn_x(mfn))
+ && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+ d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
/* Success */
rv = 1;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/mm/p2m.c Tue Nov 04 12:43:19 2008 +0900
@@ -322,7 +322,8 @@ p2m_set_entry(struct domain *d, unsigned
}
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
+ if ( mfn_valid(mfn)
+ && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
@@ -956,18 +957,18 @@ guest_physmap_add_entry(struct domain *d
/* First, remove m->p mappings for existing p->m mappings */
for ( i = 0; i < (1UL << page_order); i++ )
{
- omfn = gfn_to_mfn(d, gfn, &ot);
+ omfn = gfn_to_mfn(d, gfn + i, &ot);
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
+ set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
}
/* Then, look for m->p mappings for this range and deal with them */
for ( i = 0; i < (1UL << page_order); i++ )
{
- ogfn = mfn_to_gfn(d, _mfn(mfn));
+ ogfn = mfn_to_gfn(d, _mfn(mfn+i));
if (
#ifdef __x86_64__
(ogfn != 0x5555555555555555L)
@@ -975,20 +976,20 @@ guest_physmap_add_entry(struct domain *d
(ogfn != 0x55555555L)
#endif
&& (ogfn != INVALID_M2P_ENTRY)
- && (ogfn != gfn) )
+ && (ogfn != gfn + i) )
{
/* This machine frame is already mapped at another physical
* address */
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
- mfn, ogfn, gfn);
+ mfn + i, ogfn, gfn + i);
omfn = gfn_to_mfn(d, ogfn, &ot);
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
- if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn, 0);
+ if ( mfn_x(omfn) == (mfn + i) )
+ p2m_remove_page(d, ogfn, mfn + i, 0);
}
}
}
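
Both p2m_set_entry() and ept_set_entry() now raise max_mapped_pfn to the last frame of a superpage mapping rather than its first: an order-N entry at gfn covers gfn through gfn + 2^N - 1. A quick standalone check of that arithmetic:

#include <assert.h>

int main(void)
{
    unsigned long gfn = 0x1000, max_mapped_pfn = 0;
    unsigned int page_order = 9;               /* 2MB superpage: 512 frames */
    unsigned long last = gfn + (1UL << page_order) - 1;

    if (last > max_mapped_pfn)
        max_mapped_pfn = last;
    assert(max_mapped_pfn == 0x11ff);          /* 0x1000 .. 0x11ff inclusive */
    return 0;
}
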
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/msi.c
--- a/xen/arch/x86/msi.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/msi.c Tue Nov 04 12:43:19 2008 +0900
@@ -33,8 +33,7 @@ DECLARE_BITMAP(msix_fixmap_pages, MAX_MS
static int msix_fixmap_alloc(void)
{
- int i;
- int rc = -1;
+ int i, rc = -1;
spin_lock(&msix_fixmap_lock);
for ( i = 0; i < MAX_MSIX_PAGES; i++ )
@@ -52,12 +51,8 @@ static int msix_fixmap_alloc(void)
static void msix_fixmap_free(int idx)
{
- if ( idx < FIX_MSIX_IO_RESERV_BASE )
- return;
-
- spin_lock(&msix_fixmap_lock);
- clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
- spin_unlock(&msix_fixmap_lock);
+ if ( idx >= FIX_MSIX_IO_RESERV_BASE )
+ clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
}
/*
@@ -78,19 +73,19 @@ static void msi_compose_msg(struct pci_d
msg->address_lo =
MSI_ADDR_BASE_LO |
((INT_DEST_MODE == 0) ?
- MSI_ADDR_DESTMODE_PHYS:
- MSI_ADDR_DESTMODE_LOGIC) |
+ MSI_ADDR_DESTMODE_PHYS:
+ MSI_ADDR_DESTMODE_LOGIC) |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
MSI_ADDR_DEST_ID(dest);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(vector);
}
}
@@ -128,7 +123,7 @@ static void read_msi_msg(struct msi_desc
{
void __iomem *base;
base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
@@ -205,9 +200,9 @@ static void write_msi_msg(struct msi_des
entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
writel(msg->address_lo,
- base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
- base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
break;
}
@@ -230,7 +225,7 @@ void set_msi_irq_affinity(unsigned int i
dest = cpu_mask_to_apicid(mask);
if ( !desc )
- return;
+ return;
ASSERT(spin_is_locked(&irq_desc[irq].lock));
spin_lock(&desc->dev->lock);
@@ -398,8 +393,8 @@ static void msi_free_vector(int vector)
unsigned long start;
writel(1, entry->mask_base + entry->msi_attrib.entry_nr
- * PCI_MSIX_ENTRY_SIZE
- + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ * PCI_MSIX_ENTRY_SIZE
+ + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
msix_fixmap_free(virt_to_fix(start));
@@ -460,20 +455,20 @@ static int msi_capability_init(struct pc
entry->vector = vector;
if ( is_mask_bit_support(control) )
entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
- is_64bit_address(control));
+ is_64bit_address(control));
entry->dev = dev;
if ( entry->msi_attrib.maskbit )
{
unsigned int maskbits, temp;
/* All MSIs are unmasked by default, Mask them all */
maskbits = pci_conf_read32(bus, slot, func,
- msi_mask_bits_reg(pos, is_64bit_address(control)));
+ msi_mask_bits_reg(pos, is_64bit_address(control)));
temp = (1 << multi_msi_capable(control));
temp = ((temp - 1) & ~temp);
maskbits |= temp;
pci_conf_write32(bus, slot, func,
- msi_mask_bits_reg(pos, is_64bit_address(control)),
- maskbits);
+ msi_mask_bits_reg(pos, is_64bit_address(control)),
+ maskbits);
}
list_add_tail(&entry->list, &dev->msi_list);
@@ -575,14 +570,14 @@ static int __pci_enable_msi(struct msi_i
pdev = pci_lock_pdev(msi->bus, msi->devfn);
if ( !pdev )
- return -ENODEV;
+ return -ENODEV;
if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
{
- spin_unlock(&pdev->lock);
+ spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
- "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
return 0;
}
@@ -601,7 +596,7 @@ static void __pci_disable_msi(int vector
entry = irq_desc[vector].msi_desc;
if ( !entry )
- return;
+ return;
/*
* Lock here is safe. msi_desc can not be removed without holding
* both irq_desc[].lock (which we do) and pdev->lock.
@@ -649,20 +644,20 @@ static int __pci_enable_msix(struct msi_
pdev = pci_lock_pdev(msi->bus, msi->devfn);
if ( !pdev )
- return -ENODEV;
+ return -ENODEV;
pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
nr_entries = multi_msix_capable(control);
if (msi->entry_nr > nr_entries)
{
- spin_unlock(&pdev->lock);
+ spin_unlock(&pdev->lock);
return -EINVAL;
}
if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
{
- spin_unlock(&pdev->lock);
+ spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
"device %02x:%02x.%01x.\n", msi->vector, msi->bus,
PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
@@ -684,7 +679,7 @@ static void __pci_disable_msix(int vecto
entry = irq_desc[vector].msi_desc;
if ( !entry )
- return;
+ return;
/*
* Lock here is safe. msi_desc can not be removed without holding
* both irq_desc[].lock (which we do) and pdev->lock.
@@ -712,7 +707,7 @@ int pci_enable_msi(struct msi_info *msi)
ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
return msi->table_base ? __pci_enable_msix(msi) :
- __pci_enable_msi(msi);
+ __pci_enable_msi(msi);
}
void pci_disable_msi(int vector)
@@ -720,7 +715,7 @@ void pci_disable_msi(int vector)
irq_desc_t *desc = &irq_desc[vector];
ASSERT(spin_is_locked(&desc->lock));
if ( !desc->msi_desc )
- return;
+ return;
if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
__pci_disable_msi(vector);
@@ -734,7 +729,7 @@ static void msi_free_vectors(struct pci_
irq_desc_t *desc;
unsigned long flags;
-retry:
+ retry:
list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
{
desc = &irq_desc[entry->vector];
@@ -742,7 +737,7 @@ retry:
local_irq_save(flags);
if ( !spin_trylock(&desc->lock) )
{
- local_irq_restore(flags);
+ local_irq_restore(flags);
goto retry;
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/oprofile/nmi_int.c Tue Nov 04 12:43:19 2008 +0900
@@ -36,6 +36,55 @@ static char *cpu_type;
static char *cpu_type;
extern int is_active(struct domain *d);
+extern int is_passive(struct domain *d);
+
+int passive_domain_do_rdmsr(struct cpu_user_regs *regs)
+{
+ u64 msr_content;
+ int type, index;
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+ if ( model->is_arch_pmu_msr == NULL )
+ return 0;
+ if ( !model->is_arch_pmu_msr((u64)regs->ecx, &type, &index) )
+ return 0;
+ if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ if ( ! model->allocated_msr(current) )
+ return 0;
+
+ model->load_msr(current, type, index, &msr_content);
+ regs->eax = msr_content & 0xFFFFFFFF;
+ regs->edx = msr_content >> 32;
+ return 1;
+}
+
+
+int passive_domain_do_wrmsr(struct cpu_user_regs *regs)
+{
+ u64 msr_content;
+ int type, index;
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+ if ( model->is_arch_pmu_msr == NULL )
+ return 0;
+ if ( !model->is_arch_pmu_msr((u64)regs->ecx, &type, &index) )
+ return 0;
+
+ if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ if ( ! model->allocated_msr(current) )
+ return 0;
+
+ msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+ model->save_msr(current, type, index, msr_content);
+ return 1;
+}
+
+void passive_domain_destroy(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ if ( vpmu->flags & PASSIVE_DOMAIN_ALLOCATED )
+ model->free_msr(v);
+}
static int nmi_callback(struct cpu_user_regs *regs, int cpu)
{
@@ -46,6 +95,8 @@ static int nmi_callback(struct cpu_user_
if ( ovf && is_active(current->domain) && !xen_mode )
send_guest_vcpu_virq(current, VIRQ_XENOPROF);
+ if ( ovf == 2 )
+ test_and_set_bool(current->nmi_pending);
return 1;
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/oprofile/op_model_ppro.c
--- a/xen/arch/x86/oprofile/op_model_ppro.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/oprofile/op_model_ppro.c Tue Nov 04 12:43:19 2008 +0900
@@ -18,6 +18,8 @@
#include <xen/sched.h>
#include <asm/regs.h>
#include <asm/current.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core2.h>
#include "op_x86_model.h"
#include "op_counter.h"
@@ -39,9 +41,11 @@
#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e)
-
+#define IS_ACTIVE(val) (val & (1 << 22) )
+#define IS_ENABLE(val) (val & (1 << 20) )
static unsigned long reset_value[NUM_COUNTERS];
int ppro_has_global_ctrl = 0;
+extern int is_passive(struct domain *d);
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
@@ -103,6 +107,7 @@ static int ppro_check_ctrs(unsigned int
int ovf = 0;
unsigned long eip = regs->eip;
int mode = xenoprofile_get_mode(current, regs);
+ struct arch_msr_pair *msrs_content = vcpu_vpmu(current)->context;
for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
@@ -111,7 +116,18 @@ static int ppro_check_ctrs(unsigned int
if (CTR_OVERFLOWED(low)) {
xenoprof_log_event(current, regs, eip, mode, i);
CTR_WRITE(reset_value[i], msrs, i);
- ovf = 1;
+ if ( is_passive(current->domain) && (mode != 2) &&
+ (vcpu_vpmu(current)->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ {
+ if ( IS_ACTIVE(msrs_content[i].control) )
+ {
+ msrs_content[i].counter = (low | (u64)high << 32);
+ if ( IS_ENABLE(msrs_content[i].control) )
+ ovf = 2;
+ }
+ }
+ if ( !ovf )
+ ovf = 1;
}
}
@@ -159,6 +175,82 @@ static void ppro_stop(struct op_msrs con
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
+static int ppro_is_arch_pmu_msr(u64 msr_index, int *type, int *index)
+{
+ if ( (msr_index >= MSR_IA32_PERFCTR0) &&
+ (msr_index < (MSR_IA32_PERFCTR0 + NUM_COUNTERS)) )
+ {
+ *type = MSR_TYPE_ARCH_COUNTER;
+ *index = msr_index - MSR_IA32_PERFCTR0;
+ return 1;
+ }
+ if ( (msr_index >= MSR_P6_EVNTSEL0) &&
+ (msr_index < (MSR_P6_EVNTSEL0 + NUM_CONTROLS)) )
+ {
+ *type = MSR_TYPE_ARCH_CTRL;
+ *index = msr_index - MSR_P6_EVNTSEL0;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ppro_allocate_msr(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct arch_msr_pair *msr_content;
+
+ msr_content = xmalloc_bytes( sizeof(struct arch_msr_pair) * NUM_COUNTERS );
+ if ( !msr_content )
+ goto out;
+ memset(msr_content, 0, sizeof(struct arch_msr_pair) * NUM_COUNTERS);
+ vpmu->context = (void *)msr_content;
+ vpmu->flags = 0;
+ vpmu->flags |= PASSIVE_DOMAIN_ALLOCATED;
+ return 1;
+out:
+ gdprintk(XENLOG_WARNING, "Insufficient memory for oprofile, oprofile is "
+ "unavailable on domain %d vcpu %d.\n",
+ v->domain->domain_id, v->vcpu_id);
+ return 0;
+}
+
+static void ppro_free_msr(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ xfree(vpmu->context);
+ vpmu->flags &= ~PASSIVE_DOMAIN_ALLOCATED;
+}
+
+static void ppro_load_msr(struct vcpu *v, int type, int index, u64 *msr_content)
+{
+ struct arch_msr_pair *msrs = vcpu_vpmu(v)->context;
+ switch ( type )
+ {
+ case MSR_TYPE_ARCH_COUNTER:
+ *msr_content = msrs[index].counter;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ *msr_content = msrs[index].control;
+ break;
+ }
+}
+
+static void ppro_save_msr(struct vcpu *v, int type, int index, u64 msr_content)
+{
+ struct arch_msr_pair *msrs = vcpu_vpmu(v)->context;
+
+ switch ( type )
+ {
+ case MSR_TYPE_ARCH_COUNTER:
+ msrs[index].counter = msr_content;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ msrs[index].control = msr_content;
+ break;
+ }
+}
struct op_x86_model_spec const op_ppro_spec = {
.num_counters = NUM_COUNTERS,
@@ -167,5 +259,10 @@ struct op_x86_model_spec const op_ppro_s
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
- .stop = &ppro_stop
+ .stop = &ppro_stop,
+ .is_arch_pmu_msr = &ppro_is_arch_pmu_msr,
+ .allocated_msr = &ppro_allocate_msr,
+ .free_msr = &ppro_free_msr,
+ .load_msr = &ppro_load_msr,
+ .save_msr = &ppro_save_msr
};
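
[Not part of the patch: a standalone check of the IS_ACTIVE/IS_ENABLE bit tests used in ppro_check_ctrs() above, assuming the architectural event-select layout where bit 22 is the counter-enable bit and bit 20 the interrupt-enable bit.]

    #include <assert.h>

    #define IS_ACTIVE(val) ((val) & (1UL << 22))
    #define IS_ENABLE(val) ((val) & (1UL << 20))

    int main(void)
    {
        unsigned long ctrl = (1UL << 22) | (1UL << 20); /* enabled + INT */

        assert(IS_ACTIVE(ctrl) && IS_ENABLE(ctrl));
        assert(!IS_ACTIVE(ctrl & ~(1UL << 22)));        /* enable cleared */
        return 0;
    }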
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/oprofile/op_x86_model.h
--- a/xen/arch/x86/oprofile/op_x86_model.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/oprofile/op_x86_model.h Tue Nov 04 12:43:19 2008 +0900
@@ -41,6 +41,11 @@ struct op_x86_model_spec {
struct cpu_user_regs * const regs);
void (*start)(struct op_msrs const * const msrs);
void (*stop)(struct op_msrs const * const msrs);
+ int (*is_arch_pmu_msr)(u64 msr_index, int *type, int *index);
+ int (*allocated_msr)(struct vcpu *v);
+ void (*free_msr)(struct vcpu *v);
+ void (*load_msr)(struct vcpu * const v, int type, int index, u64 *msr_content);
+ void (*save_msr)(struct vcpu * const v, int type, int index, u64 msr_content);
};
extern struct op_x86_model_spec const op_ppro_spec;
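
[Not part of the patch: a standalone model of the NULL-guarded hook dispatch. Models that do not support passive-domain profiling leave the new hooks unset, and callers such as passive_domain_do_rdmsr() test before dispatching; types and names below are simplified stand-ins.]

    #include <stdio.h>

    struct model_spec {
        int (*is_arch_pmu_msr)(unsigned long long msr, int *type, int *index);
    };

    static int ppro_like_check(unsigned long long msr, int *type, int *index)
    {
        (void)msr;
        *type = 0;
        *index = 0;
        return 1;
    }

    int main(void)
    {
        struct model_spec with = { .is_arch_pmu_msr = ppro_like_check };
        struct model_spec without = { .is_arch_pmu_msr = NULL };
        int type, index;

        if ( with.is_arch_pmu_msr && with.is_arch_pmu_msr(0xc1, &type, &index) )
            printf("hook handled the MSR\n");
        if ( without.is_arch_pmu_msr == NULL )
            printf("model without hooks: fall through\n");
        return 0;
    }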
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/setup.c Tue Nov 04 12:43:19 2008 +0900
@@ -969,6 +969,7 @@ void __init __start_xen(unsigned long mb
serial_init_postirq();
BUG_ON(!local_irq_is_enabled());
+ spin_debug_enable();
for_each_present_cpu ( i )
{
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/smpboot.c Tue Nov 04 12:43:19 2008 +0900
@@ -101,7 +101,7 @@ static int __devinitdata tsc_sync_disabl
static int __devinitdata tsc_sync_disabled;
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+struct cpuinfo_x86 cpu_data[NR_CPUS];
EXPORT_SYMBOL(cpu_data);
u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
@@ -112,7 +112,7 @@ static void map_cpu_to_logical_apicid(vo
/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
-static void *stack_base[NR_CPUS] __cacheline_aligned;
+static void *stack_base[NR_CPUS];
static DEFINE_SPINLOCK(cpu_add_remove_lock);
/*
@@ -805,14 +805,6 @@ static inline int alloc_cpu_id(void)
return cpu;
}
-static struct vcpu *prepare_idle_vcpu(unsigned int cpu)
-{
- if (idle_vcpu[cpu])
- return idle_vcpu[cpu];
-
- return alloc_idle_vcpu(cpu);
-}
-
static void *prepare_idle_stack(unsigned int cpu)
{
if (!stack_base[cpu])
@@ -849,7 +841,7 @@ static int __devinit do_boot_cpu(int api
booting_cpu = cpu;
- v = prepare_idle_vcpu(cpu);
+ v = alloc_idle_vcpu(cpu);
BUG_ON(v == NULL);
/* start_eip had better be page-aligned! */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/time.c Tue Nov 04 12:43:19 2008 +0900
@@ -1063,8 +1063,6 @@ void init_percpu_time(void)
/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
- local_irq_disable();
-
/* check if TSC is invariant during deep C state
this is a new feature introduced by Nehalem*/
if ( cpuid_edx(0x80000007) & (1u<<8) )
@@ -1078,8 +1076,6 @@ int __init init_xen_time(void)
init_platform_timer();
do_settime(get_cmos_time(), 0, NOW());
-
- local_irq_enable();
return 0;
}
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/traps.c Tue Nov 04 12:43:19 2008 +0900
@@ -1030,7 +1030,7 @@ static int handle_gdt_ldt_mapping_fault(
#endif
static int __spurious_page_fault(
- unsigned long addr, struct cpu_user_regs *regs)
+ unsigned long addr, unsigned int error_code)
{
unsigned long mfn, cr3 = read_cr3();
#if CONFIG_PAGING_LEVELS >= 4
@@ -1052,17 +1052,17 @@ static int __spurious_page_fault(
return 0;
/* Reserved bit violations are never spurious faults. */
- if ( regs->error_code & PFEC_reserved_bit )
+ if ( error_code & PFEC_reserved_bit )
return 0;
required_flags = _PAGE_PRESENT;
- if ( regs->error_code & PFEC_write_access )
+ if ( error_code & PFEC_write_access )
required_flags |= _PAGE_RW;
- if ( regs->error_code & PFEC_user_mode )
+ if ( error_code & PFEC_user_mode )
required_flags |= _PAGE_USER;
disallowed_flags = 0;
- if ( regs->error_code & PFEC_insn_fetch )
+ if ( error_code & PFEC_insn_fetch )
disallowed_flags |= _PAGE_NX;
mfn = cr3 >> PAGE_SHIFT;
@@ -1120,7 +1120,7 @@ static int __spurious_page_fault(
dprintk(XENLOG_WARNING, "Spurious fault in domain %u:%u "
"at addr %lx, e/c %04x\n",
current->domain->domain_id, current->vcpu_id,
- addr, regs->error_code);
+ addr, error_code);
#if CONFIG_PAGING_LEVELS >= 4
dprintk(XENLOG_WARNING, " l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
#endif
@@ -1129,14 +1129,11 @@ static int __spurious_page_fault(
#endif
dprintk(XENLOG_WARNING, " l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
dprintk(XENLOG_WARNING, " l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
-#ifndef NDEBUG
- show_registers(regs);
-#endif
return 1;
}
static int spurious_page_fault(
- unsigned long addr, struct cpu_user_regs *regs)
+ unsigned long addr, unsigned int error_code)
{
unsigned long flags;
int is_spurious;
@@ -1146,7 +1143,7 @@ static int spurious_page_fault(
* page tables from becoming invalid under our feet during the walk.
*/
local_irq_save(flags);
- is_spurious = __spurious_page_fault(addr, regs);
+ is_spurious = __spurious_page_fault(addr, error_code);
local_irq_restore(flags);
return is_spurious;
@@ -1208,8 +1205,12 @@ asmlinkage void do_page_fault(struct cpu
asmlinkage void do_page_fault(struct cpu_user_regs *regs)
{
unsigned long addr, fixup;
+ unsigned int error_code;
addr = read_cr2();
+
+ /* fixup_page_fault() might change regs->error_code, so cache it here. */
+ error_code = regs->error_code;
DEBUGGER_trap_entry(TRAP_page_fault, regs);
@@ -1220,7 +1221,7 @@ asmlinkage void do_page_fault(struct cpu
if ( unlikely(!guest_mode(regs)) )
{
- if ( spurious_page_fault(addr, regs) )
+ if ( spurious_page_fault(addr, error_code) )
return;
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
@@ -1239,11 +1240,11 @@ asmlinkage void do_page_fault(struct cpu
panic("FATAL PAGE FAULT\n"
"[error_code=%04x]\n"
"Faulting linear address: %p\n",
- regs->error_code, _p(addr));
+ error_code, _p(addr));
}
if ( unlikely(current->domain->arch.suppress_spurious_page_faults
- && spurious_page_fault(addr, regs)) )
+ && spurious_page_fault(addr, error_code)) )
return;
propagate_page_fault(addr, regs->error_code);
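
[Not part of the patch: do_page_fault() now snapshots regs->error_code before fixup_page_fault() can rewrite it, while propagate_page_fault() still passes the live field. A standalone illustration of that snapshot pattern, with invented names:]

    #include <assert.h>

    struct regs_model { unsigned int error_code; };

    static void fixup_model(struct regs_model *r)
    {
        r->error_code |= 0x8000;   /* some handler-side adjustment */
    }

    int main(void)
    {
        struct regs_model regs = { .error_code = 0x0002 };
        unsigned int error_code = regs.error_code;  /* snapshot first */

        fixup_model(&regs);
        assert(error_code == 0x0002);           /* checks see the original */
        assert(regs.error_code != error_code);  /* live field moved on */
        return 0;
    }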
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/x86_32/domain_page.c
--- a/xen/arch/x86/x86_32/domain_page.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/x86_32/domain_page.c Tue Nov 04 12:43:19 2008 +0900
@@ -43,7 +43,7 @@ void *map_domain_page(unsigned long mfn)
void *map_domain_page(unsigned long mfn)
{
unsigned long va;
- unsigned int idx, i;
+ unsigned int idx, i, flags;
struct vcpu *v;
struct mapcache_domain *dcache;
struct mapcache_vcpu *vcache;
@@ -69,7 +69,7 @@ void *map_domain_page(unsigned long mfn)
goto out;
}
- spin_lock(&dcache->lock);
+ spin_lock_irqsave(&dcache->lock, flags);
/* Has some other CPU caused a wrap? We must flush if so. */
if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
@@ -105,7 +105,7 @@ void *map_domain_page(unsigned long mfn)
set_bit(idx, dcache->inuse);
dcache->cursor = idx + 1;
- spin_unlock(&dcache->lock);
+ spin_unlock_irqrestore(&dcache->lock, flags);
l1e_write(&dcache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
@@ -114,7 +114,7 @@ void *map_domain_page(unsigned long mfn)
return (void *)va;
}
-void unmap_domain_page(void *va)
+void unmap_domain_page(const void *va)
{
unsigned int idx;
struct vcpu *v;
@@ -241,7 +241,7 @@ void *map_domain_page_global(unsigned lo
return (void *)va;
}
-void unmap_domain_page_global(void *va)
+void unmap_domain_page_global(const void *va)
{
unsigned long __va = (unsigned long)va;
l2_pgentry_t *pl2e;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/x86_64/compat/mm.c
--- a/xen/arch/x86/x86_64/compat/mm.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/x86_64/compat/mm.c Tue Nov 04 12:43:19 2008 +0900
@@ -231,6 +231,8 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
case MMUEXT_PIN_L4_TABLE:
case MMUEXT_UNPIN_TABLE:
case MMUEXT_NEW_BASEPTR:
+ case MMUEXT_CLEAR_PAGE:
+ case MMUEXT_COPY_PAGE:
arg1 = XLAT_mmuext_op_arg1_mfn;
break;
default:
@@ -257,6 +259,9 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mm
case MMUEXT_TLB_FLUSH_MULTI:
case MMUEXT_INVLPG_MULTI:
arg2 = XLAT_mmuext_op_arg2_vcpumask;
+ break;
+ case MMUEXT_COPY_PAGE:
+ arg2 = XLAT_mmuext_op_arg2_src_mfn;
break;
default:
arg2 = -1;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/arch/x86/x86_64/cpufreq.c
--- a/xen/arch/x86/x86_64/cpufreq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/arch/x86/x86_64/cpufreq.c Tue Nov 04 12:43:19 2008 +0900
@@ -56,34 +56,13 @@ compat_set_px_pminfo(uint32_t cpu, struc
return -EFAULT;
#define XLAT_processor_performance_HNDL_states(_d_, _s_) do { \
- xen_processor_px_t *xen_states = NULL; \
-\
- if ( likely((_s_)->state_count > 0) ) \
- { \
- XEN_GUEST_HANDLE(compat_processor_px_t) states; \
- compat_processor_px_t state; \
- int i; \
-\
- xen_states = xlat_malloc_array(xlat_page_current, \
- xen_processor_px_t, (_s_)->state_count); \
- if ( unlikely(xen_states == NULL) ) \
- return -EFAULT; \
-\
- if ( unlikely(!compat_handle_okay((_s_)->states, \
- (_s_)->state_count)) ) \
- return -EFAULT; \
- guest_from_compat_handle(states, (_s_)->states); \
-\
- for ( i = 0; i < _s_->state_count; i++ ) \
- { \
- if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \
- return -EFAULT; \
- XLAT_processor_px(&xen_states[i], &state); \
- } \
- } \
-\
- set_xen_guest_handle((_d_)->states, xen_states); \
+ XEN_GUEST_HANDLE(compat_processor_px_t) states; \
+ if ( unlikely(!compat_handle_okay((_s_)->states, (_s_)->state_count)) ) \
+ return -EFAULT; \
+ guest_from_compat_handle(states, (_s_)->states); \
+ (_d_)->states = guest_handle_cast(states, xen_processor_px_t); \
} while (0)
+
XLAT_processor_performance(xen_perf, perf);
#undef XLAT_processor_performance_HNDL_states
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/event_channel.c
--- a/xen/common/event_channel.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/event_channel.c Tue Nov 04 12:43:19 2008 +0900
@@ -386,7 +386,7 @@ static long __evtchn_close(struct domain
if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
continue;
v->virq_to_evtchn[chn1->u.virq] = 0;
- spin_barrier(&v->virq_lock);
+ spin_barrier_irq(&v->virq_lock);
}
break;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/kernel.c
--- a/xen/common/kernel.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/kernel.c Tue Nov 04 12:43:19 2008 +0900
@@ -221,7 +221,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
#ifdef CONFIG_X86
if ( !is_hvm_vcpu(current) )
- fi.submap |= 1U << XENFEAT_mmu_pt_update_preserve_ad;
+ fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
+ (1U << XENFEAT_highmem_assist);
#endif
break;
default:
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/keyhandler.c
--- a/xen/common/keyhandler.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/keyhandler.c Tue Nov 04 12:43:19 2008 +0900
@@ -183,9 +183,9 @@ static void dump_domains(unsigned char k
{
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
- printk(" refcnt=%d nr_pages=%d xenheap_pages=%d "
+ printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
"dirty_cpus=%s\n",
- atomic_read(&d->refcnt),
+ atomic_read(&d->refcnt), d->is_dying,
d->tot_pages, d->xenheap_pages, tmpstr);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/spinlock.c
--- a/xen/common/spinlock.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/spinlock.c Tue Nov 04 12:43:19 2008 +0900
@@ -1,15 +1,56 @@
#include <xen/config.h>
+#include <xen/irq.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
+#ifndef NDEBUG
+
+static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
+
+static void check_lock(struct lock_debug *debug)
+{
+ int irq_safe = !local_irq_is_enabled();
+
+ if ( unlikely(atomic_read(&spin_debug) <= 0) )
+ return;
+
+ /* A few places take liberties with this. */
+ /* BUG_ON(in_irq() && !irq_safe); */
+
+ if ( unlikely(debug->irq_safe != irq_safe) )
+ {
+ int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+ BUG_ON(seen == !irq_safe);
+ }
+}
+
+void spin_debug_enable(void)
+{
+ atomic_inc(&spin_debug);
+}
+
+void spin_debug_disable(void)
+{
+ atomic_dec(&spin_debug);
+}
+
+#else /* defined(NDEBUG) */
+
+#define check_lock(l) ((void)0)
+
+#endif
+
void _spin_lock(spinlock_t *lock)
{
+ check_lock(&lock->debug);
_raw_spin_lock(&lock->raw);
}
void _spin_lock_irq(spinlock_t *lock)
{
- local_irq_disable();
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
_raw_spin_lock(&lock->raw);
}
@@ -17,6 +58,7 @@ unsigned long _spin_lock_irqsave(spinloc
{
unsigned long flags;
local_irq_save(flags);
+ check_lock(&lock->debug);
_raw_spin_lock(&lock->raw);
return flags;
}
@@ -40,26 +82,39 @@ void _spin_unlock_irqrestore(spinlock_t
int _spin_is_locked(spinlock_t *lock)
{
+ check_lock(&lock->debug);
return _raw_spin_is_locked(&lock->raw);
}
int _spin_trylock(spinlock_t *lock)
{
+ check_lock(&lock->debug);
return _raw_spin_trylock(&lock->raw);
}
void _spin_barrier(spinlock_t *lock)
{
+ check_lock(&lock->debug);
do { mb(); } while ( _raw_spin_is_locked(&lock->raw) );
mb();
}
+void _spin_barrier_irq(spinlock_t *lock)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ _spin_barrier(lock);
+ local_irq_restore(flags);
+}
+
void _spin_lock_recursive(spinlock_t *lock)
{
int cpu = smp_processor_id();
/* Don't allow overflow of recurse_cpu field. */
BUILD_BUG_ON(NR_CPUS > 0xfffu);
+
+ check_lock(&lock->debug);
if ( likely(lock->recurse_cpu != cpu) )
{
@@ -83,12 +138,15 @@ void _spin_unlock_recursive(spinlock_t *
void _read_lock(rwlock_t *lock)
{
+ check_lock(&lock->debug);
_raw_read_lock(&lock->raw);
}
void _read_lock_irq(rwlock_t *lock)
{
- local_irq_disable();
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
_raw_read_lock(&lock->raw);
}
@@ -96,6 +154,7 @@ unsigned long _read_lock_irqsave(rwlock_
{
unsigned long flags;
local_irq_save(flags);
+ check_lock(&lock->debug);
_raw_read_lock(&lock->raw);
return flags;
}
@@ -119,12 +178,15 @@ void _read_unlock_irqrestore(rwlock_t *l
void _write_lock(rwlock_t *lock)
{
+ check_lock(&lock->debug);
_raw_write_lock(&lock->raw);
}
void _write_lock_irq(rwlock_t *lock)
{
- local_irq_disable();
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
_raw_write_lock(&lock->raw);
}
@@ -132,6 +194,7 @@ unsigned long _write_lock_irqsave(rwlock
{
unsigned long flags;
local_irq_save(flags);
+ check_lock(&lock->debug);
_raw_write_lock(&lock->raw);
return flags;
}
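
[Not part of the patch: a single-threaded model of the check_lock() invariant added above. The first acquisition records whether IRQs were disabled; any later acquisition with the opposite state trips the check. The real code records with cmpxchg; this model uses a plain test-and-set.]

    #include <assert.h>

    struct lock_debug_model { int irq_safe; };  /* -1: not yet known */

    static void check_lock_model(struct lock_debug_model *d, int irqs_disabled)
    {
        if ( d->irq_safe == -1 )
            d->irq_safe = irqs_disabled;        /* first use decides */
        assert(d->irq_safe == irqs_disabled);   /* mixed use is a bug */
    }

    int main(void)
    {
        struct lock_debug_model d = { -1 };

        check_lock_model(&d, 1);  /* taken with IRQs disabled */
        check_lock_model(&d, 1);  /* consistent use: fine */
        /* check_lock_model(&d, 0) would assert: IRQ-unsafe use of an
         * IRQ-safe lock is exactly the deadlock risk being caught. */
        return 0;
    }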
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/timer.c
--- a/xen/common/timer.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/timer.c Tue Nov 04 12:43:19 2008 +0900
@@ -25,10 +25,12 @@
* We pull handlers off the timer list this far in future,
* rather than reprogramming the time hardware.
*/
-#define TIMER_SLOP (50*1000) /* ns */
+static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
+integer_param("timer_slop", timer_slop);
struct timers {
spinlock_t lock;
+ bool_t overflow;
struct timer **heap;
struct timer *list;
struct timer *running;
@@ -200,6 +202,7 @@ static int add_entry(struct timers *time
return rc;
/* Fall back to adding to the slower linked list. */
+ timers->overflow = 1;
t->status = TIMER_STATUS_in_list;
return add_to_list(&timers->list, t);
}
@@ -258,6 +261,7 @@ void set_timer(struct timer *timer, s_ti
__stop_timer(timer);
timer->expires = expires;
+ timer->expires_end = expires + timer_slop;
if ( likely(timer->status != TIMER_STATUS_killed) )
__add_timer(timer);
@@ -344,19 +348,30 @@ void kill_timer(struct timer *timer)
}
+static void execute_timer(struct timers *ts, struct timer *t)
+{
+ void (*fn)(void *) = t->function;
+ void *data = t->data;
+
+ ts->running = t;
+ spin_unlock_irq(&ts->lock);
+ (*fn)(data);
+ spin_lock_irq(&ts->lock);
+ ts->running = NULL;
+}
+
+
static void timer_softirq_action(void)
{
struct timer *t, **heap, *next;
struct timers *ts;
- s_time_t now, deadline;
- void (*fn)(void *);
- void *data;
+ s_time_t now;
ts = &this_cpu(timers);
heap = ts->heap;
- /* If we are using overflow linked list, try to allocate a larger heap. */
- if ( unlikely(ts->list != NULL) )
+ /* If we overflowed the heap, try to allocate a larger heap. */
+ if ( unlikely(ts->overflow) )
{
/* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
int old_limit = GET_HEAP_LIMIT(heap);
@@ -377,7 +392,26 @@ static void timer_softirq_action(void)
spin_lock_irq(&ts->lock);
- /* Try to move timers from overflow linked list to more efficient heap. */
+ now = NOW();
+
+ /* Execute ready heap timers. */
+ while ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires_end < now) )
+ {
+ remove_from_heap(heap, t);
+ t->status = TIMER_STATUS_inactive;
+ execute_timer(ts, t);
+ }
+
+ /* Execute ready list timers. */
+ while ( ((t = ts->list) != NULL) && (t->expires_end < now) )
+ {
+ ts->list = t->list_next;
+ t->status = TIMER_STATUS_inactive;
+ execute_timer(ts, t);
+ }
+
+ /* Try to move timers from linked list to more efficient heap. */
next = ts->list;
ts->list = NULL;
while ( unlikely((t = next) != NULL) )
@@ -387,51 +421,44 @@ static void timer_softirq_action(void)
add_entry(ts, t);
}
- now = NOW();
-
- while ( (GET_HEAP_SIZE(heap) != 0) &&
- ((t = heap[1])->expires < (now + TIMER_SLOP)) )
- {
- remove_entry(ts, t);
-
- ts->running = t;
-
- fn = t->function;
- data = t->data;
-
- spin_unlock_irq(&ts->lock);
- (*fn)(data);
- spin_lock_irq(&ts->lock);
- }
-
- deadline = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
-
- while ( unlikely((t = ts->list) != NULL) )
- {
- if ( t->expires >= (now + TIMER_SLOP) )
+ ts->overflow = (ts->list != NULL);
+ if ( unlikely(ts->overflow) )
+ {
+ /* Find earliest deadline at head of list or top of heap. */
+ this_cpu(timer_deadline) = ts->list->expires;
+ if ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires < this_cpu(timer_deadline)) )
+ this_cpu(timer_deadline) = t->expires;
+ }
+ else
+ {
+ /*
+ * Find the earliest deadline that encompasses largest number of timers
+ * on the heap. To do this we take timers from the heap while their
+ * valid deadline ranges continue to intersect.
+ */
+ s_time_t start = 0, end = STIME_MAX;
+ struct timer **list_tail = &ts->list;
+
+ while ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires <= end) )
{
- if ( (deadline == 0) || (deadline > t->expires) )
- deadline = t->expires;
- break;
+ remove_entry(ts, t);
+
+ t->status = TIMER_STATUS_in_list;
+ t->list_next = NULL;
+ *list_tail = t;
+ list_tail = &t->list_next;
+
+ start = t->expires;
+ if ( end > t->expires_end )
+ end = t->expires_end;
}
- ts->list = t->list_next;
- t->status = TIMER_STATUS_inactive;
-
- ts->running = t;
-
- fn = t->function;
- data = t->data;
-
- spin_unlock_irq(&ts->lock);
- (*fn)(data);
- spin_lock_irq(&ts->lock);
- }
-
- ts->running = NULL;
-
- this_cpu(timer_deadline) = deadline;
- if ( !reprogram_timer(deadline) )
+ this_cpu(timer_deadline) = start;
+ }
+
+ if ( !reprogram_timer(this_cpu(timer_deadline)) )
raise_softirq(TIMER_SOFTIRQ);
spin_unlock_irq(&ts->lock);
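
[Not part of the patch: the rewritten softirq handler no longer expires heap timers one by one against expires + TIMER_SLOP. Each timer now carries a window [expires, expires_end], and the handler pops timers while the windows still intersect, programming the hardware once for the whole batch. A standalone model of that batching loop, on a sorted array instead of the heap:]

    #include <stdio.h>

    struct win { long long expires, expires_end; };

    int main(void)
    {
        /* Sorted by expires, slop already added to expires_end. */
        struct win t[] = { {100, 150}, {120, 170}, {140, 145}, {300, 350} };
        long long start = 0, end = (long long)((unsigned long long)-1 >> 1);
        int i, n = sizeof(t) / sizeof(t[0]);

        for ( i = 0; i < n && t[i].expires <= end; i++ )
        {
            start = t[i].expires;          /* latest start so far */
            if ( end > t[i].expires_end )
                end = t[i].expires_end;    /* earliest end so far */
        }
        /* Prints: batched 3 timers, one deadline at 140 */
        printf("batched %d timers, one deadline at %lld\n", i, start);
        return 0;
    }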
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/xenoprof.c
--- a/xen/common/xenoprof.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/common/xenoprof.c Tue Nov 04 12:43:19 2008 +0900
@@ -85,7 +85,7 @@ int is_active(struct domain *d)
return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE));
}
-static int is_passive(struct domain *d)
+int is_passive(struct domain *d)
{
struct xenoprof *x = d->xenoprof;
return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_PASSIVE));
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/common/xmalloc.c
--- a/xen/common/xmalloc.c Tue Nov 04 12:07:22 2008 +0900
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,286 +0,0 @@
-/******************************************************************************
- * Simple allocator for Xen. If larger than a page, simply use the
- * page-order allocator.
- *
- * Copyright (C) 2005 Rusty Russell IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/*
- * TODO (Keir, 17/2/05):
- * 1. Use space in page_info to avoid xmalloc_hdr in allocated blocks.
- * 2. page_info points into free list to make xfree() O(1) complexity.
- * 3. Perhaps make this a sub-page buddy allocator? xmalloc() == O(1).
- * (Disadvantage is potentially greater internal fragmentation).
- */
-
-#include <xen/config.h>
-#include <xen/mm.h>
-#include <xen/spinlock.h>
-#include <xen/timer.h>
-#include <xen/cache.h>
-#include <xen/prefetch.h>
-#include <xen/irq.h>
-#include <xen/smp.h>
-
-/*
- * XMALLOC_DEBUG:
- * 1. Free data blocks are filled with poison bytes.
- * 2. In-use data blocks have guard bytes at the start and end.
- */
-#ifndef NDEBUG
-#define XMALLOC_DEBUG 1
-#endif
-
-static LIST_HEAD(freelist);
-static DEFINE_SPINLOCK(freelist_lock);
-
-struct xmalloc_hdr
-{
- /* Size is total including this header. */
- size_t size;
- struct list_head freelist;
-} __cacheline_aligned;
-
-static void add_to_freelist(struct xmalloc_hdr *hdr)
-{
-#if XMALLOC_DEBUG
- memset(hdr + 1, 0xa5, hdr->size - sizeof(*hdr));
-#endif
- list_add(&hdr->freelist, &freelist);
-}
-
-static void del_from_freelist(struct xmalloc_hdr *hdr)
-{
-#if XMALLOC_DEBUG
- size_t i;
- unsigned char *data = (unsigned char *)(hdr + 1);
- for ( i = 0; i < (hdr->size - sizeof(*hdr)); i++ )
- BUG_ON(data[i] != 0xa5);
- BUG_ON((hdr->size <= 0) || (hdr->size >= PAGE_SIZE));
-#endif
- list_del(&hdr->freelist);
-}
-
-static void *data_from_header(struct xmalloc_hdr *hdr)
-{
-#if XMALLOC_DEBUG
- /* Data block contain SMP_CACHE_BYTES of guard canary. */
- unsigned char *data = (unsigned char *)(hdr + 1);
- memset(data, 0x5a, SMP_CACHE_BYTES);
- memset(data + hdr->size - sizeof(*hdr) - SMP_CACHE_BYTES,
- 0x5a, SMP_CACHE_BYTES);
- return data + SMP_CACHE_BYTES;
-#else
- return hdr + 1;
-#endif
-}
-
-static struct xmalloc_hdr *header_from_data(void *p)
-{
-#if XMALLOC_DEBUG
- unsigned char *data = (unsigned char *)p - SMP_CACHE_BYTES;
- struct xmalloc_hdr *hdr = (struct xmalloc_hdr *)data - 1;
- size_t i;
-
- /* Check header guard canary. */
- for ( i = 0; i < SMP_CACHE_BYTES; i++ )
- BUG_ON(data[i] != 0x5a);
-
- /* Check footer guard canary. */
- data += hdr->size - sizeof(*hdr) - SMP_CACHE_BYTES;
- for ( i = 0; i < SMP_CACHE_BYTES; i++ )
- BUG_ON(data[i] != 0x5a);
-
- return hdr;
-#else
- return (struct xmalloc_hdr *)p - 1;
-#endif
-}
-
-static void maybe_split(struct xmalloc_hdr *hdr, size_t size, size_t block)
-{
- struct xmalloc_hdr *extra;
- size_t leftover = block - size;
-
- /* If enough is left to make a block, put it on free list. */
- if ( leftover >= (2 * sizeof(struct xmalloc_hdr)) )
- {
- extra = (struct xmalloc_hdr *)((unsigned long)hdr + size);
- extra->size = leftover;
- add_to_freelist(extra);
- }
- else
- {
- size = block;
- }
-
- hdr->size = size;
- /* Debugging aid. */
- hdr->freelist.next = hdr->freelist.prev = NULL;
-}
-
-static void *xmalloc_new_page(size_t size)
-{
- struct xmalloc_hdr *hdr;
-
- hdr = alloc_xenheap_page();
- if ( hdr == NULL )
- return NULL;
-
- spin_lock(&freelist_lock);
- maybe_split(hdr, size, PAGE_SIZE);
- spin_unlock(&freelist_lock);
-
- return data_from_header(hdr);
-}
-
-/* Big object? Just use the page allocator. */
-static void *xmalloc_whole_pages(size_t size)
-{
- struct xmalloc_hdr *hdr;
- unsigned int pageorder = get_order_from_bytes(size);
-
- hdr = alloc_xenheap_pages(pageorder);
- if ( hdr == NULL )
- return NULL;
-
- hdr->size = (1 << (pageorder + PAGE_SHIFT));
- /* Debugging aid. */
- hdr->freelist.next = hdr->freelist.prev = NULL;
-
- return data_from_header(hdr);
-}
-
-/* Return size, increased to alignment with align. */
-static inline size_t align_up(size_t size, size_t align)
-{
- return (size + align - 1) & ~(align - 1);
-}
-
-void *_xmalloc(size_t size, size_t align)
-{
- struct xmalloc_hdr *i;
-
- ASSERT(!in_irq());
-
- /* We currently always return cacheline aligned. */
- BUG_ON(align > SMP_CACHE_BYTES);
-
-#if XMALLOC_DEBUG
- /* Add room for canaries at start and end of data block. */
- size += 2 * SMP_CACHE_BYTES;
-#endif
-
- /* Add room for header, pad to align next header. */
- size += sizeof(struct xmalloc_hdr);
- size = align_up(size, __alignof__(struct xmalloc_hdr));
-
- /* For big allocs, give them whole pages. */
- if ( size >= PAGE_SIZE )
- return xmalloc_whole_pages(size);
-
- /* Search free list. */
- spin_lock(&freelist_lock);
- list_for_each_entry( i, &freelist, freelist )
- {
- if ( i->size < size )
- continue;
- del_from_freelist(i);
- maybe_split(i, size, i->size);
- spin_unlock(&freelist_lock);
- return data_from_header(i);
- }
- spin_unlock(&freelist_lock);
-
- /* Alloc a new page and return from that. */
- return xmalloc_new_page(size);
-}
-
-void xfree(void *p)
-{
- struct xmalloc_hdr *i, *tmp, *hdr;
-
- ASSERT(!in_irq());
-
- if ( p == NULL )
- return;
-
- hdr = header_from_data(p);
-
- /* We know hdr will be on same page. */
- BUG_ON(((long)p & PAGE_MASK) != ((long)hdr & PAGE_MASK));
-
- /* Not previously freed. */
- BUG_ON(hdr->freelist.next || hdr->freelist.prev);
-
- /* Big allocs free directly. */
- if ( hdr->size >= PAGE_SIZE )
- {
- free_xenheap_pages(hdr, get_order_from_bytes(hdr->size));
- return;
- }
-
- /* Merge with other free block, or put in list. */
- spin_lock(&freelist_lock);
- list_for_each_entry_safe( i, tmp, &freelist, freelist )
- {
- unsigned long _i = (unsigned long)i;
- unsigned long _hdr = (unsigned long)hdr;
-
- /* Do not merge across page boundaries. */
- if ( ((_i ^ _hdr) & PAGE_MASK) != 0 )
- continue;
-
- /* We follow this block? Swallow it. */
- if ( (_i + i->size) == _hdr )
- {
- del_from_freelist(i);
- i->size += hdr->size;
- hdr = i;
- }
-
- /* We precede this block? Swallow it. */
- if ( (_hdr + hdr->size) == _i )
- {
- del_from_freelist(i);
- hdr->size += i->size;
- }
- }
-
- /* Did we merge an entire page? */
- if ( hdr->size == PAGE_SIZE )
- {
- BUG_ON((((unsigned long)hdr) & (PAGE_SIZE-1)) != 0);
- free_xenheap_pages(hdr, 0);
- }
- else
- {
- add_to_freelist(hdr);
- }
-
- spin_unlock(&freelist_lock);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/drivers/char/serial.c
--- a/xen/drivers/char/serial.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/drivers/char/serial.c Tue Nov 04 12:43:19 2008 +0900
@@ -74,7 +74,7 @@ void serial_tx_interrupt(struct serial_p
while ( !spin_trylock(&port->tx_lock) )
{
if ( !port->driver->tx_empty(port) )
- return;
+ goto out;
cpu_relax();
}
@@ -89,7 +89,10 @@ void serial_tx_interrupt(struct serial_p
}
}
- spin_unlock_irqrestore(&port->tx_lock, flags);
+ spin_unlock(&port->tx_lock);
+
+ out:
+ local_irq_restore(flags);
}
static void __serial_putc(struct serial_port *port, char c)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/drivers/cpufreq/cpufreq.c Tue Nov 04 12:43:19 2008 +0900
@@ -31,6 +31,7 @@
#include <xen/errno.h>
#include <xen/delay.h>
#include <xen/cpumask.h>
+#include <xen/list.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <xen/xmalloc.h>
@@ -44,8 +45,12 @@
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>
-/* TODO: change to link list later as domain number may be sparse */
-static cpumask_t cpufreq_dom_map[NR_CPUS];
+struct cpufreq_dom {
+ unsigned int dom;
+ cpumask_t map;
+ struct list_head node;
+};
+static LIST_HEAD(cpufreq_dom_list_head);
int cpufreq_limit_change(unsigned int cpu)
{
@@ -72,48 +77,80 @@ int cpufreq_add_cpu(unsigned int cpu)
{
int ret = 0;
unsigned int firstcpu;
- unsigned int dom;
+ unsigned int dom, domexist = 0;
unsigned int j;
+ struct list_head *pos;
+ struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy new_policy;
struct cpufreq_policy *policy;
struct processor_performance *perf = &processor_pminfo[cpu]->perf;
/* to protect the case when Px was not controlled by xen */
- if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+ if (!processor_pminfo[cpu] ||
+ !(perf->init & XEN_PX_INIT) ||
+ !cpu_online(cpu))
+ return -EINVAL;
+
+ if (cpufreq_cpu_policy[cpu])
return 0;
-
- if (!cpu_online(cpu) || cpufreq_cpu_policy[cpu])
- return -EINVAL;
ret = cpufreq_statistic_init(cpu);
if (ret)
return ret;
dom = perf->domain_info.domain;
- if (cpus_weight(cpufreq_dom_map[dom])) {
+
+ list_for_each(pos, &cpufreq_dom_list_head) {
+ cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
+ if (dom == cpufreq_dom->dom) {
+ domexist = 1;
+ break;
+ }
+ }
+
+ if (domexist) {
/* share policy with the first cpu since on same boat */
- firstcpu = first_cpu(cpufreq_dom_map[dom]);
+ firstcpu = first_cpu(cpufreq_dom->map);
policy = cpufreq_cpu_policy[firstcpu];
cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom_map[dom]);
+ cpu_set(cpu, cpufreq_dom->map);
cpu_set(cpu, policy->cpus);
+
+ /* domain coordination sanity check */
+ if ((perf->domain_info.coord_type !=
+ processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+ (perf->domain_info.num_processors !=
+ processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+ ret = -EINVAL;
+ goto err2;
+ }
printk(KERN_EMERG"adding CPU %u\n", cpu);
} else {
+ cpufreq_dom = xmalloc(struct cpufreq_dom);
+ if (!cpufreq_dom) {
+ cpufreq_statistic_exit(cpu);
+ return -ENOMEM;
+ }
+ memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
+ cpufreq_dom->dom = dom;
+ cpu_set(cpu, cpufreq_dom->map);
+ list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+
/* for the first cpu, setup policy and do init work */
policy = xmalloc(struct cpufreq_policy);
if (!policy) {
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
cpufreq_statistic_exit(cpu);
return -ENOMEM;
}
memset(policy, 0, sizeof(struct cpufreq_policy));
-
+ policy->cpu = cpu;
+ cpu_set(cpu, policy->cpus);
cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom_map[dom]);
- cpu_set(cpu, policy->cpus);
-
- policy->cpu = cpu;
+
ret = cpufreq_driver->init(policy);
if (ret)
goto err1;
@@ -124,7 +161,7 @@ int cpufreq_add_cpu(unsigned int cpu)
* After get full cpumap of the coordination domain,
* we can safely start gov here.
*/
- if (cpus_weight(cpufreq_dom_map[dom]) ==
+ if (cpus_weight(cpufreq_dom->map) ==
perf->domain_info.num_processors) {
memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
policy->governor = NULL;
@@ -138,51 +175,68 @@ err2:
err2:
cpufreq_driver->exit(policy);
err1:
- for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+ for_each_cpu_mask(j, cpufreq_dom->map) {
cpufreq_cpu_policy[j] = NULL;
cpufreq_statistic_exit(j);
}
- cpus_clear(cpufreq_dom_map[dom]);
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
xfree(policy);
return ret;
}
int cpufreq_del_cpu(unsigned int cpu)
{
- unsigned int dom;
+ unsigned int dom, domexist = 0;
+ struct list_head *pos;
+ struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy *policy;
struct processor_performance *perf = &processor_pminfo[cpu]->perf;
/* to protect the case when Px was not controlled by xen */
- if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+ if (!processor_pminfo[cpu] ||
+ !(perf->init & XEN_PX_INIT) ||
+ !cpu_online(cpu))
+ return -EINVAL;
+
+ if (!cpufreq_cpu_policy[cpu])
return 0;
-
- if (!cpu_online(cpu) || !cpufreq_cpu_policy[cpu])
- return -EINVAL;
dom = perf->domain_info.domain;
policy = cpufreq_cpu_policy[cpu];
- printk(KERN_EMERG"deleting CPU %u\n", cpu);
+ list_for_each(pos, &cpufreq_dom_list_head) {
+ cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
+ if (dom == cpufreq_dom->dom) {
+ domexist = 1;
+ break;
+ }
+ }
+
+ if (!domexist)
+ return -EINVAL;
/* for the first cpu of the domain, stop gov */
- if (cpus_weight(cpufreq_dom_map[dom]) ==
+ if (cpus_weight(cpufreq_dom->map) ==
perf->domain_info.num_processors)
__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
cpufreq_cpu_policy[cpu] = NULL;
cpu_clear(cpu, policy->cpus);
- cpu_clear(cpu, cpufreq_dom_map[dom]);
+ cpu_clear(cpu, cpufreq_dom->map);
cpufreq_statistic_exit(cpu);
/* for the last cpu of the domain, clean room */
/* It's safe here to free freq_table, drv_data and policy */
- if (!cpus_weight(cpufreq_dom_map[dom])) {
+ if (!cpus_weight(cpufreq_dom->map)) {
cpufreq_driver->exit(policy);
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
xfree(policy);
}
+ printk(KERN_EMERG"deleting CPU %u\n", cpu);
return 0;
}
@@ -258,6 +312,24 @@ int set_px_pminfo(uint32_t acpi_id, stru
if ( dom0_px_info->flags & XEN_PX_PCT )
{
+ /* space_id check */
+ if (dom0_px_info->control_register.space_id !=
+ dom0_px_info->status_register.space_id)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+#ifdef CONFIG_IA64
+ /* for IA64, currently it only supports FFH */
+ if (dom0_px_info->control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+#endif
+
memcpy ((void *)&pxpt->control_register,
(void *)&dom0_px_info->control_register,
sizeof(struct xen_pct_register));
@@ -267,8 +339,16 @@ int set_px_pminfo(uint32_t acpi_id, stru
print_PCT(&pxpt->control_register);
print_PCT(&pxpt->status_register);
}
+
if ( dom0_px_info->flags & XEN_PX_PSS )
{
+ /* capability check */
+ if (dom0_px_info->state_count <= 1)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
if ( !(pxpt->states = xmalloc_array(struct xen_processor_px,
dom0_px_info->state_count)) )
{
@@ -280,14 +360,28 @@ int set_px_pminfo(uint32_t acpi_id, stru
pxpt->state_count = dom0_px_info->state_count;
print_PSS(pxpt->states,pxpt->state_count);
}
+
if ( dom0_px_info->flags & XEN_PX_PSD )
{
+#ifdef CONFIG_X86
+ /* for X86, check domain coordination */
+ /* for IA64, _PSD is optional for current IA64 cpufreq algorithm */
+ if (dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ALL &&
+ dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ANY &&
+ dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_HW)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+#endif
+
pxpt->shared_type = dom0_px_info->shared_type;
memcpy ((void *)&pxpt->domain_info,
(void *)&dom0_px_info->domain_info,
sizeof(struct xen_psd_package));
print_PSD(&pxpt->domain_info);
}
+
if ( dom0_px_info->flags & XEN_PX_PPC )
{
pxpt->platform_limit = dom0_px_info->platform_limit;
@@ -295,7 +389,6 @@ int set_px_pminfo(uint32_t acpi_id, stru
if ( pxpt->init == XEN_PX_INIT )
{
-
ret = cpufreq_limit_change(cpuid);
goto out;
}
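
[Not part of the patch: the NR_CPUS-sized cpufreq_dom_map array is replaced by a linked list because ACPI _PSD domain numbers may be sparse, as the removed TODO noted. A standalone model of the resulting lookup, with illustrative names:]

    #include <stdio.h>

    struct dom_node {
        unsigned int dom;
        struct dom_node *next;
    };

    static struct dom_node *find_dom(struct dom_node *head, unsigned int dom)
    {
        for ( ; head != NULL; head = head->next )
            if ( head->dom == dom )
                return head;
        return NULL;
    }

    int main(void)
    {
        /* Sparse ids (e.g. 0 and 0x10000) cost two nodes, not a huge array. */
        struct dom_node b = { 0x10000, NULL };
        struct dom_node a = { 0, &b };

        printf("found dom 0x10000: %s\n",
               find_dom(&a, 0x10000) ? "yes" : "no");
        return 0;
    }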
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/config.h
--- a/xen/include/asm-x86/config.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/config.h Tue Nov 04 12:43:19 2008 +0900
@@ -40,14 +40,6 @@
#define CONFIG_HOTPLUG 1
#define CONFIG_HOTPLUG_CPU 1
-
-/*
- * Avoid deep recursion when tearing down pagetables during domain destruction,
- * causing dom0 to become unresponsive and Xen to miss time-critical softirq
- * deadlines. This will ultimately be replaced by built-in preemptibility of
- * get_page_type().
- */
-#define DOMAIN_DESTRUCT_AVOID_RECURSION 1
#define HZ 100
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/event.h Tue Nov 04 12:43:19 2008 +0900
@@ -11,36 +11,8 @@
#include <xen/shared.h>
-static inline void vcpu_kick(struct vcpu *v)
-{
- /*
- * NB1. 'pause_flags' and 'processor' must be checked /after/ update of
- * pending flag. These values may fluctuate (after all, we hold no
- * locks) but the key insight is that each change will cause
- * evtchn_upcall_pending to be polled.
- *
- * NB2. We save the running flag across the unblock to avoid a needless
- * IPI for domains that we IPI'd to unblock.
- */
- int running = v->is_running;
- vcpu_unblock(v);
- if ( running )
- smp_send_event_check_cpu(v->processor);
-}
-
-static inline void vcpu_mark_events_pending(struct vcpu *v)
-{
- int already_pending = test_and_set_bit(
- 0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));
-
- if ( already_pending )
- return;
-
- if ( is_hvm_vcpu(v) )
- hvm_assert_evtchn_irq(v);
- else
- vcpu_kick(v);
-}
+void vcpu_kick(struct vcpu *v);
+void vcpu_mark_events_pending(struct vcpu *v);
int hvm_local_events_need_delivery(struct vcpu *v);
static inline int local_events_need_delivery(void)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/fixmap.h
--- a/xen/include/asm-x86/fixmap.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/fixmap.h Tue Nov 04 12:43:19 2008 +0900
@@ -29,6 +29,7 @@
* from the end of virtual memory backwards.
*/
enum fixed_addresses {
+ FIX_RESERVED, /* Index 0 is reserved since fix_to_virt(0) > FIXADDR_TOP. */
#ifdef __i386__
FIX_PAE_HIGHMEM_0,
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/hvm/vmx/vpmu.h
--- a/xen/include/asm-x86/hvm/vmx/vpmu.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vpmu.h Tue Nov 04 12:43:19 2008 +0900
@@ -67,7 +67,7 @@ struct vpmu_struct {
#define VPMU_CONTEXT_ALLOCATED 0x1
#define VPMU_CONTEXT_LOADED 0x2
#define VPMU_RUNNING 0x4
-
+#define PASSIVE_DOMAIN_ALLOCATED 0x8
int vpmu_do_wrmsr(struct cpu_user_regs *regs);
int vpmu_do_rdmsr(struct cpu_user_regs *regs);
int vpmu_do_interrupt(struct cpu_user_regs *regs);
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/hvm/vmx/vpmu_core2.h
--- a/xen/include/asm-x86/hvm/vmx/vpmu_core2.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/vmx/vpmu_core2.h Tue Nov 04 12:43:19 2008 +0900
@@ -23,28 +23,6 @@
#ifndef __ASM_X86_HVM_VPMU_CORE_H_
#define __ASM_X86_HVM_VPMU_CORE_H_
-/* Core 2 Non-architectual Performance Counter MSRs. */
-u32 core2_counters_msr[] = {
- MSR_CORE_PERF_FIXED_CTR0,
- MSR_CORE_PERF_FIXED_CTR1,
- MSR_CORE_PERF_FIXED_CTR2};
-
-/* Core 2 Non-architectual Performance Control MSRs. */
-u32 core2_ctrls_msr[] = {
- MSR_CORE_PERF_FIXED_CTR_CTRL,
- MSR_IA32_PEBS_ENABLE,
- MSR_IA32_DS_AREA};
-
-struct pmumsr core2_counters = {
- 3,
- core2_counters_msr
-};
-
-struct pmumsr core2_ctrls = {
- 3,
- core2_ctrls_msr
-};
-
struct arch_msr_pair {
u64 counter;
u64 control;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/hvm/vpt.h Tue Nov 04 12:43:19 2008 +0900
@@ -32,41 +32,6 @@
#include <asm/hvm/irq.h>
#include <public/hvm/save.h>
-struct HPETState;
-struct HPET_timer_fn_info {
- struct HPETState *hs;
- unsigned int tn;
-};
-
-struct hpet_registers {
- /* Memory-mapped, software visible registers */
- uint64_t capability; /* capabilities */
- uint64_t config; /* configuration */
- uint64_t isr; /* interrupt status reg */
- uint64_t mc64; /* main counter */
- struct { /* timers */
- uint64_t config; /* configuration/cap */
- uint64_t cmp; /* comparator */
- uint64_t fsb; /* FSB route, not supported now */
- } timers[HPET_TIMER_NUM];
-
- /* Hidden register state */
- uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
-};
-
-typedef struct HPETState {
- struct hpet_registers hpet;
- struct vcpu *vcpu;
- uint64_t stime_freq;
- uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
- uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */
- uint64_t mc_offset;
- struct timer timers[HPET_TIMER_NUM];
- struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM];
- spinlock_t lock;
-} HPETState;
-
-
/*
* Abstract layer of periodic time, one short time.
*/
@@ -107,6 +72,34 @@ typedef struct PITState {
struct periodic_time pt0;
spinlock_t lock;
} PITState;
+
+struct hpet_registers {
+ /* Memory-mapped, software visible registers */
+ uint64_t capability; /* capabilities */
+ uint64_t config; /* configuration */
+ uint64_t isr; /* interrupt status reg */
+ uint64_t mc64; /* main counter */
+ struct { /* timers */
+ uint64_t config; /* configuration/cap */
+ uint64_t cmp; /* comparator */
+ uint64_t fsb; /* FSB route, not supported now */
+ } timers[HPET_TIMER_NUM];
+
+ /* Hidden register state */
+ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+ uint64_t comparator64[HPET_TIMER_NUM]; /* 64 bit running comparator */
+};
+
+typedef struct HPETState {
+ struct hpet_registers hpet;
+ struct vcpu *vcpu;
+ uint64_t stime_freq;
+ uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
+ uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */
+ uint64_t mc_offset;
+ struct periodic_time pt[HPET_TIMER_NUM];
+ spinlock_t lock;
+} HPETState;
typedef struct RTCState {
/* Hardware state */
@@ -160,13 +153,13 @@ void pt_migrate(struct vcpu *v);
* The given periodic timer structure must be initialised with zero bytes,
* except for the 'source' field which must be initialised with the
* correct PTSRC_ value. The initialised timer structure can then be passed
- * to {create,destroy}_periodic_time() and number of times and in any order.
+ * to {create,destroy}_periodic_time() any number of times and in any order.
* Note that, for a given periodic timer, invocations of these functions MUST
* be serialised.
*/
void create_periodic_time(
- struct vcpu *v, struct periodic_time *pt, uint64_t period,
- uint8_t irq, char one_shot, time_cb *cb, void *data);
+ struct vcpu *v, struct periodic_time *pt, uint64_t delta,
+ uint64_t period, uint8_t irq, time_cb *cb, void *data);
void destroy_periodic_time(struct periodic_time *pt);
int pv_pit_handler(int port, int data, int write);
@@ -185,7 +178,6 @@ void pmtimer_deinit(struct domain *d);
void pmtimer_deinit(struct domain *d);
void pmtimer_reset(struct domain *d);
-void hpet_migrate_timers(struct vcpu *v);
void hpet_init(struct vcpu *v);
void hpet_deinit(struct domain *d);
void hpet_reset(struct domain *d);
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/mm.h Tue Nov 04 12:43:19 2008 +0900
@@ -61,12 +61,36 @@ struct page_info
/*
* When PGT_partial is true then this field is valid and indicates
* that PTEs in the range [0, @nr_validated_ptes) have been validated.
- * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
- * partially validated.
+ * An extra page reference must be acquired (or not dropped) whenever
+ * PGT_partial gets set, and it must be dropped when the flag gets
+ * cleared. This is so that a get() leaving a page in partially
+ * validated state (where the caller would drop the reference acquired
+ * due to the getting of the type [apparently] failing [-EAGAIN])
+ * would not accidentally result in a page left with zero general
+ * reference count, but non-zero type reference count (possible when
+ * the partial get() is followed immediately by domain destruction).
+ * Likewise, the ownership of the single type reference for partially
+ * (in-)validated pages is tied to this flag, i.e. the instance
+ * setting the flag must not drop that reference, whereas the instance
+ * clearing it will have to.
+ *
+ * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
+ * been partially validated. This implies that the general reference
+ * to the page (acquired from get_page_from_lNe()) would be dropped
+ * (again due to the apparent failure) and hence must be re-acquired
+ * when resuming the validation, but must not be dropped when picking
+ * up the page for invalidation.
+ *
+ * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
+ * been partially invalidated. This is basically the opposite case of
+ * above, i.e. the general reference to the page was not dropped in
+ * put_page_from_lNe() (due to the apparent failure), and hence it
+ * must be dropped when the put operation is resumed (and completes),
+ * but it must not be acquired if picking up the page for validation.
*/
struct {
u16 nr_validated_ptes;
- bool_t partial_pte;
+ s8 partial_pte;
};
/*
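
[Not part of the patch: a standalone summary of the s8 partial_pte tri-state documented in the comment above, restating what the resuming path must do with the general page reference.]

    #include <stdio.h>

    static const char *partial_pte_meaning(signed char partial_pte)
    {
        if ( partial_pte > 0 )
            return "partially validated: re-acquire general ref on resume";
        if ( partial_pte < 0 )
            return "partially invalidated: drop general ref when put completes";
        return "no partially handled PTE";
    }

    int main(void)
    {
        printf("+1: %s\n", partial_pte_meaning(1));
        printf("-1: %s\n", partial_pte_meaning(-1));
        printf(" 0: %s\n", partial_pte_meaning(0));
        return 0;
    }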
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/page.h Tue Nov 04 12:43:19 2008 +0900
@@ -314,6 +314,9 @@ unsigned long clone_idle_pagetable(struc
#define __PAGE_HYPERVISOR_NOCACHE \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define GRANT_PTE_FLAGS \
+ (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX | _PAGE_GNTTAB)
+
#ifndef __ASSEMBLY__
static inline int get_order_from_bytes(paddr_t size)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/softirq.h
--- a/xen/include/asm-x86/softirq.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/softirq.h Tue Nov 04 12:43:19 2008 +0900
@@ -3,7 +3,8 @@
#define NMI_MCE_SOFTIRQ (NR_COMMON_SOFTIRQS + 0)
#define TIME_CALIBRATE_SOFTIRQ (NR_COMMON_SOFTIRQS + 1)
+#define VCPU_KICK_SOFTIRQ (NR_COMMON_SOFTIRQS + 2)
-#define NR_ARCH_SOFTIRQS 2
+#define NR_ARCH_SOFTIRQS 3
#endif /* __ASM_SOFTIRQ_H__ */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/x86_32/page.h Tue Nov 04 12:43:19 2008 +0900
@@ -105,9 +105,6 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
-#define GRANT_PTE_FLAGS \
- (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
-
/*
* Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
* Permit the NX bit if the hardware supports it.
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/x86_64/page.h Tue Nov 04 12:43:19 2008 +0900
@@ -119,13 +119,10 @@ typedef l4_pgentry_t root_pgentry_t;
#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
-#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU
+#define COMPAT_L3_DISALLOW_MASK 0xFFFFF198U
#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
-
-#define GRANT_PTE_FLAGS \
- (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB|_PAGE_USER)
#define USER_MAPPINGS_ARE_GLOBAL
#ifdef USER_MAPPINGS_ARE_GLOBAL
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/asm-x86/xenoprof.h
--- a/xen/include/asm-x86/xenoprof.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/asm-x86/xenoprof.h Tue Nov 04 12:43:19 2008 +0900
@@ -64,6 +64,9 @@ void xenoprof_backtrace(
"xenoprof/x86 with autotranslated mode enabled" \
"isn't supported yet\n"); \
} while (0)
+int passive_domain_do_rdmsr(struct cpu_user_regs *regs);
+int passive_domain_do_wrmsr(struct cpu_user_regs *regs);
+void passive_domain_destroy(struct vcpu *v);
#endif /* __ASM_X86_XENOPROF_H__ */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/public/features.h
--- a/xen/include/public/features.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/public/features.h Tue Nov 04 12:43:19 2008 +0900
@@ -59,6 +59,9 @@
/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
#define XENFEAT_mmu_pt_update_preserve_ad 5
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist 6
+
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/public/trace.h
--- a/xen/include/public/trace.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/public/trace.h Tue Nov 04 12:43:19 2008 +0900
@@ -142,7 +142,9 @@
#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x16)
#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_MMIO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x17)
#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18)
#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19)
#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/public/xen.h
--- a/xen/include/public/xen.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/public/xen.h Tue Nov 04 12:43:19 2008 +0900
@@ -231,6 +231,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
* cmd: MMUEXT_SET_LDT
* linear_addr: Linear address of LDT base (NB. must be page-aligned).
* nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
*/
#define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1
@@ -247,12 +254,15 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#define MMUEXT_FLUSH_CACHE 12
#define MMUEXT_SET_LDT 13
#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
#ifndef __ASSEMBLY__
struct mmuext_op {
unsigned int cmd;
union {
- /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE */
xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
@@ -266,6 +276,8 @@ struct mmuext_op {
#else
void *vcpumask;
#endif
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
} arg2;
};
typedef struct mmuext_op mmuext_op_t;
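
[Not part of the patch: how a guest would fill the new ops, using a simplified local stand-in for the public struct (the real one also carries the vcpumask variants in arg2). The mfn values are made up; a real guest would pass the op to HYPERVISOR_mmuext_op().]

    #include <stdio.h>

    typedef unsigned long xen_pfn_t;

    #define MMUEXT_CLEAR_PAGE 16
    #define MMUEXT_COPY_PAGE  17

    struct mmuext_op_model {
        unsigned int cmd;
        union { xen_pfn_t mfn; unsigned long linear_addr; } arg1;
        union { unsigned int nr_ents; xen_pfn_t src_mfn; } arg2;
    };

    int main(void)
    {
        struct mmuext_op_model op = { .cmd = MMUEXT_COPY_PAGE };

        op.arg1.mfn = 0x1234;        /* destination frame */
        op.arg2.src_mfn = 0x5678;    /* source frame */
        printf("cmd=%u dst=%lx src=%lx\n", op.cmd, op.arg1.mfn,
               op.arg2.src_mfn);
        return 0;
    }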
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xen/cpuidle.h
--- a/xen/include/xen/cpuidle.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xen/cpuidle.h Tue Nov 04 12:43:19 2008 +0900
@@ -30,12 +30,18 @@
#define ACPI_PROCESSOR_MAX_POWER 8
#define CPUIDLE_NAME_LEN 16
+#define ACPI_CSTATE_EM_NONE 0
+#define ACPI_CSTATE_EM_SYSIO 1
+#define ACPI_CSTATE_EM_FFH 2
+#define ACPI_CSTATE_EM_HALT 3
+
struct acpi_processor_cx
{
+ u8 idx;
u8 valid;
u8 type;
u32 address;
- u8 space_id;
+ u8 entry_method; /* ACPI_CSTATE_EM_xxx */
u32 latency;
u32 latency_ticks;
u32 power;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xen/domain_page.h
--- a/xen/include/xen/domain_page.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xen/domain_page.h Tue Nov 04 12:43:19 2008 +0900
@@ -24,7 +24,7 @@ void *map_domain_page(unsigned long mfn)
* Pass a VA within a page previously mapped in the context of the
* currently-executing VCPU via a call to map_domain_page().
*/
-void unmap_domain_page(void *va);
+void unmap_domain_page(const void *va);
/*
* Similar to the above calls, except the mapping is accessible in all
@@ -32,7 +32,7 @@ void unmap_domain_page(void *va);
* mappings can also be unmapped from any context.
*/
void *map_domain_page_global(unsigned long mfn);
-void unmap_domain_page_global(void *va);
+void unmap_domain_page_global(const void *va);
#define DMCACHE_ENTRY_VALID 1U
#define DMCACHE_ENTRY_HELD 2U
@@ -75,7 +75,7 @@ map_domain_page_with_cache(unsigned long
}
static inline void
-unmap_domain_page_with_cache(void *va, struct domain_mmap_cache *cache)
+unmap_domain_page_with_cache(const void *va, struct domain_mmap_cache *cache)
{
ASSERT(cache != NULL);
cache->flags &= ~DMCACHE_ENTRY_HELD;
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xen/spinlock.h
--- a/xen/include/xen/spinlock.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xen/spinlock.h Tue Nov 04 12:43:19 2008 +0900
@@ -5,21 +5,38 @@
#include <asm/system.h>
#include <asm/spinlock.h>
+#ifndef NDEBUG
+struct lock_debug {
+ int irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+};
+#define _LOCK_DEBUG { -1 }
+void spin_debug_enable(void);
+void spin_debug_disable(void);
+#else
+struct lock_debug { };
+#define _LOCK_DEBUG { }
+#define spin_debug_enable() ((void)0)
+#define spin_debug_disable() ((void)0)
+#endif
+
typedef struct {
raw_spinlock_t raw;
u16 recurse_cpu:12;
u16 recurse_cnt:4;
+ struct lock_debug debug;
} spinlock_t;
-#define SPIN_LOCK_UNLOCKED { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0 }
+
+#define SPIN_LOCK_UNLOCKED { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, _LOCK_DEBUG }
#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
#define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED)
typedef struct {
raw_rwlock_t raw;
+ struct lock_debug debug;
} rwlock_t;
-#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED }
+#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED, _LOCK_DEBUG }
#define DEFINE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
#define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)
@@ -34,6 +51,7 @@ int _spin_is_locked(spinlock_t *lock);
int _spin_is_locked(spinlock_t *lock);
int _spin_trylock(spinlock_t *lock);
void _spin_barrier(spinlock_t *lock);
+void _spin_barrier_irq(spinlock_t *lock);
void _spin_lock_recursive(spinlock_t *lock);
void _spin_unlock_recursive(spinlock_t *lock);
@@ -67,6 +85,7 @@ void _write_unlock_irqrestore(rwlock_t *
/* Ensure a lock is quiescent between two critical operations. */
#define spin_barrier(l) _spin_barrier(l)
+#define spin_barrier_irq(l) _spin_barrier_irq(l)
/*
* spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xen/time.h
--- a/xen/include/xen/time.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xen/time.h Tue Nov 04 12:43:19 2008 +0900
@@ -52,6 +52,7 @@ struct tm gmtime(unsigned long t);
#define SECONDS(_s) ((s_time_t)((_s) * 1000000000ULL))
#define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL))
#define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL))
+#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
extern void update_vcpu_system_time(struct vcpu *v);
extern void update_domain_wallclock_time(struct domain *d);
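
[Not part of the patch: STIME_MAX is simply the largest positive s_time_t, i.e. a signed 64-bit nanosecond count with the sign bit masked off. A trivial standalone check:]

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t stime_max = (int64_t)((uint64_t)~0ULL >> 1);

        /* Prints 9223372036854775807, i.e. INT64_MAX. */
        printf("%lld\n", (long long)stime_max);
        return 0;
    }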
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xen/timer.h
--- a/xen/include/xen/timer.h Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xen/timer.h Tue Nov 04 12:43:19 2008 +0900
@@ -15,12 +15,13 @@ struct timer {
struct timer {
/* System time expiry value (nanoseconds since boot). */
s_time_t expires;
+ s_time_t expires_end;
/* Position in active-timer data structure. */
union {
/* Timer-heap offset. */
unsigned int heap_offset;
- /* Overflow linked list. */
+ /* Linked list. */
struct timer *list_next;
};
diff -r 10f0e1bb8e5e -r e75cb35c798b xen/include/xlat.lst
--- a/xen/include/xlat.lst Tue Nov 04 12:07:22 2008 +0900
+++ b/xen/include/xlat.lst Tue Nov 04 12:43:19 2008 +0900
@@ -56,6 +56,6 @@
! processor_flags platform.h
! processor_power platform.h
! pct_register platform.h
-! processor_px platform.h
+? processor_px platform.h
! psd_package platform.h
! processor_performance platform.h
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog