On Monday 10 April 2006 10:01 am, Keir Fraser wrote:
>
> The TSC needs recalibrating on the affected CPU. This will require
> adapting local_time_calibration() in arch/x86/time.c.
>
Thanks a lot - I followed your suggestion and added this functionality in the
two patches below (and attached).
The first patch (cpufreq-xen-dom0-func.diff) adds a dom0 "setcpufreq"
hypercall that sets cpu_khz to the specified value and re-runs
local_time_calibration() on any CPU(s) the setcpufreq call specifies.
In the second patch (cpufreq-xen-linux-2.6.16.diff, to be applied to the
linux-2.6.hg tree), the Linux cpufreq drivers running in dom0 use this call
immediately after writing whichever MSR triggers the frequency/voltage shift,
but before triggering a cpufreq callback to adjust the dom0 kernel's own
timer and TSC parameters.
Note that I only implemented this for powernow-k8 right now, since that's the
only hardware I could test it on. It's very obvious how to adapt it to the
other cpufreq drivers, by just replacing rdmsr and wrmsr with the Xen wrapper
versions I provided, and adding in the setcpufreq hypercall at the end.
The patches seem to work correctly - I've had virtually no latency issues or
temporary freezes after frequency transitions now, although I occasionally
still get a mouse freeze and "psmouse: lost synchronization, throwing X bytes
away" messages in the syslog. This is on an Athlon 64 laptop stepping between
800/1600/2000 MHz.
I haven't tested this on an SMP system yet. I think it's SMP-safe, but only
if local_time_calibration() is reentrant (it is already scheduled once a
second, so in rare cases the two invocations may collide). Is this an issue?
I can test it on a dual-core Athlon 64 next week if needed (my SMP Xen test
machine is unavailable until then). Someone with a Core Duo should patch
speedstep-centrino.c and test it there too.
Try this out and let me know if there are problems. It doesn't seem to have
the issues I was having before, but that's a matter of user experience, so I
might be mistaken or just imagining it.
- Matt Yourst
diff -r 886594fa3aef xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c Sat Apr 8 12:10:04 2006 +0100
+++ b/xen/arch/x86/dom0_ops.c Mon Apr 10 19:44:19 2006 -0400
@@ -339,6 +339,13 @@ long arch_do_dom0_op(struct dom0_op *op,
}
break;
+ case DOM0_SETCPUFREQ:
+ {
+ extern int set_cpu_freq(cpumap_t cpumap, unsigned long khz);
+ ret = set_cpu_freq(op->u.setcpufreq.cpumap, op->u.setcpufreq.khz);
+ break;
+ }
+
case DOM0_GETMEMLIST:
{
int i;
diff -r 886594fa3aef xen/arch/x86/time.c
--- a/xen/arch/x86/time.c Sat Apr 8 12:10:04 2006 +0100
+++ b/xen/arch/x86/time.c Mon Apr 10 19:44:19 2006 -0400
@@ -914,6 +914,60 @@ void __init early_time_init(void)
setup_irq(0, &irq0);
}
+/*
+ * This is called when the hypervisor is notified of a CPU core
+ * frequency change initiated by a driver in dom0.
+ *
+ * This should be called after the new frequency has stabilized.
+ *
+ * The CPUs specified may include any CPU or core (if cores have
+ * independent PLLs). In an SMP or multi-core system, it may
+ * take a while for the recalibration function to be scheduled
+ * on the intended target CPUs; there is no guarantee this will
+ * happen by the time this call returns.
+ *
+ */
+/* Argument block handed to set_cpu_freq_percpu() on every CPU. */
+typedef struct percpu_freq_update {
+ /* Bitmap of CPUs to recalibrate; bit N selects CPU N. */
+ cpumap_t cpumap;
+ /* New core frequency in kHz; stored into cpu_khz on selected CPUs. */
+ unsigned long khz;
+} percpu_freq_update_t;
+
+/*
+ * Per-CPU handler invoked through on_each_cpu() by set_cpu_freq().
+ *
+ * p points to the percpu_freq_update_t describing the request. When
+ * this CPU's bit is set in the request's cpumap, cpu_khz is updated to
+ * the requested value and local_time_calibration() is re-run on this
+ * CPU; otherwise the request is only logged.
+ */
+void set_cpu_freq_percpu(void *p) {
+    percpu_freq_update_t *data = (percpu_freq_update_t*)p;
+    int cpu = smp_processor_id();
+    int affected;
+
+    if (!data) {
+        printk(" Adjust freq on cpu %d: no data!\n", cpu);
+        return;
+    }
+
+    /*
+     * Build the mask at cpumap_t width: an int-typed "1 << cpu" is
+     * undefined once cpu reaches the width of int and can never test
+     * the upper bits of a wider cpumap_t. CPUs numbered beyond the
+     * width of the map are never selected.
+     */
+    affected = (cpu < (int)(sizeof(data->cpumap) * 8)) &&
+               ((data->cpumap & ((cpumap_t)1 << cpu)) != 0);
+
+    printk(" Frequency change request on cpu %d: cpumap %08llx, khz %lu (%s)\n",
+           cpu, (unsigned long long)data->cpumap, data->khz,
+           (affected ? "adjusting" : "skipping"));
+
+    if (affected) {
+        /* NOTE(review): cpu_khz looks like a single global, not per-CPU state - confirm. */
+        cpu_khz = data->khz;
+        local_time_calibration(NULL);
+
+        printk(" Recalibrated timers on cpu %d to %lu khz\n",
+               cpu, data->khz);
+    }
+}
+
+/*
+ * Handler for the DOM0_SETCPUFREQ dom0 op: asks every CPU to run
+ * set_cpu_freq_percpu() with the given cpumap/khz pair.
+ *
+ * cpumap selects the CPUs whose frequency changed; khz is the new
+ * core frequency in kHz. Returns the result of on_each_cpu().
+ */
+int set_cpu_freq(cpumap_t cpumap, unsigned long khz) {
+    percpu_freq_update_t freq_update;
+
+    /* Terminate the line so later console output is not run onto it. */
+    printk("CPU frequency change request: cpumap %08llx, khz %lu\n",
+           (unsigned long long)cpumap, khz);
+
+    freq_update.cpumap = cpumap;
+    freq_update.khz = khz;
+
+    /*
+     * freq_update lives in this stack frame.
+     * NOTE(review): the final argument is presumably on_each_cpu()'s
+     * "wait" flag, which would keep the frame alive until every CPU has
+     * finished reading it - confirm.
+     */
+    return on_each_cpu(set_cpu_freq_percpu, &freq_update, 1, 1);
+}
+
void send_timer_event(struct vcpu *v)
{
send_guest_vcpu_virq(v, VIRQ_TIMER);
diff -r 886594fa3aef xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h Sat Apr 8 12:10:04 2006 +0100
+++ b/xen/include/public/dom0_ops.h Mon Apr 10 19:44:19 2006 -0400
@@ -283,6 +283,28 @@ typedef struct dom0_getpageframeinfo2 {
GUEST_HANDLE(ulong) array;
} dom0_getpageframeinfo2_t;
DEFINE_GUEST_HANDLE(dom0_getpageframeinfo2_t);
+
+/*
+ * Notify hypervisor of a CPU core frequency change completed
+ * by cpufreq driver in dom0, triggering an internal timer
+ * recalibration.
+ *
+ * This should be called after the new frequency has stabilized.
+ *
+ * The CPUs specified may include any CPU or core (if cores have
+ * independent PLLs). In an SMP or multi-core system, it may
+ * take a while for the recalibration function to be scheduled
+ * on the intended target CPUs; there is no guarantee this will
+ * happen by the time this call returns.
+ *
+ */
+#define DOM0_SETCPUFREQ 30
+typedef struct dom0_setcpufreq {
+ /* IN variables */
+ /* Bitmap of the CPUs whose frequency changed; bit N selects CPU N. */
+ cpumap_t cpumap;
+ /* New core frequency in kHz. */
+ unsigned long khz;
+} dom0_setcpufreq_t;
+DEFINE_GUEST_HANDLE(dom0_setcpufreq_t);
/*
* Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
@@ -496,6 +518,7 @@ typedef struct dom0_op {
struct dom0_shadow_control shadow_control;
struct dom0_setdomainmaxmem setdomainmaxmem;
struct dom0_getpageframeinfo2 getpageframeinfo2;
+ struct dom0_setcpufreq setcpufreq;
struct dom0_add_memtype add_memtype;
struct dom0_del_memtype del_memtype;
struct dom0_read_memtype read_memtype;
---------------------------------------------------------------
diff -r 640f8b15b9dd arch/i386/kernel/cpu/cpufreq/powernow-k8.c
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Fri Apr 7 01:32:54 2006 +0100
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Mon Apr 10 19:33:49 2006 -0400
@@ -48,6 +48,37 @@
#define VERSION "version 1.60.0"
#include "powernow-k8.h"
+/* Xen support */
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+/*
+ * Read or write an MSR through the DOM0_MSR dom0 op.
+ *
+ * msr:        MSR index.
+ * write:      non-zero to write in1/in2, zero to read into out1/out2.
+ * out1, out2: receive the low and high 32 bits on a read; not touched
+ *             on a write, and either may be NULL.
+ * in1, in2:   low and high 32 bits to write; passed as 0 by reads.
+ *
+ * The request is always issued with cpu_mask = 1, i.e. it only targets
+ * the first CPU. Returns 0; a failing hypercall trips BUG_ON().
+ */
+int xen_access_msr(u32 msr, int write, u32* out1, u32* out2, u32 in1, u32 in2) {
+	dom0_op_t op;
+	int rc;
+
+	op.cmd = DOM0_MSR;
+	op.u.msr.write = write;
+	op.u.msr.cpu_mask = 1; /* only first CPU: not clear how to read multiple CPUs */
+	op.u.msr.msr = msr;
+	op.u.msr.in1 = in1;
+	op.u.msr.in2 = in2;
+
+	/* Issue the hypercall outside BUG_ON() so the side effect is explicit. */
+	rc = HYPERVISOR_dom0_op(&op);
+	BUG_ON(rc);
+
+	if (!write) {
+		/* Tolerate a caller that only wants one half of the value. */
+		if (out1)
+			*out1 = op.u.msr.out1; /* low 32 bits */
+		if (out2)
+			*out2 = op.u.msr.out2; /* high 32 bits */
+	}
+
+	return 0;
+}
+
+/* Drop-in replacements for rdmsr()/wrmsr() that go through Xen. */
+#define cpu_rdmsr(msr, val1, val2) xen_access_msr((msr), 0, &(val1), &(val2), 0, 0)
+#define cpu_wrmsr(msr, val1, val2) xen_access_msr((msr), 1, NULL, NULL, (val1), (val2))
+
+#else
+
+#define cpu_rdmsr(msr, val1, val2) rdmsr(msr, val1, val2)
+#define cpu_wrmsr(msr, val1, val2) wrmsr(msr, val1, val2)
+
+#endif
+
/* serialize freq changes */
static DECLARE_MUTEX(fidvid_sem);
@@ -98,7 +129,7 @@ static int pending_bit_stuck(void)
{
u32 lo, hi;
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ cpu_rdmsr(MSR_FIDVID_STATUS, lo, hi);
return lo & MSR_S_LO_CHANGE_PENDING ? 1 : 0;
}
@@ -116,7 +147,7 @@ static int query_current_values_with_pen
dprintk("detected change pending stuck\n");
return 1;
}
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ cpu_rdmsr(MSR_FIDVID_STATUS, lo, hi);
} while (lo & MSR_S_LO_CHANGE_PENDING);
data->currvid = hi & MSR_S_HI_CURRENT_VID;
@@ -145,13 +176,13 @@ static void fidvid_msr_init(void)
u32 lo, hi;
u8 fid, vid;
- rdmsr(MSR_FIDVID_STATUS, lo, hi);
+ cpu_rdmsr(MSR_FIDVID_STATUS, lo, hi);
vid = hi & MSR_S_HI_CURRENT_VID;
fid = lo & MSR_S_LO_CURRENT_FID;
lo = fid | (vid << MSR_C_LO_VID_SHIFT);
hi = MSR_C_HI_STP_GNT_BENIGN;
dprintk("cpu%d, init lo 0x%x, hi 0x%x\n", smp_processor_id(), lo, hi);
- wrmsr(MSR_FIDVID_CTL, lo, hi);
+ cpu_wrmsr(MSR_FIDVID_CTL, lo, hi);
}
@@ -173,7 +204,7 @@ static int write_new_fid(struct powernow
fid, lo, data->plllock * PLL_LOCK_CONVERSION);
do {
- wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+ cpu_wrmsr(MSR_FIDVID_CTL, lo, data->plllock *
PLL_LOCK_CONVERSION);
if (i++ > 100) {
printk(KERN_ERR PFX "internal error - pending bit very
stuck - no further
pstate changes possible\n");
return 1;
@@ -215,7 +246,7 @@ static int write_new_vid(struct powernow
vid, lo, STOP_GRANT_5NS);
do {
- wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+ cpu_wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
if (i++ > 100) {
printk(KERN_ERR PFX "internal error - pending bit very
stuck - no further
pstate changes possible\n");
return 1;
@@ -294,7 +325,7 @@ static int core_voltage_pre_transition(s
smp_processor_id(),
data->currfid, data->currvid, reqvid, data->rvo);
- rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
+ cpu_rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
maxvid = 0x1f & (maxvid >> 16);
dprintk("ph1 maxvid=0x%x\n", maxvid);
if (reqvid < maxvid) /* lower numbers are higher voltages */
@@ -892,6 +923,19 @@ static int transition_frequency(struct p
res = transition_fid_vid(data, fid, vid);
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+	{
+		/*
+		 * Tell Xen that freqs.cpu now runs at freqs.new kHz so the
+		 * hypervisor can recalibrate its own timers for that CPU.
+		 */
+		dom0_op_t op;
+		int rc;
+
+		op.cmd = DOM0_SETCPUFREQ;
+		/* Shift at cpumap_t width so high CPU numbers are not lost. */
+		op.u.setcpufreq.cpumap = ((cpumap_t)1 << freqs.cpu);
+		op.u.setcpufreq.khz = freqs.new;
+		rc = HYPERVISOR_dom0_op(&op);
+		dprintk("notified Xen of transition to %u khz on cpu %u (rc %d)\n", freqs.new, freqs.cpu, rc);
+		/* A failed notification leaves Xen's timers stale; say so. */
+		if (rc)
+			printk(KERN_WARNING PFX "Xen DOM0_SETCPUFREQ failed for cpu %u (rc %d)\n", freqs.cpu, rc);
+	}
+#endif
+
freqs.new = find_khz_freq_from_fid(data->currfid);
for_each_cpu_mask(i, cpu_core_map[data->cpu]) {
freqs.cpu = i;
diff -r 640f8b15b9dd include/xen/interface/dom0_ops.h
--- a/include/xen/interface/dom0_ops.h Fri Apr 7 01:32:54 2006 +0100
+++ b/include/xen/interface/dom0_ops.h Mon Apr 10 19:33:49 2006 -0400
@@ -283,6 +283,28 @@ typedef struct dom0_getpageframeinfo2 {
GUEST_HANDLE(ulong) array;
} dom0_getpageframeinfo2_t;
DEFINE_GUEST_HANDLE(dom0_getpageframeinfo2_t);
+
+/*
+ * Notify hypervisor of a CPU core frequency change completed
+ * by cpufreq driver in dom0, triggering an internal timer
+ * recalibration.
+ *
+ * This should be called after the new frequency has stabilized.
+ *
+ * The CPUs specified may include any CPU or core (if cores have
+ * independent PLLs). In an SMP or multi-core system, it may
+ * take a while for the recalibration function to be scheduled
+ * on the intended target CPUs; there is no guarantee this will
+ * happen by the time this call returns.
+ *
+ */
+#define DOM0_SETCPUFREQ 30
+typedef struct dom0_setcpufreq {
+ /* IN variables */
+ /* Bitmap of the CPUs whose frequency changed; bit N selects CPU N. */
+ cpumap_t cpumap;
+ /* New core frequency in kHz. */
+ unsigned long khz;
+} dom0_setcpufreq_t;
+DEFINE_GUEST_HANDLE(dom0_setcpufreq_t);
/*
* Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
@@ -496,6 +518,7 @@ typedef struct dom0_op {
struct dom0_shadow_control shadow_control;
struct dom0_setdomainmaxmem setdomainmaxmem;
struct dom0_getpageframeinfo2 getpageframeinfo2;
+ struct dom0_setcpufreq setcpufreq;
struct dom0_add_memtype add_memtype;
struct dom0_del_memtype del_memtype;
struct dom0_read_memtype read_memtype;
-------------------------------------------------------
Matt T. Yourst yourst@xxxxxxxxxxxxxxxxx
Binghamton University, Department of Computer Science
-------------------------------------------------------
cpufreq-xen-dom0-func.diff
Description: Text Data
cpufreq-xen-linux-2.6.16.diff
Description: Text Data
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|