Would it be possible to implement a virtual CPU frequency for DomU kernels? I
know that right now we can assign a scheduling weight for CPU time to the
different domains, but it would be nifty to be able to say "firewall, you get
50 MHz; MySQL, you get 900 MHz" or something similar.
--- "Matt T. Yourst" <yourst@xxxxxxxxxx> wrote:
> On Monday 10 April 2006 10:01 am, Keir Fraser wrote:
> >
> > The TSC needs recalibrating on the affected CPU. This will require
> > adapting local_time_calibration() in arch/x86/time.c.
> >
>
> Thanks a lot - I followed your suggestion and added this functionality in
> the two patches below (and attached).
>
> The first patch (cpufreq-xen-dom0-func.diff) adds a dom0 "setcpufreq"
> hypercall that sets cpu_khz to the specified value and re-runs
> local_time_calibration() on any CPU(s) the setcpufreq call specifies.
>
> In the second patch (cpufreq-xen-linux-2.6.16, to be applied to the
> linux-2.6.hg tree), the Linux cpufreq drivers running in dom0 use this call
> immediately after writing whichever MSR triggers the frequency/voltage
> shift, but before triggering a cpufreq callback to adjust the dom0 kernel's
> own timer and TSC parameters.
>
> Note that I have only implemented this for powernow-k8 so far, since that's
> the only hardware I could test it on. It's straightforward to adapt it to
> the other cpufreq drivers: just replace rdmsr and wrmsr with the Xen wrapper
> versions I provided, and add the setcpufreq hypercall at the end.
>
> The patches seem to work correctly - I've had virtually no latency issues
> or temporary freezes after frequency transitions now, although I
> occasionally still get a mouse freeze and "psmouse: lost synchronization,
> throwing X bytes away" messages in the syslog. This is on an Athlon 64
> laptop stepping between 800/1600/2000 MHz.
>
> I haven't tested this on an SMP system yet. I think it's SMP-safe, but only
> if local_time_calibration() is reentrant (it is already scheduled once a
> second, so in rare cases the two invocations could collide). Is this an
> issue?
>
> I can test it on a dual-core Athlon 64 next week if needed (my SMP Xen test
> machine is unavailable until then). Someone with a Core Duo should patch
> speedstep-centrino.c and test it there too.
>
> Try this out and let me know if there are problems. It doesn't seem to have
> the issues I was having before, but that's a matter of user experience, so
> I might be mistaken or just imagining it.
>
> - Matt Yourst
>
> diff -r 886594fa3aef xen/arch/x86/dom0_ops.c
> --- a/xen/arch/x86/dom0_ops.c Sat Apr 8 12:10:04 2006 +0100
> +++ b/xen/arch/x86/dom0_ops.c Mon Apr 10 19:44:19 2006 -0400
> @@ -339,6 +339,13 @@ long arch_do_dom0_op(struct dom0_op *op,
> }
> break;
>
> + case DOM0_SETCPUFREQ:
> + {
> + extern int set_cpu_freq(cpumap_t cpumap, unsigned long khz);
> + ret = set_cpu_freq(op->u.setcpufreq.cpumap, op->u.setcpufreq.khz);
> + break;
> + }
> +
> case DOM0_GETMEMLIST:
> {
> int i;
> diff -r 886594fa3aef xen/arch/x86/time.c
> --- a/xen/arch/x86/time.c Sat Apr 8 12:10:04 2006 +0100
> +++ b/xen/arch/x86/time.c Mon Apr 10 19:44:19 2006 -0400
> @@ -914,6 +914,60 @@ void __init early_time_init(void)
> setup_irq(0, &irq0);
> }
>
> +/*
> + * This is called when the hypervisor is notified of a CPU core
> + * frequency change initiated by a driver in dom0.
> + *
> + * This should be called after the new frequency has stabilized.
> + *
> + * The CPUs specified may include any CPU or core (if cores have
> + * independent PLLs). In an SMP or multi-core system, it may
> + * take a while for the recalibration function to be scheduled
> + * on the intended target CPUs; there is no guarantee this will
> + * happen by the time this call returns.
> + *
> + */
> +typedef struct percpu_freq_update {
> + cpumap_t cpumap;
> + unsigned long khz;
> +} percpu_freq_update_t;
> +
> +void set_cpu_freq_percpu(void *p) {
> + percpu_freq_update_t *data = (percpu_freq_update_t*)p;
> + int affected;
> +
> + if (!data) {
> + printk(" Adjust freq on cpu %d: no data!\n", smp_processor_id());
> + return;
> + }
> +
> + affected = ((data->cpumap & (1 << smp_processor_id())) != 0);
> +
> + printk(" Frequency change request on cpu %d: cpumap %08llx, khz %ld (%s)\n",
> + smp_processor_id(), (unsigned long long)data->cpumap, data->khz,
> + (affected ? "adjusting" : "skipping"));
> +
> + if (affected) {
> + cpu_khz = data->khz;
> + local_time_calibration(NULL);
> +
> + printk(" Recalibrated timers on cpu %d to %ld khz\n",
> + smp_processor_id(), data->khz);
> + }
> +}
> +
> +int set_cpu_freq(cpumap_t cpumap, unsigned long khz) {
> + percpu_freq_update_t freq_update;
> +
> + printk("CPU frequency change request: cpumap %08llx, khz %ld",
> + (unsigned long long)cpumap, khz);
> +
> + freq_update.cpumap = cpumap;
> + freq_update.khz = khz;
> +
> + return on_each_cpu(set_cpu_freq_percpu, &freq_update, 1, 1);
> +}
> +
> void send_timer_event(struct vcpu *v)
> {
> send_guest_vcpu_virq(v, VIRQ_TIMER);
> diff -r 886594fa3aef xen/include/public/dom0_ops.h
> --- a/xen/include/public/dom0_ops.h Sat Apr 8 12:10:04 2006 +0100
> +++ b/xen/include/public/dom0_ops.h Mon Apr 10 19:44:19 2006 -0400
> @@ -283,6 +283,28 @@ typedef struct dom0_getpageframeinfo2 {
> GUEST_HANDLE(ulong) array;
> } dom0_getpageframeinfo2_t;
> DEFINE_GUEST_HANDLE(dom0_getpageframeinfo2_t);
> +
> +/*
> + * Notify hypervisor of a CPU core frequency change completed
> + * by cpufreq driver in dom0, triggering an internal timer
> + * recalibration.
> + *
> + * This should be called after the new frequency has stabilized.
> + *
> + * The CPUs specified may include any CPU or core (if cores have
> + * independent PLLs). In an SMP or multi-core system, it may
> + * take a while for the recalibration function to be scheduled
> + * on the intended target CPUs; there is no guarantee this will
> + * happen by the time this call returns.
> + *
> + */
> +#define DOM0_SETCPUFREQ 30
> +typedef struct dom0_setcpufreq {
> + /* IN variables */
> + cpumap_t cpumap;
> + unsigned long khz;
> +} dom0_setcpufreq_t;
> +DEFINE_GUEST_HANDLE(dom0_setcpufreq_t);
>
> /*
> * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
> @@ -496,6 +518,7 @@ typedef struct dom0_op {
> struct dom0_shadow_control shadow_control;
> struct dom0_setdomainmaxmem setdomainmaxmem;
> struct dom0_getpageframeinfo2 getpageframeinfo2;
> + struct dom0_setcpufreq setcpufreq;
> struct dom0_add_memtype add_memtype;
> struct dom0_del_memtype del_memtype;
> struct dom0_read_memtype read_memtype;
>
> ---------------------------------------------------------------
>
> diff -r 640f8b15b9dd arch/i386/kernel/cpu/cpufreq/powernow-k8.c
> --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Fri Apr 7 01:32:54 2006 +0100
> +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Mon Apr 10 19:33:49 2006 -0400
> @@ -48,6 +48,37 @@
> #define VERSION "version 1.60.0"
> #include "powernow-k8.h"
>
> +/* Xen support */
> +
> +#ifdef CONFIG_XEN_PRIVILEGED_GUEST
> +int xen_access_msr(u32 msr, int write, u32* out1, u32* out2, u32 in1, u32
> in2) {
> + dom0_op_t op;
> + op.cmd = DOM0_MSR;
> + op.u.msr.write = write;
> + op.u.msr.cpu_mask = 1; /* only first CPU: not clear how to read multiple
> CPUs */
> + op.u.msr.msr = msr;
> + op.u.msr.in1 = in1;
> + op.u.msr.in2 = in2;
> + BUG_ON(HYPERVISOR_dom0_op(&op));
> +
> + if (!write) {
> + *out1 = op.u.msr.out1; /* low 32 bits */
> + *out2 = op.u.msr.out2; /* high 32 bits */
> + }
> +
> + return 0;
> +}
> +
> +#define cpu_rdmsr(msr, val1, val2) xen_access_msr((msr), 0, &(val1),
> &(val2),
> 0, 0)
> +#define cpu_wrmsr(msr, val1, val2) xen_access_msr(
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|