[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH 7/7] xen/arm: cpufreq: add cpufreq driver



Xen changes frequencies on CPUs using this driver.

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
 drivers/xen/Kconfig         |  20 +
 drivers/xen/Makefile        |   1 +
 drivers/xen/xen-cpufreq.c   | 882 ++++++++++++++++++++++++++++++++++++++++++++
 include/xen/interface/xen.h |   1 +
 4 files changed, 904 insertions(+)
 create mode 100644 drivers/xen/xen-cpufreq.c

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 254a5cc..bb2d3d5 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -220,6 +220,26 @@ config XEN_ACPI_PROCESSOR
          called xen_acpi_processor  If you do not know what to choose, select
          M here. If the CPUFREQ drivers are built in, select Y here.
 
+config XEN_CPUFREQ
+       bool "Xen Cpufreq driver"
+       depends on XEN_DOM0 && !CPU_FREQ
+       default n
+       help
+         This driver uploads Power Management information to the Xen
+         hypervisor and changes CPUs frequency using CPU Frequency scaling
+         drivers.
+
+         To do that the driver uses CPU Frequency scaling drivers to parse
+         the Power Management data and uploads said information to the Xen
+         hypervisor. Then the Xen hypervisor can select the proper Pxx states.
+
+         Then the Xen hypervisor can change CPUs frequency by giving commands
+         via this driver to the CPU Frequency scaling driver.
+
+         This is a bool option, so the driver can only be built in; it cannot
+         be compiled as a module. If you do not know what to choose, select
+         N here.
+
 config XEN_MCE_LOG
        bool "Xen platform mcelog"
        depends on XEN_DOM0 && X86_64 && X86_MCE
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index b7c835f..0345d65 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_XEN_PRIVCMD)             += xen-privcmd.o
 obj-$(CONFIG_XEN_ACPI_HOTPLUG_MEMORY)  += xen-acpi-memhotplug.o
 obj-$(CONFIG_XEN_ACPI_HOTPLUG_CPU)     += xen-acpi-cpuhotplug.o
 obj-$(CONFIG_XEN_ACPI_PROCESSOR)       += xen-acpi-processor.o
+obj-$(CONFIG_XEN_CPUFREQ)              += xen-cpufreq.o
 xen-evtchn-y                           := evtchn.o
 xen-gntdev-y                           := gntdev.o
 xen-gntalloc-y                         := gntalloc.o
diff --git a/drivers/xen/xen-cpufreq.c b/drivers/xen/xen-cpufreq.c
new file mode 100644
index 0000000..a0d9adc
--- /dev/null
+++ b/drivers/xen/xen-cpufreq.c
@@ -0,0 +1,882 @@
+/*
+ *  Copyright (C) 2001 Russell King
+ *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
+ *
+ *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
+ *     Added handling for CPU hotplug
+ *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
+ *     Fix handling for CPU hotplug -- affected CPUs
+ *
+ *           (C) 2014 GlobalLogic Inc.
+ *
+ * Based on drivers/cpufreq/cpufreq.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/irq.h>
+#include <linux/workqueue.h>
+#include <linux/cpufreq.h>
+
+#include <trace/events/power.h>
+
+#include <xen/xen.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/sysctl.h>
+#include <asm/xen/hypercall.h>
+
+#ifdef CONFIG_CPUMASK_OFFSTACK
+#error CONFIG_CPUMASK_OFFSTACK config should not be used with this driver
+#endif
+
+static int xen_nr_cpus;
+static int xen_irq;
+
+/*
+ * Iterate @cpu over the CPUs set in @mask.  Each step advances @cpu with
+ * cpumask_next() and the loop ends as soon as the result is not below
+ * xen_nr_cpus.  @cpu starts at -1 so the first step looks from bit 0.
+ */
+#define for_each_xen_cpu(cpu, mask)                    \
+       for ((cpu) = -1;                                \
+               (cpu) = cpumask_next((cpu), (mask)),    \
+               (cpu) < xen_nr_cpus;)
+
+static struct cpufreq_driver *xen_cpufreq_driver;
+static DEFINE_PER_CPU(struct cpufreq_policy *, xen_cpufreq_cpu_data);
+
+static DEFINE_SPINLOCK(xen_cpufreq_driver_lock);
+
+/*
+ * xen_cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
+ * all cpufreq/hotplug/workqueue/etc related lock issues.
+ *
+ * The rules for this semaphore:
+ * - Any routine that wants to read from the policy structure will
+ *   do a down_read on this semaphore.
+ * - Any routine that will write to the policy structure and/or may take away
+ *   the policy altogether (eg. CPU hotplug), will hold this lock in write
+ *   mode before doing so.
+ *
+ * Additional rules:
+ * - Governor routines that can be called in cpufreq hotplug path should not
+ *   take this sem as top level hotplug notifier handler takes this.
+ * - Lock should not be held across
+ *     __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+ */
+static DEFINE_PER_CPU(int, xen_cpufreq_policy_cpu);
+static DEFINE_PER_CPU(struct rw_semaphore, xen_cpu_policy_rwsem);
+
+/*
+ * Generator for lock_policy_rwsem_<mode>(cpu).  The generated function looks
+ * up the policy-owning CPU recorded in xen_cpufreq_policy_cpu for @cpu, BUGs
+ * if none is recorded (-1), takes that CPU's rwsem with down_<mode>() and
+ * always returns 0.  Only the "write" flavour is instantiated below.
+ */
+#define lock_policy_rwsem(mode, cpu)                                   \
+static int lock_policy_rwsem_##mode                                    \
+(int cpu)                                                              \
+{                                                                      \
+       int policy_cpu = per_cpu(xen_cpufreq_policy_cpu, cpu);          \
+       BUG_ON(policy_cpu == -1);                                       \
+       down_##mode(&per_cpu(xen_cpu_policy_rwsem, policy_cpu));        \
+                                                                       \
+       return 0;                                                       \
+}
+
+lock_policy_rwsem(write, cpu);
+
+/*
+ * Release, in write mode, the rwsem of the policy-owning CPU recorded for
+ * @cpu.  BUGs if no owner is recorded (-1).
+ */
+static void unlock_policy_rwsem_write(int cpu)
+{
+       struct rw_semaphore *sem;
+       int owner = per_cpu(xen_cpufreq_policy_cpu, cpu);
+
+       BUG_ON(owner == -1);
+       sem = &per_cpu(xen_cpu_policy_rwsem, owner);
+       up_write(sem);
+}
+
+/**
+ * The "transition" notifier list for kernel code that needs to handle
+ * changes to devices when the CPU clock speed changes.
+ * The mutex locks this list.
+ */
+static struct srcu_notifier_head xen_cpufreq_transition_notifier_list;
+
+/* Set to true once the initcall below has initialised the notifier head. */
+static bool init_cpufreq_transition_notifier_list_called;
+/*
+ * Initialise the SRCU head of the transition notifier list and record that
+ * this has happened.  Registered as a pure_initcall; always returns 0.
+ */
+static int __init init_cpufreq_transition_notifier_list(void)
+{
+       srcu_init_notifier_head(&xen_cpufreq_transition_notifier_list);
+       init_cpufreq_transition_notifier_list_called = true;
+       return 0;
+}
+pure_initcall(init_cpufreq_transition_notifier_list);
+
+/*
+ * xen_cpufreq_cpu_get - look up the policy registered for a Xen CPU
+ * @cpu: CPU number; must be below xen_nr_cpus
+ *
+ * Returns the policy stored in xen_cpufreq_cpu_data for @cpu, or NULL when
+ * @cpu is out of range, no driver is registered, or no policy is stored.
+ * The per-CPU slot is read under xen_cpufreq_driver_lock.
+ */
+static struct cpufreq_policy *xen_cpufreq_cpu_get(unsigned int cpu)
+{
+       struct cpufreq_policy *policy = NULL;
+       unsigned long irqflags;
+
+       if (cpu >= xen_nr_cpus)
+               return NULL;
+
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, irqflags);
+       /* Without a registered driver there is no policy to hand out */
+       if (xen_cpufreq_driver)
+               policy = per_cpu(xen_cpufreq_cpu_data, cpu);
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, irqflags);
+
+       return policy;
+}
+
+/*
+ * xen_cpufreq_cpu_put - release a policy obtained from xen_cpufreq_cpu_get()
+ * @data: policy returned by xen_cpufreq_cpu_get() (unused)
+ *
+ * xen_cpufreq_cpu_get() does not take a reference on the low-level driver
+ * module, so there is nothing to drop here.  Calling module_put() would
+ * decrement the owner's refcount on every get/put pair without a matching
+ * try_module_get().  The function is kept so that callers stay paired with
+ * xen_cpufreq_cpu_get().
+ */
+static void xen_cpufreq_cpu_put(struct cpufreq_policy *data)
+{
+}
+
+/*
+ * push_data_to_hypervisor - upload a policy's P-state table to Xen
+ * @policy: policy whose CPUs (policy->cpus) receive the data
+ * @table:  frequency table terminated by CPUFREQ_TABLE_END
+ *
+ * The valid entries of @table must carry consecutive indexes starting at 0
+ * and strictly increasing frequencies.  They are written in reverse order
+ * (highest frequency first) into an array of struct xen_processor_px, which
+ * is handed to Xen with XENPF_set_processor_pminfo once for every CPU of
+ * policy->cpus that is below xen_nr_cpus.
+ *
+ * Returns 0 on success, -EINVAL if the table is malformed or has no valid
+ * entry, -ENOMEM if the state array cannot be allocated, or the error code
+ * of the failing hypercall.
+ */
+static int push_data_to_hypervisor(struct cpufreq_policy *policy,
+                                  struct cpufreq_frequency_table *table)
+{
+       int ret = 0;
+       unsigned int i;
+       unsigned int cpu;
+       uint32_t platform_limit = 0;
+       unsigned int max_freq = 0;
+       unsigned int state_count = 0;
+       unsigned int prev_freq = 0;
+       struct xen_processor_px *dst_states;
+       struct xen_processor_performance *dst_perf;
+       struct xen_platform_op op = {
+               .cmd                    = XENPF_set_processor_pminfo,
+               .interface_version      = XENPF_INTERFACE_VERSION,
+               .u.set_pminfo.type      = XEN_PM_PX,
+       };
+
+       dst_perf = &op.u.set_pminfo.perf;
+
+       /* Check freq table and find max frequency */
+       for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+               unsigned int freq = table[i].frequency;
+               if (freq == CPUFREQ_ENTRY_INVALID)
+                       continue;
+
+               /* Valid entries must be numbered 0..n-1 in ascending order */
+               if (table[i].index != state_count || freq <= prev_freq) {
+                       pr_err("Frequency table format error\n");
+                       return -EINVAL;
+               }
+
+               prev_freq = freq;
+               state_count++;
+               if (freq > max_freq)
+                       max_freq = freq;
+       }
+
+       if (!state_count)
+               return -EINVAL;
+
+       dst_perf->state_count = state_count;
+
+       dst_states = kcalloc(state_count,
+                            sizeof(struct xen_processor_px), GFP_KERNEL);
+
+       if (!dst_states)
+               return -ENOMEM;
+
+       set_xen_guest_handle(dst_perf->states, dst_states);
+
+       /*
+        * Freq table should start from lower values
+        * dst_states should start from higher values
+        */
+       for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+               unsigned int freq = table[i].frequency;
+               unsigned int tbl_index;
+
+               if (freq == CPUFREQ_ENTRY_INVALID)
+                       continue;
+
+               /*
+                * Only valid entries had their index checked by the first
+                * pass, so derive the reversed slot after the validity test.
+                */
+               tbl_index = state_count - 1 - table[i].index;
+
+               if (freq == max_freq)
+                       platform_limit = tbl_index;
+
+               dst_states[tbl_index].core_frequency = freq / 1000;
+               dst_states[tbl_index].transition_latency =
+                               policy->cpuinfo.transition_latency / 1000;
+       }
+
+       dst_perf->shared_type = policy->shared_type;
+       dst_perf->platform_limit = platform_limit;
+       dst_perf->domain_info.domain = policy->cpu;
+       dst_perf->domain_info.num_processors = xen_nr_cpus;
+       dst_perf->flags = XEN_PX_PSS | XEN_PX_PSD | XEN_PX_PPC;
+
+       /* The same table is uploaded once per CPU covered by the policy */
+       for_each_xen_cpu(cpu, policy->cpus) {
+               op.u.set_pminfo.id = cpu;
+               ret = HYPERVISOR_dom0_op(&op);
+               if (ret) {
+                       pr_debug("Hypervisor error(%d) for CPU%u\n", ret, cpu);
+                       goto err_free_states;
+               }
+               pr_debug("CPU%u - P-states uploaded\n", cpu);
+
+               for (i = 0; i < dst_perf->state_count; i++) {
+                       pr_debug("    state %u: %u MHz, %u uS\n",
+                                i, (u32) dst_states[i].core_frequency,
+                                (u32) dst_states[i].transition_latency);
+               }
+       }
+
+       /* Reached on success as well: the array is only needed for the call */
+err_free_states:
+       kfree(dst_states);
+       return ret;
+}
+
+/*
+ * xen_cpufreq_add_dev_policy - attach @cpu to an already registered policy
+ *
+ * Walks the other CPUs in policy->cpus; if one of them already has a policy
+ * registered, @cpu is linked to that policy instead of keeping @policy.
+ * The function drops and re-takes the policy rwsem of @cpu around repointing
+ * xen_cpufreq_policy_cpu, so it expects the caller to hold that lock for
+ * writing.  Without CONFIG_SMP it only returns 0.
+ *
+ * Returns:
+ *   Negative: Failure
+ *   0:        Success
+ *   Positive: @cpu is managed by an existing policy and was linked to it
+ */
+static int xen_cpufreq_add_dev_policy(unsigned int cpu,
+                                 struct cpufreq_policy *policy)
+{
+       int ret = 0;
+#ifdef CONFIG_SMP
+       unsigned long flags;
+       unsigned int j;
+
+       for_each_cpu(j, policy->cpus) {
+               struct cpufreq_policy *managed_policy;
+
+               if (cpu == j)
+                       continue;
+
+               /* Check for existing affected CPUs.
+                * They may not be aware of it due to CPU Hotplug.
+                * xen_cpufreq_cpu_put is called when the device is removed
+                * in __xen_cpufreq_remove_dev()
+                */
+               managed_policy = xen_cpufreq_cpu_get(j);
+               if (unlikely(managed_policy)) {
+                       /* Set proper policy_cpu */
+                       unlock_policy_rwsem_write(cpu);
+                       per_cpu(xen_cpufreq_policy_cpu, cpu) =
+                                               managed_policy->cpu;
+
+                       /*
+                        * lock_policy_rwsem_write() as generated in this file
+                        * always returns 0, so this branch is not taken.
+                        */
+                       if (lock_policy_rwsem_write(cpu) < 0) {
+                               /* Should not go through policy unlock path */
+                               if (xen_cpufreq_driver->exit)
+                                       xen_cpufreq_driver->exit(policy);
+                               xen_cpufreq_cpu_put(managed_policy);
+                               return -EBUSY;
+                       }
+
+                       /* Publish the link under the driver spinlock */
+                       spin_lock_irqsave(&xen_cpufreq_driver_lock, flags);
+                       cpumask_copy(managed_policy->cpus, policy->cpus);
+                       per_cpu(xen_cpufreq_cpu_data, cpu) = managed_policy;
+                       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+
+                       pr_debug("CPU already managed, adding link\n");
+
+                       /*
+                        * Success. We only needed to be added to the mask.
+                        * Call driver->exit() because only the policy-owning
+                        * cpu needed to call init().
+                        */
+                       if (xen_cpufreq_driver->exit)
+                               xen_cpufreq_driver->exit(policy);
+
+                       return 1;
+               }
+       }
+#endif
+       return ret;
+}
+
+/**
+ * xen_cpufreq_add_dev - add a CPU device
+ * @cpu: CPU number to create (or share) a policy for
+ *
+ * Allocates a policy for @cpu, lets the low-level driver initialise it and
+ * stores it in xen_cpufreq_cpu_data for every CPU in policy->cpus.  If @cpu
+ * already has a policy, or turns out to be covered by the policy of another
+ * CPU, nothing new is kept and 0 is returned.
+ *
+ * Returns 0 on success, -EINVAL if the driver module cannot be pinned,
+ * -ENOMEM on allocation failure, or the error from the driver's init()
+ * callback or from xen_cpufreq_add_dev_policy().
+ */
+static int xen_cpufreq_add_dev(unsigned int cpu)
+{
+       int ret = 0;
+       struct cpufreq_policy *policy;
+       unsigned long flags;
+       unsigned int j;
+
+       pr_debug("adding CPU %u\n", cpu);
+
+#ifdef CONFIG_SMP
+       /* check whether a different CPU already registered this
+        * CPU because it is in the same boat. */
+       policy = xen_cpufreq_cpu_get(cpu);
+       if (unlikely(policy)) {
+               xen_cpufreq_cpu_put(policy);
+               return 0;
+       }
+#endif
+
+       /* Pin the low-level driver module for the duration of this call */
+       if (!try_module_get(xen_cpufreq_driver->owner)) {
+               ret = -EINVAL;
+               goto module_out;
+       }
+
+       ret = -ENOMEM;
+       policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
+       if (!policy)
+               goto nomem_out;
+
+       if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL))
+               goto err_free_policy;
+
+       if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL))
+               goto err_free_cpumask;
+
+       policy->cpu = cpu;
+       cpumask_copy(policy->cpus, cpumask_of(cpu));
+
+       /* Initially set CPU itself as the policy_cpu */
+       per_cpu(xen_cpufreq_policy_cpu, cpu) = cpu;
+       ret = (lock_policy_rwsem_write(cpu) < 0);
+       WARN_ON(ret);
+
+       /* call driver. From then on the cpufreq must be able
+        * to accept all calls to ->verify and ->setpolicy for this CPU
+        */
+       ret = xen_cpufreq_driver->init(policy);
+       if (ret) {
+               pr_debug("initialization failed\n");
+               goto err_unlock_policy;
+       }
+       ret = xen_cpufreq_add_dev_policy(cpu, policy);
+       if (ret) {
+               if (ret > 0)
+                       /* This is a managed cpu, it was linked to an
+                          existing policy, exit with 0 */
+                       ret = 0;
+               /* Either way the policy allocated above is freed below */
+               goto err_unlock_policy;
+       }
+
+       /* Publish the new policy for every CPU it covers */
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, flags);
+       for_each_cpu(j, policy->cpus) {
+               per_cpu(xen_cpufreq_cpu_data, j) = policy;
+               per_cpu(xen_cpufreq_policy_cpu, j) = policy->cpu;
+       }
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+
+       unlock_policy_rwsem_write(cpu);
+
+       /* Balances the try_module_get() above; no reference is kept */
+       module_put(xen_cpufreq_driver->owner);
+       pr_debug("initialization complete\n");
+
+       return 0;
+
+err_unlock_policy:
+       unlock_policy_rwsem_write(cpu);
+       free_cpumask_var(policy->related_cpus);
+err_free_cpumask:
+       free_cpumask_var(policy->cpus);
+err_free_policy:
+       kfree(policy);
+nomem_out:
+       module_put(xen_cpufreq_driver->owner);
+module_out:
+       return ret;
+}
+
+/**
+ * __xen_cpufreq_remove_dev - remove a CPU device
+ * @cpu: CPU number whose policy (or link to a policy) is removed
+ *
+ * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine releases the rwsem before returning.
+ *
+ * Returns 0 on success or -EINVAL when no policy is stored for @cpu.
+ */
+static int __xen_cpufreq_remove_dev(unsigned int cpu)
+{
+       unsigned long flags;
+       struct cpufreq_policy *data;
+#ifdef CONFIG_SMP
+       unsigned int j;
+#endif
+
+       pr_debug("unregistering CPU %u\n", cpu);
+
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, flags);
+       data = per_cpu(xen_cpufreq_cpu_data, cpu);
+
+       if (!data) {
+               spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+               unlock_policy_rwsem_write(cpu);
+               return -EINVAL;
+       }
+       per_cpu(xen_cpufreq_cpu_data, cpu) = NULL;
+
+
+#ifdef CONFIG_SMP
+       /* if this isn't the CPU which owns the policy, we
+        * only need to unlink, put and exit
+        */
+       if (unlikely(cpu != data->cpu)) {
+               pr_debug("removing link\n");
+               cpumask_clear_cpu(cpu, data->cpus);
+               spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+               xen_cpufreq_cpu_put(data);
+               unlock_policy_rwsem_write(cpu);
+               return 0;
+       }
+#endif
+
+#ifdef CONFIG_SMP
+
+       /* if we have other CPUs still registered, we need to unlink them.
+        * Clean the per_cpu(xen_cpufreq_cpu_data) while holding the lock,
+        * and drop the links afterwards.
+        */
+       if (unlikely(cpumask_weight(data->cpus) > 1)) {
+               for_each_cpu(j, data->cpus) {
+                       if (j == cpu)
+                               continue;
+                       per_cpu(xen_cpufreq_cpu_data, j) = NULL;
+               }
+       }
+
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+
+       /*
+        * NOTE(review): one xen_cpufreq_cpu_put() is issued per linked CPU
+        * here; confirm each has a matching xen_cpufreq_cpu_get().
+        */
+       if (unlikely(cpumask_weight(data->cpus) > 1)) {
+               for_each_cpu(j, data->cpus) {
+                       if (j == cpu)
+                               continue;
+                       pr_debug("removing link for cpu %u\n", j);
+                       unlock_policy_rwsem_write(cpu);
+                       lock_policy_rwsem_write(cpu);
+                       xen_cpufreq_cpu_put(data);
+               }
+       }
+#else
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+#endif
+
+       unlock_policy_rwsem_write(cpu);
+
+       /* The driver's exit() callback runs with the rwsem held again */
+       lock_policy_rwsem_write(cpu);
+       if (xen_cpufreq_driver->exit)
+               xen_cpufreq_driver->exit(data);
+       unlock_policy_rwsem_write(cpu);
+
+       free_cpumask_var(data->related_cpus);
+       free_cpumask_var(data->cpus);
+       kfree(data);
+
+       return 0;
+}
+
+/*
+ * xen_cpufreq_remove_dev - lock the policy of @cpu and remove the CPU
+ * @cpu: CPU number to remove
+ *
+ * Takes the policy rwsem for writing and hands over to
+ * __xen_cpufreq_remove_dev(), which releases it.
+ *
+ * Returns the result of __xen_cpufreq_remove_dev(), or -EINVAL when @cpu was
+ * never added.
+ */
+static int xen_cpufreq_remove_dev(unsigned int cpu)
+{
+       /*
+        * The error path of cpufreq_register_driver() calls this for every
+        * CPU, including ones that never went through xen_cpufreq_add_dev().
+        * Their policy_cpu is still -1, and taking the lock would hit the
+        * BUG_ON() in lock_policy_rwsem_write(), so bail out first.
+        */
+       if (per_cpu(xen_cpufreq_policy_cpu, cpu) == -1)
+               return -EINVAL;
+
+       if (unlikely(lock_policy_rwsem_write(cpu)))
+               BUG();
+
+       return __xen_cpufreq_remove_dev(cpu);
+}
+
+/*********************************************************************
+ *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
+ *********************************************************************/
+
+/**
+ * adjust_jiffies - rescale the system "loops_per_jiffy" after a clock change
+ * @val: transition event (CPUFREQ_POSTCHANGE, CPUFREQ_RESUMECHANGE, ...)
+ * @ci:  frequencies and driver flags describing the transition
+ *
+ * Only built for uniprocessor kernels: on SMP systems each CPU might be
+ * scaled differently, so the global loops_per_jiffy cannot be updated and
+ * the function is an empty stub.  Use the arch per-CPU loops_per_jiffy value
+ * wherever possible.
+ */
+#ifndef CONFIG_SMP
+static unsigned long l_p_j_ref;
+static unsigned int  l_p_j_ref_freq;
+
+static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+       bool rescale;
+
+       if (ci->flags & CPUFREQ_CONST_LOOPS)
+               return;
+
+       /* First call: remember the current value as the reference point */
+       if (l_p_j_ref_freq == 0) {
+               l_p_j_ref = loops_per_jiffy;
+               l_p_j_ref_freq = ci->old;
+               pr_debug("saving %lu as reference value for loops_per_jiffy; "
+                       "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
+       }
+
+       /* A post-change only counts when the frequency really moved */
+       rescale = (val == CPUFREQ_POSTCHANGE && ci->old != ci->new);
+       if (!rescale)
+               rescale = (val == CPUFREQ_RESUMECHANGE ||
+                          val == CPUFREQ_SUSPENDCHANGE);
+       if (!rescale)
+               return;
+
+       loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, ci->new);
+       pr_debug("scaling loops_per_jiffy to %lu "
+               "for frequency %u kHz\n", loops_per_jiffy, ci->new);
+}
+#else
+static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
+{
+}
+#endif
+
+
+/**
+ * cpufreq_notify_transition - call notifier chain and adjust_jiffies
+ * on frequency transition.
+ * @freqs: old/new frequency and CPU of the transition; ->flags is filled in
+ *         from the registered driver, ->old may be corrected on PRECHANGE
+ * @state: CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE; other values only set
+ *         freqs->flags and emit the debug message
+ *
+ * This function calls the transition notifiers and the "adjust_jiffies"
+ * function. It is called twice on all CPU frequency changes that have
+ * external effects.  Must be called with interrupts enabled.
+ */
+void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
+{
+       struct cpufreq_policy *policy;
+
+       BUG_ON(irqs_disabled());
+
+       freqs->flags = xen_cpufreq_driver->flags;
+       pr_debug("notification %u of frequency transition to %u kHz\n",
+                state, freqs->new);
+
+       /* May be NULL when no policy is stored for this CPU */
+       policy = per_cpu(xen_cpufreq_cpu_data, freqs->cpu);
+       switch (state) {
+       case CPUFREQ_PRECHANGE:
+               /* detect if the driver reported a value as "old frequency"
+                * which is not equal to what the cpufreq core thinks is
+                * "old frequency".
+                */
+               if (!(xen_cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
+                       if ((policy) && (policy->cpu == freqs->cpu) &&
+                           (policy->cur) && (policy->cur != freqs->old)) {
+                               pr_debug("Warning: CPU frequency is"
+                                        " %u, cpufreq assumed %u kHz.\n",
+                                        freqs->old, policy->cur);
+                               freqs->old = policy->cur;
+                       }
+               }
+               srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
+                                        CPUFREQ_PRECHANGE, freqs);
+               adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
+               break;
+
+       case CPUFREQ_POSTCHANGE:
+               adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+               pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new,
+                        (unsigned long)freqs->cpu);
+               trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
+               trace_cpu_frequency(freqs->new, freqs->cpu);
+               srcu_notifier_call_chain(&xen_cpufreq_transition_notifier_list,
+                                        CPUFREQ_POSTCHANGE, freqs);
+               /* Record the new frequency on the policy-owning CPU only */
+               if (likely(policy) && likely(policy->cpu == freqs->cpu))
+                       policy->cur = freqs->new;
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
+
+/*********************************************************************
+ *                              GOVERNORS                            *
+ *********************************************************************/
+
+/*
+ * __xen_cpufreq_driver_target - ask the low-level driver for a frequency
+ * @policy:      policy to change
+ * @target_freq: requested frequency; limited to policy->max and policy->min
+ * @relation:    passed through unchanged to the driver's target() callback
+ *
+ * Returns 0 when the limited frequency already equals policy->cur, -EINVAL
+ * when the driver has no target() callback, otherwise the callback's result.
+ */
+int __xen_cpufreq_driver_target(struct cpufreq_policy *policy,
+                               unsigned int target_freq,
+                               unsigned int relation)
+{
+       unsigned int requested = target_freq;
+
+       /* Make sure that target_freq is within supported range */
+       if (target_freq > policy->max)
+               target_freq = policy->max;
+       if (target_freq < policy->min)
+               target_freq = policy->min;
+
+       pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
+                policy->cpu, target_freq, relation, requested);
+
+       /* Nothing to do when the CPU already runs at that frequency */
+       if (policy->cur == target_freq)
+               return 0;
+
+       if (!xen_cpufreq_driver->target)
+               return -EINVAL;
+
+       return xen_cpufreq_driver->target(policy, target_freq, relation);
+}
+
+/*
+ * xen_cpufreq_driver_target - change the frequency of the policy of @cpu
+ * @cpu:         CPU whose policy is looked up
+ * @target_freq: requested frequency
+ * @relation:    passed through to __xen_cpufreq_driver_target()
+ *
+ * Runs __xen_cpufreq_driver_target() with the policy rwsem held for writing.
+ * Returns -EINVAL when no policy is found or the lock cannot be taken,
+ * otherwise the result of __xen_cpufreq_driver_target().
+ */
+int xen_cpufreq_driver_target(unsigned int cpu,
+                             unsigned int target_freq,
+                             unsigned int relation)
+{
+       struct cpufreq_policy *policy = xen_cpufreq_cpu_get(cpu);
+       int status = -EINVAL;
+
+       if (!policy)
+               return status;
+
+       if (!lock_policy_rwsem_write(policy->cpu)) {
+               status = __xen_cpufreq_driver_target(policy, target_freq,
+                                                    relation);
+               unlock_policy_rwsem_write(policy->cpu);
+       }
+
+       xen_cpufreq_cpu_put(policy);
+       return status;
+}
+
+/*********************************************************************
+ *                    HANDLE COMMANDS FROM XEN                       *
+ *********************************************************************/
+static void xen_cpufreq_work_hnd(struct work_struct *w);
+
+static struct workqueue_struct *xen_cpufreq_wq;
+static DECLARE_WORK(xen_cpufreq_work, xen_cpufreq_work_hnd);
+
+/*
+ * Work handler for xen_cpufreq_work.  Fetches the pending request from the
+ * hypervisor (XEN_SYSCTL_CPUFREQ_get_target), applies it through
+ * xen_cpufreq_driver_target() and reports the outcome back with
+ * XEN_SYSCTL_CPUFREQ_set_result.  If fetching the request fails, nothing is
+ * reported back.
+ */
+static void xen_cpufreq_work_hnd(struct work_struct *w)
+{
+       struct xen_sysctl sysctl = {
+               .cmd                    = XEN_SYSCTL_cpufreq_op,
+               .interface_version      = XEN_SYSCTL_INTERFACE_VERSION,
+       };
+       struct xen_sysctl_cpufreq_op *req = &sysctl.u.cpufreq_op;
+       int status;
+       int err;
+
+       req->cmd = XEN_SYSCTL_CPUFREQ_get_target;
+       err = HYPERVISOR_sysctl(&sysctl);
+       if (err) {
+               pr_err("Hypervisor cpufreq error get targer (%d)\n", err);
+               return;
+       }
+
+       status = xen_cpufreq_driver_target(req->u.target.cpu,
+                                          req->u.target.freq,
+                                          req->u.target.relation);
+
+       /* Reuse the same sysctl buffer to send the result back */
+       req->cmd = XEN_SYSCTL_CPUFREQ_set_result;
+       req->u.result = status;
+       err = HYPERVISOR_sysctl(&sysctl);
+       if (err)
+               pr_err("Hypervisor cpufreq set result error (%d)\n", err);
+}
+
+/*
+ * Interrupt handler installed by cpufreq_register_driver() for the
+ * VIRQ_CPUFREQ irq.  It only queues xen_cpufreq_work on xen_cpufreq_wq and
+ * always reports the interrupt as handled.
+ */
+static irqreturn_t xen_cpufreq_interrupt(int irq, void *data)
+{
+       struct work_struct *work = &xen_cpufreq_work;
+
+       queue_work(xen_cpufreq_wq, work);
+
+       return IRQ_HANDLED;
+}
+
+/*********************************************************************
+ *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
+ *********************************************************************/
+
+/**
+ * cpufreq_register_driver - register a CPU Frequency driver
+ * @driver_data: A struct cpufreq_driver containing the values
+ * submitted by the CPU Frequency driver.
+ *
+ *   Registers a CPU Frequency driver to this core code: binds VIRQ_CPUFREQ
+ * to an irq, creates a policy for every Xen CPU and uploads the frequency
+ * table of each policy to the hypervisor.
+ *
+ *   Returns zero on success, -EPROBE_DEFER while xen_nr_cpus is still zero,
+ * -EINVAL when a mandatory callback (verify, init, target) is missing or a
+ * policy or frequency table cannot be found, -EBUSY when another driver got
+ * here first (and isn't unregistered in the meantime), -ENOMEM when the
+ * scratch cpumask cannot be allocated, or the error of the failing step.
+ *
+ */
+int cpufreq_register_driver(struct cpufreq_driver *driver_data)
+{
+       unsigned long flags;
+       int ret;
+       unsigned int cpu;
+       struct cpufreq_frequency_table *table;
+       struct cpufreq_policy *policy;
+       cpumask_var_t pushed_cpus;
+       int irq;
+
+       if (!xen_nr_cpus)
+               return -EPROBE_DEFER;
+
+       if (!driver_data || !driver_data->verify || !driver_data->init ||
+           (!driver_data->target))
+               return -EINVAL;
+
+       pr_debug("trying to register driver %s\n", driver_data->name);
+
+       if (driver_data->setpolicy)
+               driver_data->flags |= CPUFREQ_CONST_LOOPS;
+
+       /* Claim the single driver slot */
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, flags);
+
+       if (xen_cpufreq_driver) {
+               spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+               return -EBUSY;
+       }
+       xen_cpufreq_driver = driver_data;
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+
+       irq = bind_virq_to_irq(VIRQ_CPUFREQ, 0);
+       if (irq < 0) {
+               pr_err("Bind virq (%d) error (%d)\n", VIRQ_CPUFREQ, irq);
+               ret = irq;
+               goto err_remove_drv;
+       }
+
+       irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN|IRQ_NOPROBE);
+
+       ret = request_irq(irq, xen_cpufreq_interrupt, 0,
+                          "xen_cpufreq", NULL);
+
+       if (ret < 0) {
+               pr_err("Request irq (%d) error (%d)\n", irq, ret);
+               goto err_unbind_from_irqhnd;
+       }
+
+       xen_irq = irq;
+
+       for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
+               ret = xen_cpufreq_add_dev(cpu);
+               if (ret)
+                       goto err_remove_cpu;
+       }
+
+       if (!zalloc_cpumask_var(&pushed_cpus, GFP_KERNEL)) {
+               /*
+                * ret still holds 0 from the last xen_cpufreq_add_dev()
+                * call; without this the failure would be reported as
+                * success after everything has been torn down.
+                */
+               ret = -ENOMEM;
+               goto err_remove_cpu;
+       }
+
+       /* Upload each policy once; pushed_cpus tracks CPUs already covered */
+       for (cpu = 0; cpu < xen_nr_cpus; cpu++) {
+               if (cpumask_test_cpu(cpu, pushed_cpus))
+                       continue;
+
+               policy = xen_cpufreq_cpu_get(cpu);
+               if (!policy) {
+                       ret = -EINVAL;
+                       goto err_free_cpumask;
+               }
+
+               cpumask_or(pushed_cpus, pushed_cpus, policy->cpus);
+               table = cpufreq_frequency_get_table(policy->cpu);
+               if (!table) {
+                       ret = -EINVAL;
+                       goto err_free_cpumask;
+               }
+
+               ret = push_data_to_hypervisor(policy, table);
+               if (ret)
+                       goto err_free_cpumask;
+       }
+
+       free_cpumask_var(pushed_cpus);
+
+       pr_debug("driver %s up and running\n", driver_data->name);
+
+       return 0;
+
+err_free_cpumask:
+       free_cpumask_var(pushed_cpus);
+err_remove_cpu:
+       for (cpu = 0; cpu < xen_nr_cpus; cpu++)
+               xen_cpufreq_remove_dev(cpu);
+       /*
+        * NOTE(review): this label is also reached when request_irq() failed,
+        * i.e. before any handler was installed on the irq; confirm that
+        * unbind_from_irqhandler() is safe to call in that state.
+        */
+err_unbind_from_irqhnd:
+       unbind_from_irqhandler(irq, NULL);
+err_remove_drv:
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, flags);
+       xen_cpufreq_driver = NULL;
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(cpufreq_register_driver);
+
+
+/**
+ * cpufreq_unregister_driver - unregister the current CPUFreq driver
+ * @driver: the driver that was passed to cpufreq_register_driver()
+ *
+ *    Releases the VIRQ_CPUFREQ irq, removes every Xen CPU and clears the
+ * registered driver. Only call this if you have the right to do so, i.e. if
+ * you have succeeded in initialising before!
+ * Returns zero if successful, and -EINVAL if no driver is registered or
+ * @driver is not the registered one.
+ */
+int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+{
+       unsigned long irqflags;
+       unsigned int i;
+
+       if (xen_cpufreq_driver == NULL || xen_cpufreq_driver != driver)
+               return -EINVAL;
+
+       pr_debug("unregistering driver %s\n", driver->name);
+
+       unbind_from_irqhandler(xen_irq, NULL);
+
+       /* Per-CPU removal results are intentionally not checked */
+       for (i = 0; i < xen_nr_cpus; i++)
+               xen_cpufreq_remove_dev(i);
+
+       spin_lock_irqsave(&xen_cpufreq_driver_lock, irqflags);
+       xen_cpufreq_driver = NULL;
+       spin_unlock_irqrestore(&xen_cpufreq_driver_lock, irqflags);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
+
+/*
+ * xen_cpufreq_init - core_initcall that prepares the driver core
+ *
+ * Asks the hypervisor for the number of CPUs (XEN_SYSCTL_physinfo), resets
+ * the per-CPU policy owner to -1, initialises the per-CPU policy rwsems and
+ * creates the "xen_cpufreq" workqueue.  xen_nr_cpus is left at 0 on any
+ * failure, which makes cpufreq_register_driver() return -EPROBE_DEFER.
+ *
+ * Returns 0 on success, the hypercall error, -EINVAL for an unusable CPU
+ * count, or -ENOMEM when the workqueue cannot be created.
+ */
+static int __init xen_cpufreq_init(void)
+{
+       int ret;
+       int i;
+
+       struct xen_sysctl op = {
+               .cmd                    = XEN_SYSCTL_physinfo,
+               .interface_version      = XEN_SYSCTL_INTERFACE_VERSION,
+       };
+
+       ret = HYPERVISOR_sysctl(&op);
+       if (ret) {
+               pr_err("Hypervisor get physinfo error (%d)\n", ret);
+               return ret;
+       }
+
+       xen_nr_cpus = op.u.physinfo.nr_cpus;
+       /* "<= 0" also rejects a reported count that does not fit in an int */
+       if (xen_nr_cpus <= 0 || xen_nr_cpus > NR_CPUS) {
+               /* Log the offending value before it is reset */
+               pr_err("Wrong CPUs amount (%d)\n", xen_nr_cpus);
+               xen_nr_cpus = 0;
+               return -EINVAL;
+       }
+
+       for (i = 0; i < xen_nr_cpus; i++) {
+               per_cpu(xen_cpufreq_policy_cpu, i) = -1;
+               init_rwsem(&per_cpu(xen_cpu_policy_rwsem, i));
+       }
+
+       xen_cpufreq_wq = create_workqueue("xen_cpufreq");
+       if (!xen_cpufreq_wq) {
+               pr_err("Create workqueue error\n");
+               ret = -ENOMEM;
+               goto err_create_wq;
+       }
+
+       return 0;
+
+err_create_wq:
+       xen_nr_cpus = 0;
+       return ret;
+}
+
+/* Module metadata */
+MODULE_AUTHOR("Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>");
+MODULE_DESCRIPTION("Xen cpufreq driver which uploads PM data to Xen hypervisor");
+MODULE_LICENSE("GPL");
+
+core_initcall(xen_cpufreq_init);
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index cf64566..0520194 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -81,6 +81,7 @@
 #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
 #define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */
+#define VIRQ_CPUFREQ    13 /* (DOM0) Notify xen-cpufreq driver.           */
 
 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.