[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH v5 11/12] cpufreq: add hwdom-cpufreq driver



This driver delegates frequency changes on physical CPUs to the
hardware domain (hwdom).
Workflow:
 * cpufreq governor driver in Xen wants to change the
   frequency of the physical CPU
 * hwdom-cpufreq driver sets parameters in the shared
   memory
 * hwdom-cpufreq driver sends an event via event channel
   to notify the hardware domain
 * cpufreq driver in the hardware domain reads parameters
   from the shared memory, changes frequency and copies
   the result of the operation to the shared memory
 * cpufreq driver in the hwdom sends an event via event
   channel to notify the hwdom-cpufreq driver

Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
 xen/Rules.mk                        |   1 +
 xen/common/sysctl.c                 |   8 +
 xen/drivers/cpufreq/Makefile        |   1 +
 xen/drivers/cpufreq/hwdom-cpufreq.c | 422 ++++++++++++++++++++++++++++++++++++
 xen/include/xen/cpufreq.h           |   2 +
 5 files changed, 434 insertions(+)
 create mode 100644 xen/drivers/cpufreq/hwdom-cpufreq.c

diff --git a/xen/Rules.mk b/xen/Rules.mk
index 3b0b89b..cccbc72 100644
--- a/xen/Rules.mk
+++ b/xen/Rules.mk
@@ -56,6 +56,7 @@ CFLAGS-$(perfc_arrays)  += -DPERF_ARRAYS
 CFLAGS-$(lock_profile)  += -DLOCK_PROFILE
 CFLAGS-$(HAS_ACPI)      += -DHAS_ACPI
 CFLAGS-$(HAS_CPUFREQ)   += -DHAS_CPUFREQ
+CFLAGS-$(HAS_HWDOM_CPUFREQ) += -DHAS_HWDOM_CPUFREQ
 CFLAGS-$(HAS_PM)        += -DHAS_PM
 CFLAGS-$(HAS_CPU_TURBO) += -DHAS_CPU_TURBO
 CFLAGS-$(HAS_GDBSX)     += -DHAS_GDBSX
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 0dcf06a..fd0cd0d 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -27,6 +27,7 @@
 #include <xsm/xsm.h>
 #include <xen/pmstat.h>
 #include <xen/gcov.h>
+#include <xen/cpufreq.h>
 
 long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
 {
@@ -362,6 +363,13 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) 
u_sysctl)
         break;
 #endif
 
+#ifdef HAS_HWDOM_CPUFREQ
+    case XEN_SYSCTL_cpufreq_op:
+        ret = sysctl_cpufreq_op(&op->u.cpufreq_op);
+        copyback = 1;
+        break;
+#endif
+
     default:
         ret = arch_do_sysctl(op, u_sysctl);
         copyback = 0;
diff --git a/xen/drivers/cpufreq/Makefile b/xen/drivers/cpufreq/Makefile
index b87d127..891997c 100644
--- a/xen/drivers/cpufreq/Makefile
+++ b/xen/drivers/cpufreq/Makefile
@@ -2,3 +2,4 @@ obj-y += cpufreq.o
 obj-y += cpufreq_ondemand.o
 obj-y += cpufreq_misc_governors.o
 obj-y += utility.o
+obj-$(HAS_HWDOM_CPUFREQ) += hwdom-cpufreq.o
diff --git a/xen/drivers/cpufreq/hwdom-cpufreq.c 
b/xen/drivers/cpufreq/hwdom-cpufreq.c
new file mode 100644
index 0000000..3932dca
--- /dev/null
+++ b/xen/drivers/cpufreq/hwdom-cpufreq.c
@@ -0,0 +1,422 @@
+/*
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *      porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
+ *
+ *  Copyright (C) 2014 GlobalLogic Inc.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/spinlock.h>
+#include <xen/cpufreq.h>
+#include <xen/err.h>
+#include <xen/timer.h>
+#include <asm/shared.h>
+#include <asm/current.h>
+#include <asm/system.h>
+
+/* Timeout used when arming the answer timer in cpufreq_hwdom_change_freq(). */
+#define WAIT_HWDOM_ANSWER_TOUT          (2000)  /* ms */
+
+/* Per-CPU driver state, allocated in hwdom_cpufreq_cpu_init(). */
+struct hwdom_cpufreq_cpu_data {
+    /* Points at processor_pminfo[cpu]->perf; not allocated by this driver. */
+    struct processor_performance *perf_data;
+    /* Table built from perf_data; last entry is CPUFREQ_TABLE_END. */
+    struct cpufreq_frequency_table *freq_table;
+};
+
+/* Global state of the hwdom-cpufreq driver (single instance below). */
+struct hwdom_cpufreq {
+    /* Per-CPU data indexed by CPU id; NULL when the CPU has no data. */
+    struct hwdom_cpufreq_cpu_data *cpu_data[NR_CPUS];
+    /* Domain recorded by sysctl event_start; NULL after event_stop. */
+    struct domain *domain;
+    /* Taken around accesses to 'domain' and 'port'. */
+    spinlock_t drv_lock;
+    /* Taken around accesses to 'hwdom_res'. */
+    spinlock_t hwdom_res_lock;
+    /* Set when a request is issued; cleared by cpufreq_hwdom_idle(). */
+    bool_t is_timer_active;
+    /* Taken around accesses to 'is_timer_active'. */
+    spinlock_t timer_lock;
+    /* Answer timeout; its handler is cpufreq_hwdom_answer_tout(). */
+    struct timer timer;
+    /* Event channel port recorded by event_start; 0 after event_stop. */
+    uint32_t port;
+    /* Result of the last answered request, or -ETIME after a timeout. */
+    int32_t hwdom_res;
+};
+
+static struct hwdom_cpufreq hwdom_cpufreq;
+
+/*
+ * Thin wrapper: forwards 'cpuid' to cpufreq_add_cpu() and returns its
+ * result unchanged.
+ */
+int cpufreq_cpu_init(unsigned int cpuid)
+{
+    int rc = cpufreq_add_cpu(cpuid);
+
+    return rc;
+}
+
+/*
+ * Notify the hwdom (to do some command).
+ *
+ * The registered domain and port are sampled under drv_lock.  If no
+ * domain is currently registered (event_start has not been issued yet, or
+ * event_stop has cleared the registration) there is nobody to notify, so
+ * return instead of handing a NULL domain to the event channel code.  The
+ * answer timeout armed by the caller then completes the request.
+ */
+static void notify_cpufreq_domain(void)
+{
+    uint32_t port;
+    struct domain *domain;
+
+    spin_lock(&hwdom_cpufreq.drv_lock);
+    port = hwdom_cpufreq.port;
+    domain = hwdom_cpufreq.domain;
+    spin_unlock(&hwdom_cpufreq.drv_lock);
+
+    if ( domain == NULL )
+        return;
+
+    notify_via_xen_event_channel(domain, port);
+}
+
+/*
+ * Return the driver to the idle state: stop the answer timeout, clear the
+ * "waiting for answer" flag and store CPUFREQ_CMD_idle in the
+ * cpufreq_sh_info area returned by arch_get_cpufreq_addr(dom0).
+ */
+static void cpufreq_hwdom_idle(void)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    stop_timer(&hwdom_cpufreq.timer);
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    hwdom_cpufreq.is_timer_active = false;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    cpufreq_info->cmd = CPUFREQ_CMD_idle;
+
+    smp_wmb(); /* write barrier after the cmd store; no notify follows */
+
+    /* Notification is not needed in case CPUFREQ_CMD_idle */
+}
+
+/*
+ * Ask the hardware domain to switch 'cpu' to 'freq' using 'relation'.
+ *
+ * Marks a request as outstanding, arms the answer timeout
+ * (WAIT_HWDOM_ANSWER_TOUT ms), fills in the cpufreq_sh_info area returned
+ * by arch_get_cpufreq_addr(dom0) and then notifies the registered domain.
+ * The outcome is not returned here; it is recorded later by
+ * cpufreq_notification() or, on timeout, by cpufreq_hwdom_answer_tout().
+ *
+ * NOTE(review): the caller checks cpufreq_is_waiting_answer() separately
+ * before calling this, so that check and the flag update below are not
+ * atomic with respect to each other - confirm callers are serialised.
+ */
+static void cpufreq_hwdom_change_freq(uint32_t cpu, uint32_t freq,
+                                      uint32_t relation)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    hwdom_cpufreq.is_timer_active = true;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    set_timer(&hwdom_cpufreq.timer, NOW() + MILLISECS(WAIT_HWDOM_ANSWER_TOUT));
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    /* Parameters first, command last. */
+    cpufreq_info->cpu = cpu;
+    cpufreq_info->freq = freq;
+    cpufreq_info->relation = relation;
+    cpufreq_info->cmd = CPUFREQ_CMD_change_freq;
+
+    smp_wmb(); /* above must be visible before notify_cpufreq_domain() */
+
+    notify_cpufreq_domain();
+}
+
+/* Report whether a request is still outstanding (flag read under timer_lock). */
+static bool_t cpufreq_is_waiting_answer(void)
+{
+    bool_t waiting;
+
+    spin_lock(&hwdom_cpufreq.timer_lock);
+    waiting = hwdom_cpufreq.is_timer_active;
+    spin_unlock(&hwdom_cpufreq.timer_lock);
+
+    return waiting;
+}
+
+/* Record 'result' as the latest request outcome, under hwdom_res_lock. */
+static void cpufreq_set_hwdom_res(int32_t result)
+{
+    spinlock_t *res_lock = &hwdom_cpufreq.hwdom_res_lock;
+
+    spin_lock(res_lock);
+    hwdom_cpufreq.hwdom_res = result;
+    spin_unlock(res_lock);
+}
+
+/* Fetch the most recently recorded request outcome, under hwdom_res_lock. */
+static int32_t cpufreq_get_hwdom_res(void)
+{
+    spinlock_t *res_lock = &hwdom_cpufreq.hwdom_res_lock;
+    int32_t last_result;
+
+    spin_lock(res_lock);
+    last_result = hwdom_cpufreq.hwdom_res;
+    spin_unlock(res_lock);
+
+    return last_result;
+}
+
+/*
+ * Handler of hwdom_cpufreq.timer: no answer arrived in time.  Puts the
+ * driver back into the idle state and records -ETIME as the request
+ * result.  'data' is unused (the timer is initialised with NULL).
+ */
+static void cpufreq_hwdom_answer_tout(void *data)
+{
+    cpufreq_hwdom_idle();
+    cpufreq_set_hwdom_res(-ETIME);
+}
+
+/*
+ * Notification from the hwdom (frequency changed).
+ *
+ * Registered as the Xen-side callback of the event channel allocated in
+ * sysctl_cpufreq_op().  If a request is outstanding, the driver is put
+ * back into the idle state and the 'result' field of the cpufreq_sh_info
+ * area is recorded as the request result.  'v' and 'port' are not used.
+ */
+static void cpufreq_notification(struct vcpu *v, unsigned int port)
+{
+    struct cpufreq_sh_info *cpufreq_info;
+
+    /* if we are not waiting answer just skip strange notifications */
+    if ( !cpufreq_is_waiting_answer() )
+        return;
+
+    cpufreq_hwdom_idle();
+
+    cpufreq_info = arch_get_cpufreq_addr(dom0);
+
+    /*
+     * Read the result left by the hardware domain (presumably written
+     * before it sent this notification - confirm against the hwdom side).
+     */
+    smp_rmb();
+    cpufreq_set_hwdom_res(cpufreq_info->result);
+}
+
+/*
+ * Handle XEN_SYSCTL_cpufreq_op on behalf of the calling domain.
+ *
+ * XEN_SYSCTL_CPUFREQ_event_start allocates an unbound Xen event channel
+ *   in the caller, records the caller and the port as the notification
+ *   target and returns the port in op->port.  If the caller already had a
+ *   channel registered, the stale one is freed instead of being leaked.
+ * XEN_SYSCTL_CPUFREQ_event_stop drops the caller's registration and frees
+ *   its channel.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for an unknown command, -ESRCH if the
+ * calling domain cannot be looked up, -ENOENT if event_stop is issued
+ * without a registration owned by the caller, or the negative error from
+ * alloc_unbound_xen_event_channel().
+ *
+ * NOTE(review): hwdom_cpufreq.domain keeps pointing at the domain after
+ * rcu_unlock_domain(); this assumes the registered domain outlives the
+ * registration - confirm, or hold a proper reference.
+ */
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op)
+{
+    int ret = 0;
+    uint32_t domain_id = current->domain->domain_id;
+    uint32_t port = 0;
+    struct domain *d;
+
+    if ( op->cmd != XEN_SYSCTL_CPUFREQ_event_start &&
+         op->cmd != XEN_SYSCTL_CPUFREQ_event_stop )
+        return -EOPNOTSUPP;
+
+    d = rcu_lock_domain_by_id(domain_id);
+    if ( d == NULL )
+        return -ESRCH;
+
+    switch ( op->cmd )
+    {
+    case XEN_SYSCTL_CPUFREQ_event_start:
+        /* Allocate event channel */
+        ret = alloc_unbound_xen_event_channel(d->vcpu[0], domain_id,
+                                              cpufreq_notification);
+        if ( ret < 0 )
+            break;
+
+        op->port = ret;
+
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        /* Remember a channel left over from an earlier start by us. */
+        if ( hwdom_cpufreq.domain == d )
+            port = hwdom_cpufreq.port;
+        hwdom_cpufreq.port = ret;
+        hwdom_cpufreq.domain = d;
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        /* Release the superseded channel outside of drv_lock. */
+        if ( port != 0 )
+            free_xen_event_channel(d->vcpu[0], port);
+
+        ret = 0;
+        break;
+
+    case XEN_SYSCTL_CPUFREQ_event_stop:
+        spin_lock(&hwdom_cpufreq.drv_lock);
+        /* Only the domain that registered may tear the channel down. */
+        if ( hwdom_cpufreq.domain == d )
+        {
+            port = hwdom_cpufreq.port;
+            hwdom_cpufreq.port = 0;
+            hwdom_cpufreq.domain = NULL;
+        }
+        spin_unlock(&hwdom_cpufreq.drv_lock);
+
+        if ( port == 0 )
+        {
+            /* Nothing registered by the caller: no channel to free. */
+            ret = -ENOENT;
+            break;
+        }
+
+        /* Free hwdom's event channel and leave the other one unbound */
+        free_xen_event_channel(d->vcpu[0], port);
+        break;
+    }
+
+    rcu_unlock_domain(d);
+    return ret;
+}
+
+/*
+ * cpufreq .verify hook.
+ *
+ * Returns -EINVAL when 'policy' is NULL or when the policy's CPU has no
+ * driver data or no processor_pminfo entry.  Otherwise limits the policy
+ * to the range [0, frequency of the platform_limit state * 1000] and
+ * returns the result of checking it against the CPU's frequency table.
+ */
+static int hwdom_cpufreq_verify(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *cpu_data;
+    struct processor_performance *px;
+
+    if ( !policy )
+        return -EINVAL;
+
+    cpu_data = hwdom_cpufreq.cpu_data[policy->cpu];
+    if ( !cpu_data )
+        return -EINVAL;
+
+    if ( !processor_pminfo[policy->cpu] )
+        return -EINVAL;
+
+    px = &processor_pminfo[policy->cpu]->perf;
+
+    cpufreq_verify_within_limits(policy, 0,
+        px->states[px->platform_limit].core_frequency * 1000);
+
+    return cpufreq_frequency_table_verify(policy, cpu_data->freq_table);
+}
+
+/*
+ * cpufreq .target hook: ask the hardware domain to change the frequency
+ * of policy->cpu.
+ *
+ * The request is asynchronous.  The value returned on the "request sent"
+ * path is the recorded result of the PREVIOUS request, not of this one.
+ *
+ * Returns:
+ *   -ENODEV  no driver data for the CPU, or no matching table entry;
+ *   0        the CPU is already in the requested state and no resume
+ *            re-apply is pending;
+ *   -EAGAIN  an earlier request is still waiting for its answer.  The
+ *            policy->resume flag is left untouched in this case so that a
+ *            pending forced re-apply is not lost;
+ *   otherwise the previous request's result (0, -ETIME on timeout, or
+ *            whatever the hardware domain reported).
+ *
+ * NOTE(review): perf->state and policy->cur are updated as soon as the
+ * request is sent, before the hardware domain has confirmed it.
+ */
+static int hwdom_cpufreq_target(struct cpufreq_policy *policy,
+                               unsigned int target_freq, unsigned int relation)
+{
+    struct hwdom_cpufreq_cpu_data *data = hwdom_cpufreq.cpu_data[policy->cpu];
+    struct processor_performance *perf;
+    struct cpufreq_freqs freqs;
+    cpumask_t online_policy_cpus;
+    unsigned int next_state = 0; /* Index into freq_table */
+    unsigned int next_perf_state = 0; /* Index into perf table */
+    unsigned int j;
+    bool_t same_state;
+    int ret = 0;
+
+    if ( unlikely(data == NULL ||
+         data->perf_data == NULL || data->freq_table == NULL) )
+        return -ENODEV;
+
+    perf = data->perf_data;
+    ret = cpufreq_frequency_table_target(policy,
+                                         data->freq_table,
+                                         target_freq,
+                                         relation, &next_state);
+    if ( unlikely(ret) )
+        return -ENODEV;
+
+    next_perf_state = data->freq_table[next_state].index;
+    same_state = (perf->state == next_perf_state);
+
+    /* Nothing to do unless a re-apply was requested via policy->resume. */
+    if ( same_state && likely(!policy->resume) )
+        return 0;
+
+    if ( cpufreq_is_waiting_answer() )
+        return -EAGAIN;
+
+    /* The request will be sent now, so the forced re-apply is consumed. */
+    if ( same_state )
+        policy->resume = 0;
+
+    freqs.new = data->freq_table[next_state].frequency;
+
+    /* return previous result */
+    ret = cpufreq_get_hwdom_res();
+
+    /* Do send cmd for Hardware domain */
+    cpufreq_hwdom_change_freq(policy->cpu, freqs.new, (uint32_t)relation);
+
+    cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus);
+
+    for_each_cpu( j, &online_policy_cpus )
+        cpufreq_statistic_update(j, perf->state, next_perf_state);
+
+    perf->state = next_perf_state;
+    policy->cur = freqs.new;
+
+    return ret;
+}
+
+/*
+ * cpufreq .init hook: set up the driver data of policy->cpu.
+ *
+ * Allocates the per-CPU data, builds the frequency table from the CPU's
+ * processor_pminfo performance states (skipping a state whose frequency
+ * is not lower than the previously accepted one), fills in the policy's
+ * transition latency, governor and cpuinfo, and finally requests the
+ * minimal frequency.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * cpufreq_frequency_table_cpuinfo() / hwdom_cpufreq_target().  On every
+ * failure the per-CPU data is unpublished and freed, including when the
+ * initial frequency request fails.
+ *
+ * NOTE(review): hwdom_cpufreq_target() returns -EAGAIN while another
+ * request is outstanding and otherwise the result of the previous
+ * request, so initialisation can fail for reasons unrelated to this CPU -
+ * confirm whether that is intended.
+ */
+static int hwdom_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+    struct processor_performance *perf;
+    struct hwdom_cpufreq_cpu_data *data;
+    unsigned int cpu = policy->cpu;
+    unsigned int valid_states = 0;
+    unsigned int i;
+    int ret = 0;
+
+    data = xzalloc(struct hwdom_cpufreq_cpu_data);
+    if ( !data )
+        return -ENOMEM;
+
+    hwdom_cpufreq.cpu_data[cpu] = data;
+
+    data->perf_data = &processor_pminfo[cpu]->perf;
+
+    perf = data->perf_data;
+    policy->shared_type = perf->shared_type;
+
+    /* One extra entry for the CPUFREQ_TABLE_END terminator. */
+    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
+                                     (perf->state_count+1));
+    if ( !data->freq_table )
+    {
+        ret = -ENOMEM;
+        goto err_unreg;
+    }
+
+    /* detect transition latency */
+    policy->cpuinfo.transition_latency = 0;
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( (perf->states[i].transition_latency * 1000) >
+             policy->cpuinfo.transition_latency )
+            policy->cpuinfo.transition_latency =
+                perf->states[i].transition_latency * 1000;
+    }
+
+    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
+
+    /* table init */
+    for ( i = 0; i < perf->state_count; i++ )
+    {
+        if ( i > 0 && perf->states[i].core_frequency >=
+            data->freq_table[valid_states-1].frequency / 1000 )
+            continue;
+
+        data->freq_table[valid_states].index = i;
+        data->freq_table[valid_states].frequency =
+            perf->states[i].core_frequency * 1000;
+        valid_states++;
+    }
+    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
+    perf->state = 0;
+
+    ret = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
+    if ( ret )
+        goto err_freqfree;
+
+    /* We will set the minimal frequency now. So set policy->resume to 0 */
+    policy->resume = 0;
+
+    /* Set the minimal frequency */
+    ret = hwdom_cpufreq_target(policy, policy->min, CPUFREQ_RELATION_L);
+    if ( ret )
+        goto err_freqfree;
+
+    return 0;
+
+ err_freqfree:
+    xfree(data->freq_table);
+ err_unreg:
+    /* Unpublish before freeing so nobody can find the stale pointer. */
+    hwdom_cpufreq.cpu_data[cpu] = NULL;
+    xfree(data);
+
+    return ret;
+}
+
+/*
+ * cpufreq .exit hook: unpublish and free the driver data of policy->cpu,
+ * if there is any.  Always returns 0.
+ */
+static int hwdom_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+    struct hwdom_cpufreq_cpu_data *cpu_data =
+        hwdom_cpufreq.cpu_data[policy->cpu];
+
+    if ( !cpu_data )
+        return 0;
+
+    hwdom_cpufreq.cpu_data[policy->cpu] = NULL;
+    xfree(cpu_data->freq_table);
+    xfree(cpu_data);
+
+    return 0;
+}
+
+/* Driver descriptor passed to cpufreq_register_driver() at init time. */
+static struct cpufreq_driver hwdom_cpufreq_driver = {
+    .name   = "hwdom-cpufreq",
+    .verify = hwdom_cpufreq_verify,
+    .target = hwdom_cpufreq_target,
+    .init   = hwdom_cpufreq_cpu_init,
+    .exit   = hwdom_cpufreq_cpu_exit,
+};
+
+/*
+ * Boot-time initialisation of the hwdom-cpufreq driver.
+ *
+ * Does nothing (returns 0) unless cpufreq_controller is FREQCTL_xen.
+ * Otherwise initialises all three driver locks and the answer timeout
+ * timer, and only then registers the driver, so that no driver hook can
+ * run against uninitialised state.  Returns the result of
+ * cpufreq_register_driver().
+ */
+static int __init hwdom_cpufreq_driver_init(void)
+{
+    if ( cpufreq_controller != FREQCTL_xen )
+        return 0;
+
+    spin_lock_init(&hwdom_cpufreq.drv_lock);
+    spin_lock_init(&hwdom_cpufreq.hwdom_res_lock);
+    spin_lock_init(&hwdom_cpufreq.timer_lock);
+
+    init_timer(&hwdom_cpufreq.timer, cpufreq_hwdom_answer_tout, NULL, 0);
+
+    return cpufreq_register_driver(&hwdom_cpufreq_driver);
+}
+
+__initcall(hwdom_cpufreq_driver_init);
diff --git a/xen/include/xen/cpufreq.h b/xen/include/xen/cpufreq.h
index d7b6c34..0c8c19d 100644
--- a/xen/include/xen/cpufreq.h
+++ b/xen/include/xen/cpufreq.h
@@ -264,4 +264,6 @@ int write_userspace_scaling_setspeed(unsigned int cpu, 
unsigned int freq);
 void cpufreq_dbs_timer_suspend(void);
 void cpufreq_dbs_timer_resume(void);
 
+int sysctl_cpufreq_op(xen_sysctl_cpufreq_op_t *op);
+
 #endif /* __XEN_CPUFREQ_PM_H__ */
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.