2 * Copyright (c) 2010, 2014 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/sched.h>
35 #include <linux/mutex.h>
36 #include <asm/atomic.h>
40 #if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
/* NOTE(review): every line below carries a stray leading number ("40",
 * "46", ...) that looks like an original line number fused into the text
 * by a broken extraction, and the jumps in those numbers show that lines
 * are missing throughout this file. Restore from the pristine source
 * before attempting to build. */
42 /* Each CPU is put into a group. In most cases, the group number is
43 * equal to the CPU number of one of the CPUs in the group. The
44 * exception is group NR_CPUS which is the default group. This is
45 * protected by sys_tune_startup_mutex. */
/* Per-CPU group id; NR_CPUS doubles as the "not yet added" sentinel
 * (tested in sys_tune_add_cpu()/sys_tune_del_cpu() below). */
46 DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;
48 /* For each group, a count of the number of CPUs in the group which
49 * are known to be busy. A busy CPU might be running the busy loop
50 * below or general kernel code. The count is decremented on entry to
51 * the old pm_idle handler and incremented on exit. The aim is to
52 * avoid the count going to zero or negative. This situation can
53 * occur temporarily during module unload or CPU hot-plug but
54 * normality will be restored when the affected CPUs next exit the
/* NOTE(review): comment is truncated here (original line 55 missing —
 * presumably "... idle loop."). One extra array slot is reserved for the
 * default group NR_CPUS described above. */
56 static atomic_t busy_cpu_count[NR_CPUS+1];
58 /* A workqueue item to be executed to cause the CPU to exit from the
/* NOTE(review): comment truncated (original line 59 missing — presumably
 * "... idle loop."). The work item is scheduled from sys_tune_smp_call()
 * on each CPU; merely queuing work kicks that CPU out of idle. */
60 DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);
/* NOTE(review): the macro body is missing (original lines 63-65 dropped
 * by the extraction). Judging by the call sites below it records a
 * per-CPU state value (0..3) for debugging/tracing — confirm against the
 * pristine source before rebuilding. */
62 #define sys_tune_set_state(CPU,STATE) \
66 /* A mutex to protect most of the module datastructures. */
67 static DEFINE_MUTEX(sys_tune_startup_mutex);
69 /* The old pm_idle handler. */
/* Saved by sys_tune_ensure_init() so that sys_tune_pm_idle() can chain to
 * it and sys_tune_fini() can restore it; NULL until our handler is
 * installed. */
70 static void (*old_pm_idle)(void) = NULL;
/* Replacement pm_idle handler: busy-wait instead of idling while this CPU
 * is the last known-busy CPU of its group; otherwise hand off to the old
 * handler.
 * NOTE(review): this function is heavily truncated — the opening brace,
 * the declaration of busy_cpus, the head of the if-condition, the call
 * into old_pm_idle(), and the closing braces (original lines 73, 75, 77,
 * 79, 81-82, 85, 89, 93, 98-102, 106-113) are all missing. Restore from
 * the pristine source. */
72 static void sys_tune_pm_idle(void)
74 atomic_t *busy_cpus_ptr;
76 int cpu = smp_processor_id();
/* Busy counter for this CPU's group. */
78 busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);
/* State 2: spinning in the busy-wait loop below. */
80 sys_tune_set_state(cpu, 2);
83 while (!need_resched()) {
84 busy_cpus = atomic_read(busy_cpus_ptr);
86 /* If other CPUs in this group are busy then let this
87 * CPU go idle. We mustn't let the number of busy
88 * CPUs drop below 1. */
/* NOTE(review): the condition head (original line 89 — presumably
 * "if (busy_cpus > 1 &&") is missing; the cmpxchg claims the "allowed
 * to go idle" slot by decrementing the busy count atomically. */
90 old_pm_idle != NULL &&
91 ( atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
92 busy_cpus-1) == busy_cpus ) ) {
/* State 3: delegated to the old pm_idle handler. */
94 sys_tune_set_state(cpu, 3);
95 /* This check might not be necessary, but it
96 * seems safest to include it because there
97 * might be a kernel version which requires
/* NOTE(review): comment and the old_pm_idle() invocation that followed it
 * (original lines 98-102) are missing here. */
103 /* This CPU is busy again. */
104 sys_tune_set_state(cpu, 1);
/* Re-register this CPU as busy after returning from the old handler. */
105 atomic_add(1, busy_cpus_ptr);
/* State 0: left the idle loop entirely (rescheduling). */
111 sys_tune_set_state(cpu, 0);
/* Deliberately empty work handler: the act of running any work item in
 * process context is enough to force the CPU out of its idle loop.
 * NOTE(review): the function's braces (original lines 116 and 119-120)
 * are missing from this extraction. */
115 void sys_tune_work_func(struct work_struct *work)
117 /* Do nothing. Since this function is running in process
118 * context, the idle thread isn't running on this CPU. */
/* SMP cross-call handler: queue this CPU's (no-op) work item so the CPU
 * exits the idle loop and re-evaluates its group assignment.
 * NOTE(review): function braces (original lines 124 and 127) are missing
 * from this extraction. */
123 static void sys_tune_smp_call(void *info)
125 schedule_work(&get_cpu_var(sys_tune_cpu_work));
126 put_cpu_var(sys_tune_cpu_work);
/* Kick every CPU out of the idle loop so each picks up its current group
 * assignment. SMP variant: cross-call sys_tune_smp_call() on all CPUs
 * (on_each_cpu() lost its "retry" argument in 2.6.26, hence the version
 * split).
 * NOTE(review): the surrounding #ifdef CONFIG_SMP / #else / #endif
 * scaffolding and function braces (original lines 133, 136, 138-140, 142,
 * 145-146) are missing from this extraction — the second definition below
 * is evidently the UP variant. */
132 static void sys_tune_refresh(void)
134 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
135 on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
137 on_each_cpu(&sys_tune_smp_call, NULL, 1);
/* UP variant: nothing to do — we are running, so the idle thread isn't. */
141 static void sys_tune_refresh(void)
143 /* The current thread is executing on the one and only CPU so
144 * the idle thread isn't running. */
/* Pick the group for @cpu: reuse the group of any hyperthread sibling
 * that has already been assigned one, otherwise (fallthrough, missing
 * here) default to the CPU's own number.
 * NOTE(review): heavily truncated — the opening brace, declarations of
 * other_cpu/group, the #else/#endif around the sibling-mask lookup, the
 * loop's closing braces and the final "return cpu;" (original lines
 * 151-152, 154-156, 160-162, 166-171) are missing. */
150 static int sys_tune_cpu_group(int cpu)
153 const cpumask_t *mask;
157 #if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
158 /* Keep one hyperthread busy per core. */
159 mask = topology_thread_cpumask(cpu);
/* BUG(review): the loop iterates 'cpu' but the body reads 'other_cpu' —
 * almost certainly this should be for_each_cpu_mask(other_cpu, *(mask)).
 * As written it also clobbers the 'cpu' parameter. Verify against the
 * pristine source. */
163 for_each_cpu_mask(cpu, *(mask)) {
164 group = per_cpu(idle_cpu_group, other_cpu);
165 if (group != NR_CPUS)
/* Assign @cpu to a group and count it as busy. Caller must hold
 * sys_tune_startup_mutex.
 * NOTE(review): the opening brace, the declaration of 'group', the early
 * return and the closing brace (original lines 175-177, 180-181, 185-186)
 * are missing from this extraction. */
174 static void sys_tune_add_cpu(int cpu)
178 /* Do nothing if this CPU has already been added. */
179 if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
182 group = sys_tune_cpu_group(cpu);
183 per_cpu(idle_cpu_group, cpu) = group;
/* A freshly added CPU is assumed busy until it enters the idle loop. */
184 atomic_inc(&(busy_cpu_count[group]));
/* Remove @cpu from its group, reverting it to the default group NR_CPUS
 * and dropping its contribution to the busy count. Caller must hold
 * sys_tune_startup_mutex and should follow up with sys_tune_refresh()
 * (see the comment below). 
 * NOTE(review): the opening brace, declaration of 'group', early return
 * and closing brace (original lines 189-192, 194-195, 206-207) are
 * missing from this extraction. */
188 static void sys_tune_del_cpu(int cpu)
193 if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
196 group = per_cpu(idle_cpu_group, cpu);
197 /* If the CPU was busy, this can cause the count to drop to
198 * zero. To rectify this, we need to cause one of the other
199 * CPUs in the group to exit the idle loop. If the CPU was
200 * not busy then this causes the contribution for this CPU to
201 * go to -1 which can cause the overall count to drop to zero
202 * or go negative. To rectify this situation we need to cause
203 * this CPU to exit the idle loop. */
204 atomic_dec(&(busy_cpu_count[group]));
205 per_cpu(idle_cpu_group, cpu) = NR_CPUS;
/* CPU hotplug notifier: add newly-online CPUs to a group and remove dead
 * CPUs, then refresh so affected CPUs re-enter the idle loop with the
 * correct group.
 * NOTE(review): heavily truncated — the opening brace, the
 * "switch (action)" head, "case CPU_ONLINE:", the #endif lines, the
 * sys_tune_refresh() calls, the break statements, "case CPU_DEAD:",
 * the "return NOTIFY_OK;" and closing brace (original lines 212, 214-215,
 * 218-219, 226-228, 231-232, 239-244) are missing from this extraction. */
210 static int sys_tune_cpu_notify(struct notifier_block *self,
211 unsigned long action, void *hcpu)
213 int cpu = (long)hcpu;
216 #ifdef CPU_ONLINE_FROZEN
217 case CPU_ONLINE_FROZEN:
220 mutex_lock(&sys_tune_startup_mutex);
221 sys_tune_add_cpu(cpu);
222 mutex_unlock(&sys_tune_startup_mutex);
223 /* The CPU might have already entered the idle loop in
224 * the wrong group. Make sure it exits the idle loop
225 * so that it picks up the correct group. */
229 #ifdef CPU_DEAD_FROZEN
230 case CPU_DEAD_FROZEN:
233 mutex_lock(&sys_tune_startup_mutex);
234 sys_tune_del_cpu(cpu);
235 mutex_unlock(&sys_tune_startup_mutex);
236 /* The deleted CPU may have been the only busy CPU in
237 * the group. Make sure one of the other CPUs in the
238 * group exits the idle loop. */
/* Hotplug notifier registration record; the closing "};" (original line
 * 248) is missing from this extraction. */
246 static struct notifier_block sys_tune_cpu_nb = {
247 .notifier_call = sys_tune_cpu_notify,
/* Install sys_tune_pm_idle as the pm_idle handler, saving the previous
 * handler in old_pm_idle. The cmpxchg loop retries if pm_idle changes
 * concurrently between the read and the swap.
 * NOTE(review): truncated — the opening brace, the "do {" head, the
 * cmpxchg comparison tail ("!= old_pm_idle);" presumably) and the closing
 * brace (original lines 252, 254, 257-258, 261-263) are missing. */
251 static void sys_tune_ensure_init(void)
253 BUG_ON (old_pm_idle != NULL);
255 /* Atomically update pm_idle to &sys_tune_pm_idle. The old value
256 * is stored in old_pm_idle before installing the new
259 old_pm_idle = pm_idle;
260 } while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
/* Module teardown: unregister the hotplug notifier, restore the original
 * pm_idle handler, remove all online CPUs from their groups, then make
 * sure no CPU is still executing our handler before returning.
 * NOTE(review): truncated — the opening brace, declarations of
 * 'old'/'cpu', the BUG_ON on the cmpxchg result, the sys_tune_refresh()
 * call, the comment close and the final brace (original lines 266,
 * 268-270, 272, 274-275, 277, 280, 282, 286-287, 290, 293) are missing
 * from this extraction. */
265 void sys_tune_fini(void)
267 #if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
271 unregister_cpu_notifier(&sys_tune_cpu_nb);
273 mutex_lock(&sys_tune_startup_mutex);
/* Swap our handler back out only if it is still installed. */
276 old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);
278 for_each_online_cpu(cpu)
279 sys_tune_del_cpu(cpu);
281 mutex_unlock(&sys_tune_startup_mutex);
283 /* Our handler may still be executing on other CPUs.
284 * Schedule this thread on all CPUs to make sure all
285 * idle threads get interrupted. */
288 /* Make sure the work item has finished executing on all CPUs.
289 * This in turn ensures that all idle threads have been
/* Barrier: waits for every queued sys_tune_cpu_work to complete. */
291 flush_scheduled_work();
292 #endif /* CONFIG_X86 */
/* Module init: set up the per-CPU work items, register the hotplug
 * notifier (before scanning CPUs so no hotplug event is missed), assign
 * every online CPU to a group, install the idle handler, and kick all
 * CPUs so they start using it.
 * NOTE(review): truncated — the opening brace, declaration of 'cpu', the
 * INIT_WORK second argument and closing brace of the loop, the comment
 * close, the sys_tune_refresh() call and the final #endif/brace (original
 * lines 296, 298-299, 302-304, 306, 308, 310, 313, 315-316, 318,
 * 320-322) are missing from this extraction. */
295 void sys_tune_init(void)
297 #if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
300 for_each_possible_cpu(cpu) {
301 INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
305 /* Start by registering the handler to ensure we don't miss
307 register_cpu_notifier(&sys_tune_cpu_nb);
309 mutex_lock(&sys_tune_startup_mutex);
311 for_each_online_cpu(cpu)
312 sys_tune_add_cpu(cpu);
314 sys_tune_ensure_init();
317 mutex_unlock(&sys_tune_startup_mutex);
319 /* Ensure our idle handler starts to run. */