一、介绍
linux中为了解决非必要功耗的消耗,提供了多种电源管理方式,诸如休眠(suspend)、关机(power off和shutdown)、复位(reboot和reset)。为了解决运行时不必要的功耗消耗,linux提供了runtime pm、cpu/device dvfs、cpu hotplug、cpu idle、clock gate、power gate、reset等电源管理的机制。为了解决运行时电源管理对性能的影响,linux提供了pm qos的功能,用于平衡性能与功耗,这样既能降低功耗,又不影响性能。
二、电源管理操作原理实现和流程
1.suspend
suspend一般是我们所说的s3状态,也就是挂起到内存(suspend to mem),省电程度低于关机和s4。在这个状态下,系统所有进程全部被冻结,只保留first cpu运行(等待用户的唤醒中断),但是硬件设备不断电(最直观的感受是可以通过键盘、鼠标唤醒系统,键盘灯或者鼠标灯显示电源仍然在线)。
suspend的大致流程如下图
详细流程如下图
详细代码实现
step1:linux系统提供了一个/sys/power/state文件接口,一旦文件被修改,将调用state_store()函数处理电源管理需求。suspend一般是我们常说的s3休眠,即echo mem > /sys/power/state后,将执行pm_suspend(mem_sleep_current)。
/*
 * state_store - write handler for the "state" power attribute.
 * @kobj: Unused.
 * @attr: Unused.
 * @buf:  Text written by user space, decoded with decode_state().
 * @n:    Number of bytes in @buf.
 *
 * Takes the autosleep lock, refuses the request with -EBUSY while
 * pm_autosleep_state() is above PM_SUSPEND_ON, then dispatches on the
 * decoded state: PM_SUSPEND_MAX goes to hibernate(), anything below it goes
 * to pm_suspend() (with PM_SUSPEND_MEM first mapped to mem_sleep_current),
 * and anything else is rejected with -EINVAL.
 *
 * Return: @n on success, a negative error code otherwise.
 */
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t n)
{
	suspend_state_t target;
	int ret = pm_autosleep_lock();

	if (ret)
		return ret;

	if (pm_autosleep_state() > PM_SUSPEND_ON) {
		ret = -EBUSY;
	} else {
		target = decode_state(buf, n);

		if (target == PM_SUSPEND_MAX) {
			ret = hibernate();
		} else if (target < PM_SUSPEND_MAX) {
			/* "mem" is an alias for the currently selected mem sleep variant. */
			if (target == PM_SUSPEND_MEM)
				target = mem_sleep_current;
			ret = pm_suspend(target);
		} else {
			ret = -EINVAL;
		}
	}

	pm_autosleep_unlock();
	return ret ? ret : n;
}
power_attr(state);
step2:如果state值是合法的,将打印suspend entry (mem_sleep_labels[state])并调用enter_state(state),开始进入请求的suspend状态。
/**
 * pm_suspend - Externally visible entry point for suspending the system.
 * @state: System sleep state to enter.
 *
 * Rejects any @state that is not strictly between PM_SUSPEND_ON and
 * PM_SUSPEND_MAX, otherwise runs enter_state() and records the outcome in
 * suspend_stats.
 *
 * Return: 0 on success, -EINVAL for an out-of-range @state, or the error
 * returned by enter_state().
 */
int pm_suspend(suspend_state_t state)
{
	int ret;

	if (!(state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX))
		return -EINVAL;

	/* Log entry and exit around the actual transition. */
	pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);

	ret = enter_state(state);
	if (!ret) {
		suspend_stats.success++;
	} else {
		suspend_stats.fail++;
		dpm_save_failed_errno(ret);
	}

	pr_info("suspend exit\n");
	return ret;
}
EXPORT_SYMBOL(pm_suspend);
step3:enter_state函数主要调用两个函数,分别是suspend_prepare(state)和suspend_devices_and_enter(state)
/**
* enter_state - Do common work needed to enter system sleep state.
* @state: System sleep state to enter.
*
* Make sure that no one else is trying to put the system into a sleep state.
* Fail if that's not the case. Otherwise, prepare for system suspend, make the
* system enter the given sleep state and clean up after wakeup.
*
* Return: 0 on success; -EAGAIN for an unsupported suspend-to-idle test mode,
* -EINVAL if valid_state() rejects @state, -EBUSY if system_transition_mutex
* cannot be taken, otherwise the error from suspend_prepare() or
* suspend_devices_and_enter().
*/
static int enter_state(suspend_state_t state)
{
int error;
trace_suspend_resume(TPS("suspend_enter"), state, true);
/* Suspend-to-idle is not passed through valid_state(); only its debug test level is checked. */
if (state == PM_SUSPEND_TO_IDLE) {
#ifdef CONFIG_PM_DEBUG
if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
pr_warn("Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
return -EAGAIN;
}
#endif
} else if (!valid_state(state)) {
return -EINVAL;
}
/* Non-blocking attempt: report -EBUSY rather than wait for the mutex. */
if (!mutex_trylock(&system_transition_mutex))
return -EBUSY;
if (state == PM_SUSPEND_TO_IDLE)
s2idle_begin();
/* Optional filesystem sync, bracketed by trace events. */
if (sync_on_suspend_enabled) {
trace_suspend_resume(TPS("sync_filesystems"), 0, true);
ksys_sync_helper();
trace_suspend_resume(TPS("sync_filesystems"), 0, false);
}
pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
pm_suspend_clear_flags();
/* Prepare for entering the sleep state. */
error = suspend_prepare(state);
/* On failure skip suspend_finish() and only drop the mutex. */
if (error)
goto Unlock;
/* NOTE(review): presumably true when the freezer-only test level is selected; device suspend is skipped. */
if (suspend_test(TEST_FREEZER))
goto Finish;
trace_suspend_resume(TPS("suspend_enter"), state, false);
pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
/* The GFP mask is restricted only for the duration of the device suspend call. */
pm_restrict_gfp_mask();
/* Suspend devices and enter the requested state. */
error = suspend_devices_and_enter(state);
pm_restore_gfp_mask();
/* Reached after suspend_devices_and_enter() returns (with or without error) or via the freezer test shortcut. */
Finish:
events_check_enabled = false;
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
mutex_unlock(&system_transition_mutex);
return error;
}
step4:suspend_prepare()函数开始进入"suspend" console和冻结用户进程
/**
 * suspend_prepare - Get ready to enter a system sleep state.
 * @state: Target system sleep state.
 *
 * Shared by every sleep state other than hibernation: switches to the
 * "suspend" console, runs the PM_SUSPEND_PREPARE notifiers and freezes
 * processes. If freezing fails, the failure is recorded and the
 * PM_POST_SUSPEND notifiers are run; on any failure after the console
 * switch the console is restored.
 *
 * Return: 0 on success, -EPERM if @state is not supported, otherwise the
 * error from the notifier chain or from suspend_freeze_processes().
 */
static int suspend_prepare(suspend_state_t state)
{
	int ret;

	if (!sleep_state_supported(state))
		return -EPERM;

	/* Switch to the suspend console before notifying anyone. */
	pm_prepare_console();

	ret = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND);
	if (!ret) {
		/* Freeze processes, bracketed by trace events. */
		trace_suspend_resume(TPS("freeze_processes"), 0, true);
		ret = suspend_freeze_processes();
		trace_suspend_resume(TPS("freeze_processes"), 0, false);
		if (!ret)
			return 0;

		/* Freezing failed: record it and run the post-suspend notifiers. */
		suspend_stats.failed_freeze++;
		dpm_save_failed_step(SUSPEND_FREEZE);
		pm_notifier_call_chain(PM_POST_SUSPEND);
	}

	pm_restore_console();
	return ret;
}
step5:(注:下面这段代码与step4中的suspend_prepare()完全重复;按照step3的描述,此处应该是suspend_devices_and_enter(state)的实现,待补充。)
/**
 * suspend_prepare - Get ready to enter a system sleep state.
 * @state: Target system sleep state.
 *
 * Shared by every sleep state other than hibernation: switches to the
 * "suspend" console, runs the PM_SUSPEND_PREPARE notifiers and freezes
 * processes. If freezing fails, the failure is recorded and the
 * PM_POST_SUSPEND notifiers are run; on any failure after the console
 * switch the console is restored.
 *
 * Return: 0 on success, -EPERM if @state is not supported, otherwise the
 * error from the notifier chain or from suspend_freeze_processes().
 */
static int suspend_prepare(suspend_state_t state)
{
	int ret;

	if (!sleep_state_supported(state))
		return -EPERM;

	/* Switch to the suspend console before notifying anyone. */
	pm_prepare_console();

	ret = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND);
	if (!ret) {
		/* Freeze processes, bracketed by trace events. */
		trace_suspend_resume(TPS("freeze_processes"), 0, true);
		ret = suspend_freeze_processes();
		trace_suspend_resume(TPS("freeze_processes"), 0, false);
		if (!ret)
			return 0;

		/* Freezing failed: record it and run the post-suspend notifiers. */
		suspend_stats.failed_freeze++;
		dpm_save_failed_step(SUSPEND_FREEZE);
		pm_notifier_call_chain(PM_POST_SUSPEND);
	}

	pm_restore_console();
	return ret;
}
2.autosleep
autosleep也是从android wakelocks补丁集中演化而来的,用于取代wakelock中的自动休眠功能。它基于wakeup source实现。根据使用场景,低功耗状态可以是Freeze, Standby, Suspend to RAM和suspend to disk中的任意一种。它依赖wakeup events framework判断系统有没有事情正在做,只要系统没有正在处理和新增的wakeup events, 就尝试suspend, 如果suspend过程中有events产生,就resume。
autosleep的实现位于kernel/power/autosleep.c中,基于wakeup count和hibernate功能,并通过PM core的main模块在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep)。代码实现参考:
首先,在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep)。该文件的写操作主要通过pm_autosleep_set_state接口,进行pm的autosleep状态设置。
#ifdef CONFIG_PM_AUTOSLEEP
//文件读时函数入口
static ssize_t autosleep_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
suspend_state_t state = pm_autosleep_state();
if (state == PM_SUSPEND_ON)
return sprintf(buf, "off\n");
#ifdef CONFIG_SUSPEND
if (state < PM_SUSPEND_MAX)
return sprintf(buf, "%s\n", pm_states[state] ?
pm_states[state] : "error");
#endif
#ifdef CONFIG_HIBERNATION
return sprintf(buf, "disk\n");
#else
return sprintf(buf, "error");
#endif
}
/*
 * autosleep_store - write handler for the autosleep sysfs file.
 * @kobj: Unused.
 * @attr: Unused.
 * @buf:  Text written by user space, decoded with decode_state().
 * @n:    Number of bytes in @buf.
 *
 * A decoded PM_SUSPEND_ON is accepted only when the text is exactly "off"
 * or "off\n". PM_SUSPEND_MEM is mapped to mem_sleep_current before the
 * state is handed to pm_autosleep_set_state().
 *
 * Return: @n on success, -EINVAL for a rejected string, or the error from
 * pm_autosleep_set_state().
 */
static ssize_t autosleep_store(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       const char *buf, size_t n)
{
	suspend_state_t target = decode_state(buf, n);
	int ret;

	if (target == PM_SUSPEND_ON) {
		if (strcmp(buf, "off") != 0 && strcmp(buf, "off\n") != 0)
			return -EINVAL;
	}

	if (target == PM_SUSPEND_MEM)
		target = mem_sleep_current;

	/* Hand the requested state over to the autosleep core. */
	ret = pm_autosleep_set_state(target);
	if (ret)
		return ret;

	return n;
}
/* Define the sysfs attribute (mode 0644), here the /sys/power/autosleep file. */
power_attr(autosleep);
#endif /* CONFIG_PM_AUTOSLEEP */
在kernel PM初始化时(/kernel/power/main.c:pm_init),调用pm_autosleep_init初始化autosleep所需的两个全局参数autosleep_ws和autosleep_wq:
// SPDX-License-Identifier: GPL-2.0
/*
* kernel/power/autosleep.c
*
* Opportunistic sleep support.
*
* Copyright (C) 2012 Rafael J. Wysocki <[email protected]>
*/
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/pm_wakeup.h>
#include "power.h"
/* Requested autosleep state; written by pm_autosleep_set_state() and read by the suspend work. */
static suspend_state_t autosleep_state;
/* Ordered workqueue allocated in pm_autosleep_init(); suspend_work is queued on it. */
static struct workqueue_struct *autosleep_wq;
/*
* Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source
* is active, otherwise a deadlock with try_to_suspend() is possible.
* Alternatively mutex_lock_interruptible() can be used. This will then fail
* if an auto_sleep cycle tries to freeze processes.
*/
static DEFINE_MUTEX(autosleep_lock);
/* Wakeup source registered in pm_autosleep_init(); held active by pm_autosleep_set_state() while it takes autosleep_lock and stores the new state. */
static struct wakeup_source *autosleep_ws;
1)autosleep_ws:在autosleep执行关键操作时, 阻止系统休眠
2)autosleep_wq:一个workqueue, 用于触发实际的休眠动作(休眠应由进程或者线程触发)
/*
 * pm_autosleep_init - set up the autosleep wakeup source and workqueue.
 *
 * Registers the "autosleep" wakeup source, then allocates the ordered
 * "autosleep" workqueue. If the workqueue cannot be allocated, the wakeup
 * source is unregistered again.
 *
 * Return: 0 on success, -ENOMEM if either allocation fails.
 */
int __init pm_autosleep_init(void)
{
	autosleep_ws = wakeup_source_register(NULL, "autosleep");
	if (autosleep_ws == NULL)
		return -ENOMEM;

	autosleep_wq = alloc_ordered_workqueue("autosleep", 0);
	if (autosleep_wq == NULL) {
		/* Undo the registration so nothing is left half-initialised. */
		wakeup_source_unregister(autosleep_ws);
		return -ENOMEM;
	}

	return 0;
}
前面说过,在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep),并主要通过pm_autosleep_set_state接口进行pm的autosleep状态设置。pm_autosleep_set_state负责设置autosleep的状态,autosleep状态有freeze、standby、STR、STD等(具体依赖于系统支持的电源管理状态)。
/*
 * pm_autosleep_set_state - store the requested autosleep state and switch
 * autosleep on or off accordingly.
 * @state: Value to store in autosleep_state.
 *
 * Return: 0, or -EINVAL when CONFIG_HIBERNATION is not set and @state is
 * PM_SUSPEND_MAX or above.
 */
int pm_autosleep_set_state(suspend_state_t state)
{
#ifndef CONFIG_HIBERNATION
	if (state >= PM_SUSPEND_MAX)
		return -EINVAL;
#endif

	/* The autosleep wakeup source is held active while the lock is taken and the state stored. */
	__pm_stay_awake(autosleep_ws);

	mutex_lock(&autosleep_lock);

	autosleep_state = state;

	__pm_relax(autosleep_ws);

	if (state <= PM_SUSPEND_ON) {
		/* Requested state is not above PM_SUSPEND_ON: autosleep stays off. */
		pm_wakep_autosleep_enabled(false);
	} else {
		/* Any state above PM_SUSPEND_ON: enable autosleep and kick the suspend work. */
		pm_wakep_autosleep_enabled(true);
		queue_up_suspend_work();
	}

	mutex_unlock(&autosleep_lock);
	return 0;
}
一旦autosleep开启,将使能pm_wakep_autosleep_enabled和执行queue_up_suspend_work功能。
1)pm_wakep_autosleep_enabled主要用于更新wakeup source中和auto sleep有关的信息,代码和执行逻辑如下:
#ifdef CONFIG_PM_AUTOSLEEP
/**
* pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources.
* @set: Whether to set or to clear the autosleep_enabled flags.
*
* Walks the wakeup_sources list inside an SRCU read-side section and updates
* each entry under its own spinlock. A single timestamp taken on entry is
* used for every source.
*/
void pm_wakep_autosleep_enabled(bool set)
{
struct wakeup_source *ws;
/* Sampled once so that all sources are updated against the same instant. */
ktime_t now = ktime_get();
int srcuidx;
srcuidx = srcu_read_lock(&wakeup_srcu);
list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) {
spin_lock_irq(&ws->lock);
/* Only sources whose flag actually changes are touched. */
if (ws->autosleep_enabled != set) {
ws->autosleep_enabled = set;
/* For a currently active source: start timing on enable, fold elapsed time in on disable. */
if (ws->active) {
if (set)
ws->start_prevent_time = now;
else
update_prevent_sleep_time(ws, now);
}
}
spin_unlock_irq(&ws->lock);
}
srcu_read_unlock(&wakeup_srcu, srcuidx);
}
#endif /* CONFIG_PM_AUTOSLEEP */
2)queue_up_suspend_work调用queue_work,把suspend_work加入autosleep_wq工作队列;suspend_work通过DECLARE_WORK与处理函数try_to_suspend绑定,因此工作队列最终会执行try_to_suspend。try_to_suspend会调用hibernate或者pm_suspend(相关实现参考suspend)接口完成pm状态的设置。代码实现如下:
/*
* try_to_suspend - work handler that attempts one autosleep transition.
* @work: Unused.
*
* Reads the wakeup count, then under autosleep_lock saves it and enters the
* state held in autosleep_state via hibernate() or pm_suspend(). Afterwards
* the work re-queues itself through queue_up_suspend_work(), except when
* autosleep_state is found to be PM_SUSPEND_ON, in which case it returns
* without re-queueing.
*/
static void try_to_suspend(struct work_struct *work)
{
unsigned int initial_count, final_count;
/* NOTE(review): presumably fails while wakeup events are being processed; just retry later. */
if (!pm_get_wakeup_count(&initial_count, true))
goto out;
mutex_lock(&autosleep_lock);
/* Give up this round if the count cannot be saved or the system is not in SYSTEM_RUNNING. */
if (!pm_save_wakeup_count(initial_count) ||
system_state != SYSTEM_RUNNING) {
mutex_unlock(&autosleep_lock);
goto out;
}
/* Autosleep has been switched off: stop without re-queueing. */
if (autosleep_state == PM_SUSPEND_ON) {
mutex_unlock(&autosleep_lock);
return;
}
/* States at or above PM_SUSPEND_MAX go to hibernate(); the rest go to pm_suspend(). */
if (autosleep_state >= PM_SUSPEND_MAX)
hibernate();
else
pm_suspend(autosleep_state);
mutex_unlock(&autosleep_lock);
if (!pm_get_wakeup_count(&final_count, false))
goto out;
/*
* If the wakeup occurred for an unknown reason, wait to prevent the
* system from trying to suspend and waking up in a tight loop.
*/
if (final_count == initial_count)
schedule_timeout_uninterruptible(HZ / 2);
out:
queue_up_suspend_work();
}
/* Work item that runs try_to_suspend(); queued by queue_up_suspend_work(). */
static DECLARE_WORK(suspend_work, try_to_suspend);
/*
 * queue_up_suspend_work - queue suspend_work on the autosleep workqueue.
 *
 * Does nothing unless autosleep_state is above PM_SUSPEND_ON.
 */
void queue_up_suspend_work(void)
{
	if (autosleep_state <= PM_SUSPEND_ON)
		return;

	queue_work(autosleep_wq, &suspend_work);
}
3.poweroff
poweroff是通过sysrq处理程序(sysrq相关介绍参考linux内核中的sysrq-CSDN博客)来实现正常关闭机器电源的操作的。它的流程如下:
其代码如下:
// SPDX-License-Identifier: GPL-2.0-only
/*
* poweroff.c - sysrq handler to gracefully power down machine.
*/
#include <linux/kernel.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include <linux/cpumask.h>
/*
* When the user hits Sys-Rq o to power down the machine this is the
* callback we use.
*
* do_poweroff - work handler that calls kernel_power_off().
* @dummy: Unused.
*/
static void do_poweroff(struct work_struct *dummy)
{
kernel_power_off();
}
/* Work item wrapping do_poweroff(); scheduled from handle_poweroff(). */
static DECLARE_WORK(poweroff_work, do_poweroff);
/*
 * handle_poweroff - sysrq handler that schedules the power-off work.
 * @key: Unused.
 *
 * The work is scheduled on the first CPU in cpu_online_mask.
 */
static void handle_poweroff(int key)
{
	/* run sysrq poweroff on boot cpu */
	int target_cpu = cpumask_first(cpu_online_mask);

	schedule_work_on(target_cpu, &poweroff_work);
}
/* Sysrq operation for power-off; registered for the 'o' key in pm_sysrq_init(). */
static const struct sysrq_key_op sysrq_poweroff_op = {
/* Callback invoked for this sysrq operation. */
.handler = handle_poweroff,
.help_msg = "poweroff(o)",
.action_msg = "Power Off",
.enable_mask = SYSRQ_ENABLE_BOOT,
};
/*
* pm_sysrq_init - register the power-off sysrq operation under the 'o' key.
*
* The return value of register_sysrq_key() is not checked.
*
* Return: always 0.
*/
static int __init pm_sysrq_init(void)
{
register_sysrq_key('o', &sysrq_poweroff_op);
return 0;
}
/* Run pm_sysrq_init() as a subsys initcall. */
subsys_initcall(pm_sysrq_init);
当发出poweroff操作时,将向/proc/sysrq-trigger写入'o'操作,以此触发handle_poweroff处理函数,并最终执行kernel_power_off()函数关闭系统电源。
/*
* kernel/reboot.c
*/
/**
* kernel_power_off - power_off the system
*
* Shutdown everything and perform a clean system power_off.
*
* The steps below run in a fixed order, ending with machine_power_off().
*/
void kernel_power_off(void)
{
/* Shutdown preparation, tagged with the SYSTEM_POWER_OFF target state. */
kernel_shutdown_prepare(SYSTEM_POWER_OFF);
do_kernel_power_off_prepare();
/* NOTE(review): presumably moves execution onto the designated reboot CPU; confirm in kernel/reboot.c. */
migrate_to_reboot_cpu();
syscore_shutdown();
pr_emerg("Power down\n");
/* Dump the kernel log with the shutdown reason before the machine goes down. */
kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
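在kernel_power_off()函数中依次执行:内核shutdown准备(kernel_shutdown_prepare) -> 关机前准备(do_kernel_power_off_prepare) -> 迁移到reboot cpu(migrate_to_reboot_cpu) -> syscore shutdown -> 打印关机日志 -> kmsg dump -> 机器关机(machine_power_off)。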