linux内核中的电源管理

一、介绍

linux中为了解决非必要功耗的消耗,提供了多种电源管理方式,诸如休眠(suspend)、关机(power off和shutdown)、复位(reboot和reset)。为了解决运行时不必要的功耗消耗,linux提供了runtime pm、cpu/device dvfs、cpu hotplug、cpu idle、clock gate、power gate、reset等电源管理的机制。为了解决运行时电源管理对性能的影响,linux提供了pm qos的功能,用于平衡性能与功耗,这样既能降低功耗,又不影响性能。

二、电源管理操作原理实现和流程

1.suspend

suspend一般是我们所说的s3状态,也就是挂起到内存(suspend to mem),省电等级低于关机和s4。这个状态会将系统所有进程全部冻结,只保留first cpu运行(等待用户的唤醒中断),但是硬件设备不断电(最直观的感受是可以通过键盘鼠标唤醒系统,键盘灯或者鼠标灯显示电源还在线)。

suspend的大致流程如下图

详细流程如下图

详细代码实现

step1:linux系统提供了一个/sys/power/state文件接口,一旦文件被修改,将调用state_store()函数处理电源管理需求。suspend一般是我们常说的s3休眠,即echo mem > /sys/power/state后,将执行pm_suspend(mem_sleep_current)。

/*
 * state_store - write handler for the "state" power attribute.
 * @kobj: kobject the attribute belongs to (not used here).
 * @attr: attribute being written (not used here).
 * @buf: user-supplied text naming the requested state.
 * @n: number of bytes in @buf.
 *
 * Decodes @buf with decode_state() and starts the matching transition:
 * pm_suspend() for states below PM_SUSPEND_MAX, hibernate() for
 * PM_SUSPEND_MAX.  The whole operation runs under the autosleep lock.
 *
 * Returns @n on success.  Otherwise returns the error from
 * pm_autosleep_lock(), -EBUSY when pm_autosleep_state() is above
 * PM_SUSPEND_ON, -EINVAL for a state above PM_SUSPEND_MAX, or the error
 * reported by pm_suspend()/hibernate().
 */
static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
			   const char *buf, size_t n)
{
	suspend_state_t state;
	int error;

	/* Take the autosleep lock; give up with its error code on failure. */
	error = pm_autosleep_lock();
	if (error)
		return error;

	/* Refuse manual transitions while an autosleep state is configured. */
	if (pm_autosleep_state() > PM_SUSPEND_ON) {
		error = -EBUSY;
		goto out;
	}

	state = decode_state(buf, n);
	if (state < PM_SUSPEND_MAX) {
		/* A PM_SUSPEND_MEM request is replaced by mem_sleep_current. */
		if (state == PM_SUSPEND_MEM)
			state = mem_sleep_current;

        /* State is below PM_SUSPEND_MAX (not the hibernate case below): suspend via pm_suspend(state). */
		error = pm_suspend(state);
	} else if (state == PM_SUSPEND_MAX) {
		error = hibernate();
	} else {
		error = -EINVAL;
	}

 out:
	/* Common exit: always drop the autosleep lock taken above. */
	pm_autosleep_unlock();
	return error ? error : n;
}

power_attr(state);

step2:如果state值是合法的,将打印suspend entry (mem_sleep_labels[state])并调用enter_state(state),开始进入将要suspend的状态。

/**
 * pm_suspend - Public entry point for putting the system to sleep.
 * @state: Sleep state the system should enter.
 *
 * Rejects @state with -EINVAL unless it lies strictly between PM_SUSPEND_ON
 * and PM_SUSPEND_MAX.  Otherwise runs enter_state() and records the outcome
 * in the system suspend statistics.
 *
 * Return: 0 on success, -EINVAL for an out-of-range @state, or the error
 * returned by enter_state().
 */
int pm_suspend(suspend_state_t state)
{
	int ret;

	if (!(state > PM_SUSPEND_ON && state < PM_SUSPEND_MAX))
		return -EINVAL;

	/* Log which sleep state is being entered. */
	pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);

	/* Carry out the transition into the requested state. */
	ret = enter_state(state);
	if (!ret) {
		suspend_stats.success++;
	} else {
		suspend_stats.fail++;
		dpm_save_failed_errno(ret);
	}

	pr_info("suspend exit\n");
	return ret;
}
EXPORT_SYMBOL(pm_suspend);

step3:enter_state函数主要调用两个函数,分别是suspend_prepare(state)和suspend_devices_and_enter(state)

/**
 * enter_state - Do common work needed to enter system sleep state.
 * @state: System sleep state to enter.
 *
 * Make sure that no one else is trying to put the system into a sleep state.
 * Fail if that's not the case.  Otherwise, prepare for system suspend, make the
 * system enter the given sleep state and clean up after wakeup.
 *
 * Returns 0 on success, -EINVAL for a state rejected by valid_state(),
 * -EBUSY when system_transition_mutex is already held, -EAGAIN for an
 * unsupported test level with suspend-to-idle (CONFIG_PM_DEBUG only), or
 * the error from suspend_prepare()/suspend_devices_and_enter().
 */
static int enter_state(suspend_state_t state)
{
    int error;

    trace_suspend_resume(TPS("suspend_enter"), state, true);
    if (state == PM_SUSPEND_TO_IDLE) {
#ifdef CONFIG_PM_DEBUG
        /* Test levels up to TEST_CPUS are rejected for suspend-to-idle. */
        if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
            pr_warn("Unsupported test mode for suspend to idle, please choose none/freezer/devices/platform.\n");
            return -EAGAIN;
        }
#endif
    } else if (!valid_state(state)) {
        return -EINVAL;
    }
    /* Only one system-wide transition at a time: do not wait for the mutex. */
    if (!mutex_trylock(&system_transition_mutex))
        return -EBUSY;

    if (state == PM_SUSPEND_TO_IDLE)
        s2idle_begin();

    /* Optionally sync filesystems before going down. */
    if (sync_on_suspend_enabled) {
        trace_suspend_resume(TPS("sync_filesystems"), 0, true);
        ksys_sync_helper();
        trace_suspend_resume(TPS("sync_filesystems"), 0, false);
    }

    pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
    pm_suspend_clear_flags();
    /* Prepare for entering the sleep state. */
    error = suspend_prepare(state);
    if (error)
        goto Unlock;

    /* In freezer test mode, skip the device suspend and go straight to cleanup. */
    if (suspend_test(TEST_FREEZER))
        goto Finish;

    trace_suspend_resume(TPS("suspend_enter"), state, false);
    pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
    pm_restrict_gfp_mask();
    /* Suspend the devices and enter the target state. */
    error = suspend_devices_and_enter(state);
    pm_restore_gfp_mask();

 Finish:
    events_check_enabled = false;
    pm_pr_dbg("Finishing wakeup.\n");
    suspend_finish();
 Unlock:
    mutex_unlock(&system_transition_mutex);
    return error;
}

step4:suspend_prepare()函数开始进入"suspend" console和冻结用户进程

/**
 * suspend_prepare - Prepare for entering system sleep state.
 * @state: Target system sleep state.
 *
 * Common code run for every system sleep state that can be entered (except for
 * hibernation).  Run suspend notifiers, allocate the "suspend" console and
 * freeze processes.
 *
 * Returns 0 on success, -EPERM when @state is not supported, or the error
 * from the notifier chain or from suspend_freeze_processes().
 */
static int suspend_prepare(suspend_state_t state)
{
    int error;

    if (!sleep_state_supported(state))
        return -EPERM;

    /* Set up the "suspend" console. */
    pm_prepare_console();

    /* Run the PM_SUSPEND_PREPARE notifiers; PM_POST_SUSPEND is the rollback event. */
    error = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND);
    if (error)
        goto Restore;

    trace_suspend_resume(TPS("freeze_processes"), 0, true);
    /* Freeze user processes. */
    error = suspend_freeze_processes();
    trace_suspend_resume(TPS("freeze_processes"), 0, false);
    if (!error)
        return 0;

    /* Freezing failed: record it, notify PM_POST_SUSPEND and undo the console switch. */
    suspend_stats.failed_freeze++;
    dpm_save_failed_step(SUSPEND_FREEZE);
    pm_notifier_call_chain(PM_POST_SUSPEND);
 Restore:
    pm_restore_console();
    return error;
}

step5:(注:下面贴出的代码与step4的suspend_prepare()完全相同,疑为原文误贴;按step3的描述,此处应为suspend_devices_and_enter(state)的实现)

/**
 * suspend_prepare - Prepare for entering system sleep state.
 * @state: Target system sleep state.
 *
 * Common code run for every system sleep state that can be entered (except for
 * hibernation).  Run suspend notifiers, allocate the "suspend" console and
 * freeze processes.
 *
 * Returns 0 on success, -EPERM when @state is not supported, or the error
 * from the notifier chain or from suspend_freeze_processes().
 *
 * NOTE(review): this listing repeats the suspend_prepare() definition already
 * shown under step4 of this document.
 */
static int suspend_prepare(suspend_state_t state)
{
    int error;

    if (!sleep_state_supported(state))
        return -EPERM;

    /* Set up the "suspend" console. */
    pm_prepare_console();

    /* Run the PM_SUSPEND_PREPARE notifiers; PM_POST_SUSPEND is the rollback event. */
    error = pm_notifier_call_chain_robust(PM_SUSPEND_PREPARE, PM_POST_SUSPEND);
    if (error)
        goto Restore;

    trace_suspend_resume(TPS("freeze_processes"), 0, true);
    /* Freeze user processes. */
    error = suspend_freeze_processes();
    trace_suspend_resume(TPS("freeze_processes"), 0, false);
    if (!error)
        return 0;

    /* Freezing failed: record it, notify PM_POST_SUSPEND and undo the console switch. */
    suspend_stats.failed_freeze++;
    dpm_save_failed_step(SUSPEND_FREEZE);
    pm_notifier_call_chain(PM_POST_SUSPEND);
 Restore:
    pm_restore_console();
    return error;
}

2.autosleep

autosleep也是从android wakelocks补丁集中演化而来的,用于取代wakelock中的自动休眠功能。它基于wakeup source实现。根据使用场景,低功耗状态可以是Freeze, Standby, Suspend to RAM和suspend to disk中的任意一种。它依赖wakeup events framework判断系统有没有事情正在做,只要系统没有正在处理和新增的wakeup events, 就尝试suspend, 如果suspend过程中有events产生,就resume。

autosleep的实现位于kernel/power/autosleep.c中,基于wakeup count以及suspend/hibernate功能,并通过PM core的main模块在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep)。代码实现参考:

首先,在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep)。对该文件的写操作主要通过pm_autosleep_set_state接口,进行pm的autosleep状态设置。

#ifdef CONFIG_PM_AUTOSLEEP
// Read handler for the autosleep attribute.
// Formats the current autosleep state into buf and returns sprintf()'s
// result: "off" for PM_SUSPEND_ON, the pm_states[] label (or "error" when
// the label is NULL) for states below PM_SUSPEND_MAX when CONFIG_SUSPEND is
// set, otherwise "disk" with CONFIG_HIBERNATION or "error" without it.
static ssize_t autosleep_show(struct kobject *kobj,
                  struct kobj_attribute *attr,
                  char *buf)
{
    suspend_state_t state = pm_autosleep_state();

    if (state == PM_SUSPEND_ON)
        return sprintf(buf, "off\n");

#ifdef CONFIG_SUSPEND
    if (state < PM_SUSPEND_MAX)
        return sprintf(buf, "%s\n", pm_states[state] ?
                    pm_states[state] : "error");
#endif
    // Fallback for any state not reported above.
#ifdef CONFIG_HIBERNATION
    return sprintf(buf, "disk\n");
#else
    return sprintf(buf, "error");
#endif
}

// Write handler for the autosleep attribute.
// Decodes buf into a sleep state and passes it to pm_autosleep_set_state().
// A decoded PM_SUSPEND_ON is accepted only when buf is exactly "off" or
// "off\n"; anything else yields -EINVAL. Returns n on success, otherwise the
// error from pm_autosleep_set_state().
static ssize_t autosleep_store(struct kobject *kobj,
                   struct kobj_attribute *attr,
                   const char *buf, size_t n)
{
    suspend_state_t target = decode_state(buf, n);
    int ret;

    if (target == PM_SUSPEND_ON) {
        if (strcmp(buf, "off") != 0 && strcmp(buf, "off\n") != 0)
            return -EINVAL;
    }

    // A PM_SUSPEND_MEM request is replaced by mem_sleep_current.
    if (target == PM_SUSPEND_MEM)
        target = mem_sleep_current;

    // Hand the requested state over to the autosleep core.
    ret = pm_autosleep_set_state(target);
    if (ret)
        return ret;

    return n;
}

// Define a sysfs attribute with 0644 permissions, here the /sys/power/autosleep file
power_attr(autosleep);
#endif /* CONFIG_PM_AUTOSLEEP */

在kernel PM初始化时(/kernel/power/main.c:pm_init),调用pm_autosleep_init初始化autosleep所需的两个全局参数autosleep_ws和autosleep_wq:

// SPDX-License-Identifier: GPL-2.0
/*
 * kernel/power/autosleep.c
 *
 * Opportunistic sleep support.
 *
 * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl>
 */

#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/pm_wakeup.h>

#include "power.h"

/* Currently configured autosleep target; written by pm_autosleep_set_state(). */
static suspend_state_t autosleep_state;
/* Workqueue on which suspend_work is queued; allocated in pm_autosleep_init(). */
static struct workqueue_struct *autosleep_wq;
/*
 * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source
 * is active, otherwise a deadlock with try_to_suspend() is possible.
 * Alternatively mutex_lock_interruptible() can be used.  This will then fail
 * if an auto_sleep cycle tries to freeze processes.
 */
static DEFINE_MUTEX(autosleep_lock);
/* Wakeup source registered under the name "autosleep" in pm_autosleep_init(). */
static struct wakeup_source *autosleep_ws;

1)autosleep_ws:在autosleep执行关键操作时, 阻止系统休眠

2)autosleep_wq:一个workqueue, 用于触发实际的休眠动作(休眠应由进程或者线程触发)

/*
 * pm_autosleep_init - set up the autosleep wakeup source and workqueue.
 *
 * Registers the "autosleep" wakeup source and allocates the ordered
 * "autosleep" workqueue.  If the workqueue cannot be allocated, the wakeup
 * source is unregistered again.
 *
 * Returns 0 on success or -ENOMEM if either step fails.
 */
int __init pm_autosleep_init(void)
{
	autosleep_ws = wakeup_source_register(NULL, "autosleep");
	if (!autosleep_ws)
		return -ENOMEM;

	autosleep_wq = alloc_ordered_workqueue("autosleep", 0);
	if (!autosleep_wq) {
		/* Roll back the wakeup source registered above. */
		wakeup_source_unregister(autosleep_ws);
		return -ENOMEM;
	}

	return 0;
}

前面说过,在开启CONFIG_PM_AUTOSLEEP配置时,向用户空间提供sysfs文件(/sys/power/autosleep)。主要通过pm_autosleep_set_state接口,进行pm的autosleep状态设置。pm_autosleep_set_state负责设置autosleep的状态, autosleep状态有freeze, standby, STR, STD等状态(具体依赖于系统支持的电源管理状态)。

/*
 * pm_autosleep_set_state - record the requested autosleep state.
 * @state: sleep state autosleep should use; PM_SUSPEND_ON turns it off.
 *
 * Stores @state in autosleep_state under autosleep_lock.  For a state above
 * PM_SUSPEND_ON the autosleep flag of the wakeup sources is set and the
 * suspend work is queued; otherwise the flag is cleared.
 *
 * Returns 0, or -EINVAL when @state is PM_SUSPEND_MAX or above and
 * CONFIG_HIBERNATION is not set.
 */
int pm_autosleep_set_state(suspend_state_t state)
{

#ifndef CONFIG_HIBERNATION
	if (state >= PM_SUSPEND_MAX)
		return -EINVAL;
#endif

	/* Keep the autosleep wakeup source active while acquiring the lock. */
	__pm_stay_awake(autosleep_ws);

	mutex_lock(&autosleep_lock);

	autosleep_state = state;

	__pm_relax(autosleep_ws);

	if (state > PM_SUSPEND_ON) { /* Requested state is above PM_SUSPEND_ON (a sleep state rather than the working state): enable autosleep. */
		pm_wakep_autosleep_enabled(true);
		queue_up_suspend_work();
	} else { /* Otherwise autosleep is switched off. */
		pm_wakep_autosleep_enabled(false);
	}

	mutex_unlock(&autosleep_lock);
	return 0;
}

一旦autosleep开启,将使能pm_wakep_autosleep_enabled和执行queue_up_suspend_work功能。

1)pm_wakep_autosleep_enabled主要用于更新wakeup source中和auto sleep有关的信息,代码和执行逻辑如下:

#ifdef CONFIG_PM_AUTOSLEEP
/**
 * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources.
 * @set: Whether to set or to clear the autosleep_enabled flags.
 *
 * Walks the wakeup_sources list inside an SRCU read-side section and updates
 * each entry under its own lock.  Entries whose flag already equals @set are
 * left untouched.
 */
void pm_wakep_autosleep_enabled(bool set)
{
	struct wakeup_source *ws;
	/* One timestamp is taken up front and used for every source. */
	ktime_t now = ktime_get();
	int srcuidx;

	srcuidx = srcu_read_lock(&wakeup_srcu);
	list_for_each_entry_rcu_locked(ws, &wakeup_sources, entry) {
		spin_lock_irq(&ws->lock);
		if (ws->autosleep_enabled != set) {
			ws->autosleep_enabled = set;
			/*
			 * For a currently active source: when enabling, restart
			 * its prevent-sleep timing at @now; when disabling, call
			 * update_prevent_sleep_time() with @now.
			 */
			if (ws->active) {
				if (set)
					ws->start_prevent_time = now;
				else
					update_prevent_sleep_time(ws, now);
			}
		}
		spin_unlock_irq(&ws->lock);
	}
	srcu_read_unlock(&wakeup_srcu, srcuidx);
}
#endif /* CONFIG_PM_AUTOSLEEP */

2)queue_up_suspend_work调用queue_work,将suspend_work提交到autosleep_wq工作队列;suspend_work通过DECLARE_WORK绑定的处理函数是try_to_suspend,try_to_suspend会调用hibernate或者pm_suspend(相关实现参考suspend)接口完成pm状态的设置。代码实现如下:

/*
 * try_to_suspend - work handler that attempts one autosleep cycle.
 * @work: the work item being run (not used here).
 *
 * Reads the wakeup count, then under autosleep_lock saves it and calls
 * hibernate() or pm_suspend() according to autosleep_state.  Afterwards the
 * work re-queues itself through queue_up_suspend_work(), except when
 * autosleep_state is PM_SUSPEND_ON, in which case it simply returns.
 */
static void try_to_suspend(struct work_struct *work)
{
    unsigned int initial_count, final_count;

    /* Could not obtain the wakeup count: skip this attempt and re-queue. */
    if (!pm_get_wakeup_count(&initial_count, true))
        goto out;

    mutex_lock(&autosleep_lock);

    /* Give up this attempt if the count cannot be saved or the system is not running. */
    if (!pm_save_wakeup_count(initial_count) ||
        system_state != SYSTEM_RUNNING) {
        mutex_unlock(&autosleep_lock);
        goto out;
    }

    /* Autosleep has been switched off: stop without re-queueing. */
    if (autosleep_state == PM_SUSPEND_ON) {
        mutex_unlock(&autosleep_lock);
        return;
    }
    /* PM_SUSPEND_MAX and above means hibernation; anything lower is a suspend state. */
    if (autosleep_state >= PM_SUSPEND_MAX)
        hibernate();
    else
        pm_suspend(autosleep_state);

    mutex_unlock(&autosleep_lock);

    if (!pm_get_wakeup_count(&final_count, false))
        goto out;

    /*
     * If the wakeup occurred for an unknown reason, wait to prevent the
     * system from trying to suspend and waking up in a tight loop.
     */
    if (final_count == initial_count)
        schedule_timeout_uninterruptible(HZ / 2);

 out:
    queue_up_suspend_work();
}

/* Work item whose handler is try_to_suspend(). */
static DECLARE_WORK(suspend_work, try_to_suspend);

/*
 * queue_up_suspend_work - queue suspend_work on autosleep_wq.
 *
 * Does nothing unless autosleep_state is above PM_SUSPEND_ON.
 */
void queue_up_suspend_work(void)
{
    if (autosleep_state <= PM_SUSPEND_ON)
        return;

    queue_work(autosleep_wq, &suspend_work);
}

3.poweroff

poweroff是通过sysrq处理程序(sysrq相关介绍参考linux内核中的sysrq-CSDN博客)来实现正常关闭机器电源的操作的。它的流程如下:

其代码如下:

// SPDX-License-Identifier: GPL-2.0-only
/*
 * poweroff.c - sysrq handler to gracefully power down machine.
 */

#include <linux/kernel.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
#include <linux/cpumask.h>

/*
 * When the user hits Sys-Rq o to power down the machine this is the
 * callback we use.
 *
 * do_poweroff() is the work handler: it ignores its argument and calls
 * kernel_power_off().
 */

static void do_poweroff(struct work_struct *dummy)
{
    kernel_power_off();
}

/* Work item that runs do_poweroff(). */
static DECLARE_WORK(poweroff_work, do_poweroff);

/*
 * handle_poweroff - sysrq handler for the poweroff key.
 * @key: the sysrq key that was hit (not used here).
 *
 * Schedules poweroff_work on the first CPU in cpu_online_mask.
 */
static void handle_poweroff(int key)
{
    unsigned int first_online = cpumask_first(cpu_online_mask);

    /* run sysrq poweroff on that cpu */
    schedule_work_on(first_online, &poweroff_work);
}

/* Sysrq operation descriptor for poweroff; its handler is handle_poweroff(). */
static const struct sysrq_key_op    sysrq_poweroff_op = {
    .handler        = handle_poweroff,
    .help_msg       = "poweroff(o)",
    .action_msg     = "Power Off",
    .enable_mask    = SYSRQ_ENABLE_BOOT,
};

/*
 * pm_sysrq_init - register sysrq_poweroff_op for the 'o' sysrq key.
 *
 * Always returns 0; the result of register_sysrq_key() is not checked.
 */
static int __init pm_sysrq_init(void)
{
    register_sysrq_key('o', &sysrq_poweroff_op);
    return 0;
}

/* Run pm_sysrq_init() as a subsys initcall. */
subsys_initcall(pm_sysrq_init);

当用户按下SysRq-o,或者向/proc/sysrq-trigger写入'o'时,将触发handle_poweroff处理函数;该函数把poweroff_work调度到第一个在线cpu上执行,并最终在do_poweroff()中调用kernel_power_off()函数关闭系统电源。

/*
 * kernel/reboot.c
 */

/**
 *	kernel_power_off - power_off the system
 *
 *	Shutdown everything and perform a clean system power_off.
 *
 *	The steps below run in a fixed order, ending with
 *	machine_power_off().
 */
void kernel_power_off(void)
{
	/* Common shutdown preparation, with SYSTEM_POWER_OFF as the target. */
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	/* Power-off specific preparation. */
	do_kernel_power_off_prepare();
	/* Continue on the reboot CPU. */
	migrate_to_reboot_cpu();
	syscore_shutdown();
	pr_emerg("Power down\n");
	/* Dump the kernel log with reason KMSG_DUMP_SHUTDOWN. */
	kmsg_dump(KMSG_DUMP_SHUTDOWN);
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

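在kernel_power_off()函数中依次执行:内核shutdown准备(kernel_shutdown_prepare) -> 关机前准备(do_kernel_power_off_prepare) -> 迁移到reboot cpu(migrate_to_reboot_cpu) -> syscore shutdown -> 打印关机日志 -> kmsg_dump -> 机器关机(machine_power_off)。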

猜你喜欢

转载自blog.csdn.net/tombaby_come/article/details/133890548
今日推荐