Chapter 19 The System Architecture and Drivers of Linux Power Management: PM QoS (Power Management Quality of Service)

19.7 PM QoS (Power Management Quality of Service)

The Linux kernel's PM QoS framework offers a set of interfaces to the kernel and to applications through which callers can state their performance expectations. Requests fall into two classes: system-wide requests, expressed through parameters such as cpu_dma_latency, network_latency, and network_throughput; and per-device requests, which an individual device issues according to its own performance needs.
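
The per-device flavor is handled by the dev_pm_qos_* helpers in drivers/base/power/qos.c, which this section does not list. As a minimal sketch, assuming the dev_pm_qos_add_request()/dev_pm_qos_remove_request() API of this kernel era (mydev_probe(), mydev_remove(), and the value 100 are hypothetical names and numbers for illustration), a driver could bound its device's acceptable resume latency like so:

#include <linux/device.h>
#include <linux/pm_qos.h>

static struct dev_pm_qos_request mydev_qos_req;

static int mydev_probe(struct device *dev)
{
        /* Ask the PM core to keep this device's resume latency within
         * 100us; the core aggregates this with any other requests made
         * against the same device. */
        return dev_pm_qos_add_request(dev, &mydev_qos_req,
                                      DEV_PM_QOS_RESUME_LATENCY, 100);
}

static void mydev_remove(struct device *dev)
{
        /* Withdraw the constraint when the driver is unbound */
        dev_pm_qos_remove_request(&mydev_qos_req);
}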

In kernel space, a PM QoS request is registered with the pm_qos_add_request() function:

linux/pm_qos.h

void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value);

kernel/power/qos.c

/**
 * pm_qos_add_request - inserts new qos request into the list
 * @req: pointer to a preallocated handle
 * @pm_qos_class: identifies which list of qos request to use
 * @value: defines the qos request
 *
 * This function inserts a new entry in the pm_qos_class list of requested qos
 * performance characteristics.  It recomputes the aggregate QoS expectations
 * for the pm_qos_class of parameters and initializes the pm_qos_request
 * handle.  Caller needs to save this handle for later use in updates and
 * removal.
 */
void pm_qos_add_request(struct pm_qos_request *req,
                        int pm_qos_class, s32 value)
{
        if (!req) /*guard against callers passing in null */
                return;

        if (pm_qos_request_active(req)) {
                WARN(1, KERN_ERR "pm_qos_add_request() called for already added request\n");
                return;
        }

        switch (req->type) {
        case PM_QOS_REQ_AFFINE_CORES:
                if (cpumask_empty(&req->cpus_affine)) {
                        req->type = PM_QOS_REQ_ALL_CORES;
                        cpumask_setall(&req->cpus_affine);
                        WARN(1, KERN_ERR "Affine cores not set for request with affinity flag\n");
                }
                break;
#ifdef CONFIG_SMP
        case PM_QOS_REQ_AFFINE_IRQ:
                if (irq_can_set_affinity(req->irq)) {
                        struct irq_desc *desc = irq_to_desc(req->irq);
                        struct cpumask *mask = desc->irq_data.affinity;

                        /* Get the current affinity */
                        cpumask_copy(&req->cpus_affine, mask);
                        req->irq_notify.irq = req->irq;
                        req->irq_notify.notify = pm_qos_irq_notify;
                        req->irq_notify.release = pm_qos_irq_release;

                } else {
                        req->type = PM_QOS_REQ_ALL_CORES;
                        cpumask_setall(&req->cpus_affine);
                        WARN(1, KERN_ERR "IRQ-%d not set for request with affinity flag\n",
                                        req->irq);
                }
                break;
#endif
        default:
                WARN(1, KERN_ERR "Unknown request type %d\n", req->type);
                /* fall through */
        case PM_QOS_REQ_ALL_CORES:
                cpumask_setall(&req->cpus_affine);
                break;
        }

        req->pm_qos_class = pm_qos_class;
        INIT_DELAYED_WORK(&req->work, pm_qos_work_fn);
        trace_pm_qos_add_request(pm_qos_class, value);
        pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints,
                             req, PM_QOS_ADD_REQ, value);

#ifdef CONFIG_SMP
        if (req->type == PM_QOS_REQ_AFFINE_IRQ &&
                        irq_can_set_affinity(req->irq)) {
                int ret = 0;

                ret = irq_set_affinity_notifier(req->irq,
                                        &req->irq_notify);
                if (ret) {
                        WARN(1, "IRQ affinity notify set failed\n");
                        req->type = PM_QOS_REQ_ALL_CORES;
                        cpumask_setall(&req->cpus_affine);
                        pm_qos_update_target(
                                pm_qos_array[pm_qos_class]->constraints,
                                req, PM_QOS_UPDATE_REQ, value);
                }
        }
#endif
}

EXPORT_SYMBOL_GPL(pm_qos_add_request);
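
Note that the listing above comes from a vendor kernel: the type, irq, and cpus_affine fields and the PM_QOS_REQ_* request types are not in the mainline pm_qos of this era, which applies every request to all cores. A minimal sketch against that vendor API (the function name and the value 20 are hypothetical) ties the latency request to whichever cores service a device's interrupt:

#include <linux/pm_qos.h>

static struct pm_qos_request mydev_irq_req;

static void mydev_request_latency(int irq)
{
        /* Constrain only the CPUs that handle this IRQ;
         * pm_qos_add_request() copies the IRQ's current affinity and
         * registers an affinity notifier, as shown in the listing above. */
        mydev_irq_req.type = PM_QOS_REQ_AFFINE_IRQ;
        mydev_irq_req.irq = irq;
        pm_qos_add_request(&mydev_irq_req, PM_QOS_CPU_DMA_LATENCY, 20);
}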

An existing PM QoS request is updated with pm_qos_update_request() or, for a bounded period of time, with pm_qos_update_request_timeout():

linux/pm_qos.h

void pm_qos_update_request(struct pm_qos_request *req, s32 new_value);

void pm_qos_update_request_timeout(struct pm_qos_request *req,
                                   s32 new_value, unsigned long timeout_us);

kernel/power/qos.c

/**
 * pm_qos_update_request - modifies an existing qos request
 * @req : handle to list element holding a pm_qos request to use
 * @value: defines the qos request
 *
 * Updates an existing qos request for the pm_qos_class of parameters along
 * with updating the target pm_qos_class value.
 *
 * Attempts are made to make this code callable on hot code paths.
 */
void pm_qos_update_request(struct pm_qos_request *req,
                           s32 new_value)
{
        if (!req) /*guard against callers passing in null */
                return;

        if (!pm_qos_request_active(req)) {
                WARN(1, KERN_ERR "pm_qos_update_request() called for unknown object\n");
                return;
        }

        cancel_delayed_work_sync(&req->work);
        __pm_qos_update_request(req, new_value);
}
EXPORT_SYMBOL_GPL(pm_qos_update_request);


/**
 * pm_qos_update_request_timeout - modifies an existing qos request temporarily.
 * @req : handle to list element holding a pm_qos request to use
 * @new_value: defines the temporal qos request
 * @timeout_us: the effective duration of this qos request in usecs.
 *
 * After timeout_us, this qos request is cancelled automatically.
 */
void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value,
                                   unsigned long timeout_us)
{
        if (!req)
                return;
        if (WARN(!pm_qos_request_active(req),
                 "%s called for unknown object.", __func__))
                return;

        cancel_delayed_work_sync(&req->work);
        trace_pm_qos_update_request_timeout(req->pm_qos_class,
                                            new_value, timeout_us);
        if (new_value != req->node.prio)
                pm_qos_update_target(
                        pm_qos_array[req->pm_qos_class]->constraints,
                        req, PM_QOS_UPDATE_REQ, new_value);

        schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us));
}
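
pm_qos_update_request_timeout() suits bursty workloads: the constraint tightens for a bounded window and then lapses on its own, the queued delayed work standing in for a matching update call. A minimal sketch (mydev_req is assumed to have been registered earlier with pm_qos_add_request(); the 34us and 10ms figures are arbitrary):

#include <linux/pm_qos.h>
#include <linux/time.h>

static struct pm_qos_request mydev_req;

/* Called on the first packet of a traffic burst, for example */
static void mydev_on_burst(void)
{
        /* Cap CPU wakeup latency at 34us for the next 10ms; when the
         * timeout fires, the delayed work resets the request to
         * PM_QOS_DEFAULT_VALUE automatically. */
        pm_qos_update_request_timeout(&mydev_req, 34, 10 * USEC_PER_MSEC);
}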

A registered PM QoS request is removed with pm_qos_remove_request():

linux/pm_qos.h

void pm_qos_remove_request(struct pm_qos_request *req);

kernel/power/qos.c

/**
 * pm_qos_remove_request - modifies an existing qos request
 * @req: handle to request list element
 *
 * Will remove pm qos request from the list of constraints and
 * recompute the current target value for the pm_qos_class.  Call this
 * on slow code paths.
 */
void pm_qos_remove_request(struct pm_qos_request *req)
{
        if (!req) /*guard against callers passing in null */
                return;
                /* silent return to keep pcm code cleaner */

        if (!pm_qos_request_active(req)) {
                WARN(1, "pm_qos_remove_request() called for unknown object\n");
                return;
        }

        cancel_delayed_work_sync(&req->work);

#ifdef CONFIG_SMP
        if (req->type == PM_QOS_REQ_AFFINE_IRQ) {
                int ret = 0;
                /* Get the current affinity */
                ret = irq_set_affinity_notifier(req->irq, NULL);
                if (ret)
                        WARN(1, "IRQ affinity notify set failed\n");
        }
#endif

        trace_pm_qos_remove_request(req->pm_qos_class, PM_QOS_DEFAULT_VALUE);
        pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints,
                             req, PM_QOS_REMOVE_REQ,
                             PM_QOS_DEFAULT_VALUE);
        memset(req, 0, sizeof(*req));
}
EXPORT_SYMBOL_GPL(pm_qos_remove_request);
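
On the consumer side, the current aggregate target for a class can be read back with pm_qos_request(), which the governors in Listings 19.11 and 19.12 below use, and a subsystem can ask to be called back whenever the target changes. A minimal sketch, assuming the pm_qos_add_notifier() API from the same kernel/power/qos.c (the mydev_* names are hypothetical):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>

static int mydev_qos_notify(struct notifier_block *nb,
                            unsigned long new_target, void *unused)
{
        /* new_target is the recomputed aggregate for the class */
        pr_info("cpu_dma_latency target is now %lu us\n", new_target);
        return NOTIFY_OK;
}

static struct notifier_block mydev_qos_nb = {
        .notifier_call = mydev_qos_notify,
};

static int __init mydev_qos_init(void)
{
        return pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, &mydev_qos_nb);
}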

For example, in the camera driver drivers/media/platform/via-camera.c, once streaming starts the following code keeps the CPU out of C3-level deep idle:

static int viacam_streamon(struct file *filp, void *priv, enum v4l2_buf_type t)
{
        struct via_camera *cam = priv;
        int ret = 0;

        if (t != V4L2_BUF_TYPE_VIDEO_CAPTURE)
                return -EINVAL;

        mutex_lock(&cam->lock); /* a mutex may block while held, suiting a large critical section */
        if (cam->opstate != S_IDLE) {
                ret = -EBUSY;
                goto out;
        }
        /*
         * Enforce the V4l2 "only one owner gets to read data" rule.
         */
        if (cam->owner && cam->owner != filp) {
                ret = -EBUSY;
                goto out;
        }
        cam->owner = filp;
        /*
         * Configure things if need be.
         */
        if (test_bit(CF_CONFIG_NEEDED, &cam->flags)) {
                ret = viacam_configure_sensor(cam);
                if (ret)
                        goto out;
                ret = viacam_config_controller(cam);
                if (ret)
                        goto out;
        }
        /*
         * If the CPU goes into C3, the DMA transfer gets corrupted and
         * users start filing unsightly bug reports.  Put in a "latency"
         * requirement which will keep the CPU out of the deeper sleep
         * states.
         */
        pm_qos_add_request(&cam->qos_request, PM_QOS_CPU_DMA_LATENCY, 50); /* keep the CPU out of C3-level deep idle */
        /*
         * Fire things up.
         */
        INIT_LIST_HEAD(&cam->buffer_queue);
        ret = videobuf_streamon(&cam->vb_queue);
        if (!ret)
                viacam_start_engine(cam);
out:
        mutex_unlock(&cam->lock);
        return ret;
}

This works because the CPUIdle subsystem picks a C state according to the outstanding PM_QOS_CPU_DMA_LATENCY requests. ladder_select_state() in drivers/cpuidle/governors/ladder.c, for instance, compares a target C state's exit_latency against the QoS requirement, as shown in Listing 19.11.

Listing 19.11 QoS handling in the CPUIdle LADDER governor

/**
 * ladder_select_state - selects the next state to enter
 * @drv: cpuidle driver
 * @dev: the CPU
 */
static int ladder_select_state(struct cpuidle_driver *drv,
                                struct cpuidle_device *dev)
{
        struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
        struct ladder_device_state *last_state;
        int last_residency, last_idx = ldev->last_state_idx;
        int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);

        /* Special case when user has set very strict latency requirement */
        if (unlikely(latency_req == 0)) {
                ladder_do_selection(ldev, last_idx, 0);
                return 0;
        }

        last_state = &ldev->states[last_idx];

        if (drv->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
                last_residency = cpuidle_get_last_residency(dev) -
                                 drv->states[last_idx].exit_latency;
        else
                last_residency = last_state->threshold.promotion_time + 1;

        /* consider promotion */
        if (last_idx < drv->state_count - 1 &&
            !drv->states[last_idx + 1].disabled &&
            !dev->states_usage[last_idx + 1].disable &&
            last_residency > last_state->threshold.promotion_time &&
            drv->states[last_idx + 1].exit_latency <= latency_req) {
                last_state->stats.promotion_count++;
                last_state->stats.demotion_count = 0;
                if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
                        ladder_do_selection(ldev, last_idx, last_idx + 1);
                        return last_idx + 1;
                }
        }

        /* consider demotion */
        if (last_idx > CPUIDLE_DRIVER_STATE_START &&
            (drv->states[last_idx].disabled ||
            dev->states_usage[last_idx].disable ||
            drv->states[last_idx].exit_latency > latency_req)) {
                int i;

                for (i = last_idx - 1; i > CPUIDLE_DRIVER_STATE_START; i--) {
                        if (drv->states[i].exit_latency <= latency_req)
                                break;
                }
                ladder_do_selection(ldev, last_idx, i);
                return i;
        }

        if (last_idx > CPUIDLE_DRIVER_STATE_START &&
            last_residency < last_state->threshold.demotion_time) {
                last_state->stats.demotion_count++;
                last_state->stats.promotion_count = 0;
                if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
                        ladder_do_selection(ldev, last_idx, last_idx - 1);
                        return last_idx - 1;
                }
        }

        /* otherwise remain at the current state */
        return last_idx;
}

When deciding whether to enter a deeper C state, LADDER requires that the state's exit_latency not exceed the latency obtained from pm_qos_request(PM_QOS_CPU_DMA_LATENCY). With the via-camera request of 50 in effect, for example, the governor will not promote to any state whose exit_latency is above 50 µs, and it demotes out of a state that violates the limit.

The same logic appears in the MENU governor, drivers/cpuidle/governors/menu.c, shown in Listing 19.12. MENU goes one step further: it clamps latency_req to interactivity_req, the predicted idle duration divided by a performance multiplier, so a short expected idle period tightens the effective latency limit even below what QoS alone would allow.

Listing 19.12 QoS handling in the CPUIdle MENU governor

/**
 * menu_select - selects the next idle state to enter
 * @drv: cpuidle driver containing state data
 * @dev: the CPU
 */
static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
        struct menu_device *data = this_cpu_ptr(&menu_devices);
        int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
        int i;
        unsigned int interactivity_req;
        unsigned long nr_iowaiters, cpu_load;

        if (data->needs_update) {
                menu_update(drv, dev);
                data->needs_update = 0;
        }

        data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;

        /* Special case when user has set very strict latency requirement */
        if (unlikely(latency_req == 0))
                return 0;

        /* determine the expected residency time, round up */
        data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
        get_iowait_load(&nr_iowaiters, &cpu_load);
        data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
        /*
         * Force the result of multiplication to be 64 bits even if both
         * operands are 32 bits.
         * Make sure to round up for half microseconds.
         */
        data->predicted_us = div_round64((uint64_t)data->next_timer_us *
                                         data->correction_factor[data->bucket],
                                         RESOLUTION * DECAY);

        get_typical_interval(data);

        /*
         * Performance multiplier defines a minimum predicted idle
         * duration / latency ratio. Adjust the latency limit if
         * necessary.
         */
        interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
        if (latency_req > interactivity_req)
                latency_req = interactivity_req;

        /*
         * We want to default to C1 (hlt), not to busy polling
         * unless the timer is happening really really soon.
         */
        if (data->next_timer_us > 5 &&
            !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
                dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
                data->last_state_idx = CPUIDLE_DRIVER_STATE_START;

        /*
         * Find the idle state with the lowest power while satisfying
         * our constraints.
         */
        for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
                struct cpuidle_state *s = &drv->states[i];
                struct cpuidle_state_usage *su = &dev->states_usage[i];

                if (s->disabled || su->disable)
                        continue;
                if (s->target_residency > data->predicted_us)
                        continue;
                if (s->exit_latency > latency_req)
                        continue;

                data->last_state_idx = i;
        }

        return data->last_state_idx;
}

Returning to drivers/media/platform/via-camera.c: when streaming stops, the driver cancels its PM_QOS_CPU_DMA_LATENCY requirement with the following code, letting the governors above select deep C states again:

static int viacam_streamoff(struct file *filp, void *priv, enum v4l2_buf_type t)
{
        struct via_camera *cam = priv;
        int ret;

        if (t != V4L2_BUF_TYPE_VIDEO_CAPTURE)
                return -EINVAL;
        mutex_lock(&cam->lock);
        if (cam->opstate != S_RUNNING) {
                ret = -EINVAL;
                goto out;
        }
        pm_qos_remove_request(&cam->qos_request);
        viacam_stop_engine(cam);
        /*
         * Videobuf will recycle all of the outstanding buffers, but
         * we should be sure we don't retain any references to
         * any of them.
         */
        ret = videobuf_streamoff(&cam->vb_queue);
        INIT_LIST_HEAD(&cam->buffer_queue);
out:
        mutex_unlock(&cam->lock);
        return ret;
}

Note:

User-space applications issue QoS performance requests by writing values to the /dev/cpu_dma_latency, /dev/network_latency, and /dev/network_throughput device nodes.
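
A minimal user-space sketch of that interface (the 20us figure is arbitrary): write a binary s32 latency value, in microseconds, to the node and keep the file descriptor open for as long as the constraint should hold; the kernel drops the request automatically when the descriptor is closed.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int32_t latency_us = 20;        /* requested worst-case latency */
        int fd = open("/dev/cpu_dma_latency", O_RDWR);

        if (fd < 0) {
                perror("open /dev/cpu_dma_latency");
                return 1;
        }
        if (write(fd, &latency_us, sizeof(latency_us)) != sizeof(latency_us)) {
                perror("write");
                close(fd);
                return 1;
        }
        /* ... latency-sensitive work runs here ... */
        pause();        /* request is dropped when fd is closed or we exit */
        return 0;
}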


