一、CFS调度器结构:
1、CFS运行队列:
每个CPU都有自己的运行队列,对应不同的调度器也有自己的运行队列,管理CFS调度的队列为cfs_rq:
/*
 * Per-CPU CFS runqueue (kernel excerpt; most fields elided).
 * Each scheduling class keeps its own runqueue per CPU; this one holds
 * the entities managed by the fair (CFS) scheduler.
 */
struct cfs_rq {
struct load_weight load; /* aggregate load weight of the queued scheduling entities */
unsigned int nr_running, h_nr_running; /* count of runnable entities on this queue (h_: presumably the hierarchical/group count — confirm against kernel source) */
u64 min_vruntime; /* smallest virtual runtime among the entities on this queue */
}
成员 | 描述 |
load | struct load_weight 用来记录权重信息 |
nr_running | 就绪队列上调度实体的个数 |
min_vruntime | 就绪队列上所有调度实体的最小虚拟时间 |
2、CFS调度实体:
调度的实体用于描述调度的具体信息,包含调度的权重、虚拟运行时间等:
/*
 * CFS scheduling entity (kernel excerpt): per-task (or per-group)
 * scheduling state — load weight, rbtree linkage and virtual-runtime
 * accounting.
 */
struct sched_entity {
struct load_weight load; /* for load-balancing */
struct rb_node run_node; /* linkage into the per-cfs_rq red-black tree */
struct list_head group_node;
unsigned int on_rq; /* 1 while queued on a runqueue, 0 after removal */
u64 exec_start;
u64 sum_exec_runtime; /* total real CPU time consumed */
u64 vruntime; /* total virtual (weighted) runtime consumed */
u64 prev_sum_exec_runtime;
u64 nr_migrations;
struct sched_entity *parent; /* parent entity in the group-scheduling hierarchy */
/* rq on which this entity is (to be) queued: */
struct cfs_rq *cfs_rq;
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
}
成员 | 描述 |
load | 权重信息 |
run_node | CFS调度器的每个就绪队列维护了一棵红黑树,run_node是挂载点 |
on_rq | 调度实体se加入就绪队列后,on_rq置1。从就绪队列删除后,on_rq置0 |
vruntime | 调度实体已经运行的虚拟时间总和 |
3、CFS调度类:
调度类是进程调度的具体实现,包含入队、出队、抢占判断、下一个运行
进程的选择等:
/*
 * CFS scheduling class (kernel excerpt): the function table through
 * which the core scheduler drives fair-class tasks — enqueue/dequeue,
 * preemption checks, next-task selection, tick handling, etc.
 */
const struct sched_class fair_sched_class = {
.next = &idle_sched_class, /* scheduling classes form a priority-ordered list; idle follows fair */
.enqueue_task = enqueue_task_fair, /* add a task to the runqueue (e.g. on wakeup) */
.dequeue_task = dequeue_task_fair, /* remove a task (e.g. on sleep) */
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup, /* should a woken task preempt current? */
.pick_next_task = pick_next_task_fair, /* choose the next task to run */
.put_prev_task = put_prev_task_fair,
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair, /* periodic tick accounting */
.task_fork = task_fork_fair, /* hook called from sched_fork() for new tasks */
.prio_changed = prio_changed_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_move_group = task_move_group_fair,
#endif
};
(1)CFS进程的创建:
do_fork()---->_do_fork()---->copy_process()---->sched_fork()
p->sched_class->task_fork 会调用到fair_sched_class的task_fork_fair成员:
/*
 * sched_fork (kernel excerpt): scheduler-side setup for a newly copied
 * task. Selects the scheduling class from the task's priority, then
 * gives that class a chance to run its task_fork hook.
 */
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
p->state = TASK_RUNNING;
if (dl_prio(p->prio)) {
/* deadline-class priority: fork is refused for this excerpt's path */
put_cpu();
return -EAGAIN;
} else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class; /* real-time priority -> RT class */
} else {
p->sched_class = &fair_sched_class; /* normal priority -> CFS */
}
if (p->sched_class->task_fork)
p->sched_class->task_fork(p); /* for CFS this calls task_fork_fair() */
......
}
/*
 * task_fork_fair (kernel excerpt): CFS hook invoked from sched_fork()
 * for a newly created task; initializes the child's vruntime relative
 * to the parent's runqueue. (Locals cfs_rq/curr/se and this_cpu are
 * declared in the elided part of the kernel source.)
 */
static void task_fork_fair(struct task_struct *p)
{
cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr;
__set_task_cpu(p, this_cpu); /* bind the new task to the current CPU */
update_curr(cfs_rq); /* refresh runtime accounting of the currently running entity */
if (curr)
se->vruntime = curr->vruntime; /* seed the child's vruntime from the parent's */
place_entity(cfs_rq, se, 1);
/*
 * Store vruntime relative to this queue's min_vruntime: the child may
 * be enqueued on a different CPU whose min_vruntime differs;
 * enqueue_entity() adds the destination queue's min_vruntime back.
 */
se->vruntime -= cfs_rq->min_vruntime;
}
task_fork_fair函数所完成的工作可以用下图清晰的表明,主要包含计算并更新虚拟时间vruntime:
(2)唤醒进程
wake_up_new_task -> activate_task ->enqueue_task ->p->sched_class->enqueue_task(rq, p, flags);
这里会调用到enqueue_task_fair成员:
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
/* (excerpt: locals se/cfs_rq are declared in the elided kernel source) */
for_each_sched_entity(se) {
if (se->on_rq) /* already on a runqueue — no need to add it again */
break;
cfs_rq = cfs_rq_of(se);
enqueue_entity(cfs_rq, se, flags); /* insert this entity into its cfs_rq */
}
......
}
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
 * Re-base the entity's vruntime onto this queue: for a newly created
 * (or migrated) entity whose vruntime was stored relative to its old
 * queue, add this queue's min_vruntime back.
 */
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
se->vruntime += cfs_rq->min_vruntime;
/* update the current task's vruntime and the queue's min_vruntime */
update_curr(cfs_rq);
update_stats_enqueue(cfs_rq, se);
check_spread(cfs_rq, se);
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se); /* insert the entity into the cfs_rq red-black tree */
se->on_rq = 1; /* mark the entity as queued */
......
}
(3)调度进程:
__schedule()是调度的核心函数,其作用是让调度器选择和切换到一个合适的进程运行,内核代码中
对于schedule的调度场景描述得非常详细:
/*
* __schedule() is the main scheduler function.
*
* The main means of driving the scheduler and thus entering this function are:
*
* 1. Explicit blocking: mutex, semaphore, waitqueue, etc.
*
* 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
* paths. For example, see arch/x86/entry_64.S.
* To drive preemption between tasks, the scheduler sets the flag in timer
* interrupt handler scheduler_tick().
*
* 3. Wakeups don't really cause entry into schedule(). They add a
* task to the run-queue and that's it.
*
* Now, if the new task added to the run-queue preempts the current
* task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
* called on the nearest possible occasion:
*
* - If the kernel is preemptible (CONFIG_PREEMPT=y):
*
* - in syscall or exception context, at the next outmost
* preempt_enable(). (this might be as soon as the wake_up()'s
* spin_unlock()!)
* - in IRQ context, return from interrupt-handler to
* preemptible context
*
* - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
* then at the next:
*
* - cond_resched() call
* - explicit schedule() call
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
*
* WARNING: must be called with preemption disabled!
*/
/* (heavily elided excerpt — only the three key steps are shown) */
static void __sched notrace __schedule(bool preempt)
{
next = pick_next_task(rq, prev); /* choose the next task to run */
clear_tsk_need_resched(prev); /* clear the TIF_NEED_RESCHED flag on the outgoing task */
rq = context_switch(rq, prev, next); /* switch from prev to next */
}
pick_next_task会调用到CFS的成员:fair_sched_class.pick_next_task(rq, prev)选择一个合适的进程执行。
用下图总结性的看下进程的切换流程:
(4)进程的睡眠:
CFS 调度管理的进程在睡眠时会调用到 dequeue_task_fair:
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);//将调度实体se从对应的就绪队列cfs_rq上删除
}
}
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
 * Update run-time statistics of the 'current'.
 */
update_curr(cfs_rq); /* refresh vruntime accounting before removal */
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se); /* remove the entity from the red-black tree */
se->on_rq = 0; /* mark the entity as no longer queued */
account_entity_dequeue(cfs_rq, se);
/*
 * Unless the task is going to sleep, store its vruntime relative to
 * this queue's min_vruntime so it can be re-based on the destination
 * queue; enqueue_entity() performs the matching '+='.
 */
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime; /* subtract this runqueue's minimum virtual runtime */
}
作者:frank_zyp
您的支持是对博主最大的鼓励,感谢您的认真阅读。
本文无所谓版权,欢迎转载。