一、CFS调度器结构:
1、CFS运行队列:
每个CPU都有自己的运行队列,对应不同的调度器也有自己的运行队列,管理CFS调度的队列为cfs_rq:
/*
 * Per-CPU CFS runqueue (kernel excerpt; most fields elided).
 * Each scheduling class keeps its own runqueue per CPU; this one holds
 * the entities managed by the fair (CFS) scheduler.
 */
struct cfs_rq {
struct load_weight load; /* aggregate load weight of the queued scheduling entities */
unsigned int nr_running, h_nr_running; /* count of runnable entities on this queue (h_: presumably the hierarchical/group count — confirm against kernel source) */
u64 min_vruntime; /* smallest virtual runtime among the entities on this queue */
}
成员 | 描述 |
load | struct load_weight 用来记录权重信息 |
nr_running | 就绪队列上调度实体的个数 |
min_vruntime | 就绪队列上所有调度实体的最小虚拟时间 |
2、CFS调度实体:
调度的实体用于描述调度的具体信息,包含调度的权重、虚拟运行时间等:
/*
 * CFS scheduling entity (kernel excerpt): per-task (or per-group)
 * scheduling state — load weight, rbtree linkage and virtual-runtime
 * accounting.
 */
struct sched_entity {
struct load_weight load; /* for load-balancing */
struct rb_node run_node; /* linkage into the per-cfs_rq red-black tree */
struct list_head group_node;
unsigned int on_rq; /* 1 while queued on a runqueue, 0 after removal */
u64 exec_start;
u64 sum_exec_runtime; /* total real CPU time consumed */
u64 vruntime; /* total virtual (weighted) runtime consumed */
u64 prev_sum_exec_runtime;
u64 nr_migrations;
struct sched_entity *parent; /* parent entity in the group-scheduling hierarchy */
/* rq on which this entity is (to be) queued: */
struct cfs_rq *cfs_rq;
/* rq "owned" by this entity/group: */
struct cfs_rq *my_q;
}
成员 | 描述 |
load | 权重信息 |
run_node | CFS调度器的每个就绪队列维护了一棵红黑树,run_node是挂载点 |
on_rq | 调度实体se加入就绪队列后,on_rq置1。从就绪队列删除后,on_rq置0 |
vruntime | 调度实体已经运行的虚拟时间总和 |
3、CFS调度类:
调度类是进程调度的具体实现,包含入队、出队、抢占判断、下一个运行
进程的选择等:
/*
 * CFS scheduling class (kernel excerpt): the function table through
 * which the core scheduler drives fair-class tasks — enqueue/dequeue,
 * preemption checks, next-task selection, tick handling, etc.
 */
const struct sched_class fair_sched_class = {
.next = &idle_sched_class, /* scheduling classes form a priority-ordered list; idle follows fair */
.enqueue_task = enqueue_task_fair, /* add a task to the runqueue (e.g. on wakeup) */
.dequeue_task = dequeue_task_fair, /* remove a task (e.g. on sleep) */
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup, /* should a woken task preempt current? */
.pick_next_task = pick_next_task_fair, /* choose the next task to run */
.put_prev_task = put_prev_task_fair,
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair, /* periodic tick accounting */
.task_fork = task_fork_fair, /* hook called from sched_fork() for new tasks */
.prio_changed = prio_changed_fair,
.switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
.update_curr = update_curr_fair,
#ifdef CONFIG_FAIR_GROUP_SCHED
.task_move_group = task_move_group_fair,
#endif
};
(1)CFS进程的创建:
do_fork()---->_do_fork()---->copy_process()---->sched_fork()
p->sched_class->task_fork 会调用到fair_sched_class的task_fork_fair成员:
/*
 * sched_fork (kernel excerpt): scheduler-side setup for a newly copied
 * task. Selects the scheduling class from the task's priority, then
 * gives that class a chance to run its task_fork hook.
 */
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
p->state = TASK_RUNNING;
if (dl_prio(p->prio)) {
/* deadline-class priority: fork is refused for this excerpt's path */
put_cpu();
return -EAGAIN;
} else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class; /* real-time priority -> RT class */
} else {
p->sched_class = &fair_sched_class; /* normal priority -> CFS */
}
if (p->sched_class->task_fork)
p->sched_class->task_fork(p); /* for CFS this calls task_fork_fair() */
......
}
/*
 * task_fork_fair (kernel excerpt): CFS hook invoked from sched_fork()
 * for a newly created task; initializes the child's vruntime relative
 * to the parent's runqueue. (Locals cfs_rq/curr/se and this_cpu are
 * declared in the elided part of the kernel source.)
 */
static void task_fork_fair(struct task_struct *p)
{
cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr;
__set_task_cpu(p, this_cpu); /* bind the new task to the current CPU */
update_curr(cfs_rq); /* refresh runtime accounting of the currently running entity */
if (curr)
se->vruntime = curr->vruntime; /* seed the child's vruntime from the parent's */
place_entity(cfs_rq, se, 1);
/*
 * Store vruntime relative to this queue's min_vruntime: the child may
 * be enqueued on a different CPU whose min_vruntime differs;
 * enqueue_entity() adds the destination queue's min_vruntime back.
 */
se->vruntime -= cfs_rq->min_vruntime;
}
task_fork_fair函数所完成的工作可以用下图清晰的表明,主要包含计算并更新虚拟时间vruntime:
(2)唤醒进程
wake_up_new_task -> activate_task ->enqueue_task ->p->sched_class->enqueue_task(rq, p, flags);
这里会调用到enqueue_task_fair成员:
/*
* The enqueue_task method is called before nr_running is
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
/* (excerpt: locals se/cfs_rq are declared in the elided kernel source) */
for_each_sched_entity(se) {
if (se->on_rq) /* already on a runqueue — no need to add it again */
break;
cfs_rq = cfs_rq_of(se);
enqueue_entity(cfs_rq, se, flags); /* insert this entity into its cfs_rq */
}
......
}
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
 * Re-base the entity's vruntime onto this queue: for a newly created
 * (or migrated) entity whose vruntime was stored relative to its old
 * queue, add this queue's min_vruntime back.
 */
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
se->vruntime += cfs_rq->min_vruntime;
/* update the current task's vruntime and the queue's min_vruntime */
update_curr(cfs_rq);
update_stats_enqueue(cfs_rq, se);
check_spread(cfs_rq, se);
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se); /* insert the entity into the cfs_rq red-black tree */
se->on_rq = 1; /* mark the entity as queued */
......
}
(3)调度进程:
__schedule()是调度的核心函数,其作用是让调度器选择和切换到一个合适的进程运行,内核代码中
对于schedule的调度场景描述得非常详细:
/*
* __schedule() is the main scheduler function.
*
* The main means of driving the scheduler and thus entering this function are:
*
* 1. Explicit blocking: mutex, semaphore, waitqueue, etc.
*
* 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
* paths. For example, see arch/x86/entry_64.S.
* To drive preemption between tasks, the scheduler sets the flag in timer
* interrupt handler scheduler_tick().
*
* 3. Wakeups don't really cause entry into schedule(). They add a
* task to the run-queue and that's it.
*
* Now, if the new task added to the run-queue preempts the current
* task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
* called on the nearest possible occasion:
*
* - If the kernel is preemptible (CONFIG_PREEMPT=y):
*
* - in syscall or exception context, at the next outmost
* preempt_enable(). (this might be as soon as the wake_up()'s
* spin_unlock()!)
* - in IRQ context, return from interrupt-handler to
* preemptible context
*
* - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
* then at the next:
*
* - cond_resched() call
* - explicit schedule() call
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
*
* WARNING: must be called with preemption disabled!
*/
/* (heavily elided excerpt — only the three key steps are shown) */
static void __sched notrace __schedule(bool preempt)
{
next = pick_next_task(rq, prev); /* choose the next task to run */
clear_tsk_need_resched(prev); /* clear the TIF_NEED_RESCHED flag on the outgoing task */
rq = context_switch(rq, prev, next); /* switch from prev to next */
}
pick_next_task会调用到CFS的成员:fair_sched_class.pick_next_task(rq, prev)选择一个合适的进程执行。
用下图总结性的看下进程的切换流程:
(4)进程的睡眠:
CFS 调度管理的进程在睡眠时会调用到 dequeue_task_fair:
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
dequeue_entity(cfs_rq, se, flags);//将调度实体se从对应的就绪队列cfs_rq上删除
}
}
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
 * Update run-time statistics of the 'current'.
 */
update_curr(cfs_rq); /* refresh vruntime accounting before removal */
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se); /* remove the entity from the red-black tree */
se->on_rq = 0; /* mark the entity as no longer queued */
account_entity_dequeue(cfs_rq, se);
/*
 * Unless the task is going to sleep, store its vruntime relative to
 * this queue's min_vruntime so it can be re-based on the destination
 * queue; enqueue_entity() performs the matching '+='.
 */
if (!(flags & DEQUEUE_SLEEP))
se->vruntime -= cfs_rq->min_vruntime; /* subtract this runqueue's minimum virtual runtime */
}
作者:frank_zyp
您的支持是对博主最大的鼓励,感谢您的认真阅读。
本文无所谓版权,欢迎转载。