Linux Kernel Engineering — Section 2.09: A Detailed Walkthrough of load_balance()

Kernel version: linux-2.6.34

Knowledge Preparation:
http://www.ibm.com/developerworks/cn/linux/l-cn-schldom/index.html

Function:
Checks whether this_cpu is balanced within its scheduling domain, and attempts to move tasks from the busiest runqueue onto it if an imbalance is found.

Data Structure:
/*
 * Idle state of a cpu as seen by the load balancer.
 * CPU_NEWLY_IDLE marks a cpu that has just run out of runnable tasks;
 * CPU_MAX_IDLE_TYPES is not a real state — it only sizes the
 * per-idle-type schedstat arrays (lb_count[], lb_failed[], ...).
 */
enum cpu_idle_type {
    CPU_IDLE,
    CPU_NOT_IDLE,
    CPU_NEWLY_IDLE,
    CPU_MAX_IDLE_TYPES
};

/* Fixed-size bitmap with one bit per possible cpu (NR_CPUS bits). */
typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;

/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);


/*
 * load_balance - check this_cpu to ensure it is balanced within sched
 * domain @sd, and attempt to pull tasks from the busiest runqueue in
 * the domain if an imbalance exists.
 *
 * @this_cpu: the cpu performing the balancing (tasks are pulled onto it)
 * @this_rq:  runqueue of @this_cpu
 * @sd:       the sched_domain whose balance is being checked
 * @idle:     idle state of @this_cpu (CPU_IDLE / CPU_NOT_IDLE / CPU_NEWLY_IDLE)
 * @balance:  out-parameter; cleared by find_busiest_group() when this_cpu
 *            is not the appropriate cpu to balance at this domain level
 *
 * Returns the number of tasks moved (ld_moved), 0 when balanced, or -1
 * in the SMT power-savings case to tell the caller not to consider this
 * cpu idle at higher domain levels.
 */
static int load_balance(int this_cpu, struct rq *this_rq,
            struct sched_domain *sd, enum cpu_idle_type idle,
            int *balance)
{
    int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
    struct sched_group *group;
    unsigned long imbalance;
    struct rq *busiest;
    unsigned long flags;


/* Per-cpu scratch mask holding the candidate cpus for this balance pass. */

    struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);


/*
 * Start from every active cpu (cpu_active_mask is the global mask declared
 * in cpumask.h); cpus whose tasks are all pinned get cleared on retry below.
 */

    cpumask_copy(cpus, cpu_active_mask);

    /*
     * When power savings policy is enabled for the parent domain, idle
     * sibling can pick up load irrespective of busy siblings. In this case,
     * let the state of idle sibling percolate up as CPU_IDLE, instead of
     * portraying it as CPU_NOT_IDLE.
     */

    if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
     !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        sd_idle = 1;


/* Schedstat bookkeeping: ++sd->lb_count[idle]. */
    schedstat_inc(sd, lb_count[idle]);

redo:

/* Refresh group shares (updates sd's last_update timestamp). */
    update_shares(sd);


/*
 * find_busiest_group - Returns the busiest group within the sched_domain
 * if there is an imbalance. If there isn't an imbalance, and
 * the user has opted for power-savings, it returns a group whose
 * CPUs can be put to idle by rebalancing those tasks elsewhere, if
 * such a group exists.
 *
 * Also calculates (into &imbalance) the amount of weighted load which
 * should be moved to restore balance.
 *
 * @sd: The sched_domain whose busiest group is to be returned.
 * @this_cpu: The cpu for which load balancing is currently being performed.
 * @imbalance: Variable which stores amount of weighted load which should
 *        be moved to restore balance/put a group to idle.
 * @idle: The idle status of this_cpu.
 * @sd_idle: The idleness of sd
 * @cpus: The set of CPUs under consideration for load-balancing.
 * @balance: Pointer to a variable indicating if this_cpu
 *    is the appropriate cpu to perform load balancing at this_level.
 *
 * Returns:    - the busiest group if imbalance exists.
 *        - If no imbalance and user has opted for power-savings balance,
 *           return the least loaded group whose CPUs can be
 *           put to idle by rebalancing its tasks onto our group.
 */

    group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,cpus, balance);

    /* Some other cpu is responsible for balancing at this level. */
    if (*balance == 0)
        goto out_balanced;

    if (!group) {
        schedstat_inc(sd, lb_nobusyg[idle]);
        goto out_balanced;
    }


/*
 * find_busiest_queue - find the busiest runqueue among the cpus in group.
 */

    busiest = find_busiest_queue(group, idle, imbalance, cpus);
    if (!busiest) {
        schedstat_inc(sd, lb_nobusyq[idle]);
        goto out_balanced;
    }

    /* find_busiest_* must never pick our own runqueue as the source. */
    BUG_ON(busiest == this_rq);

    schedstat_add(sd, lb_imbalance[idle], imbalance);

    ld_moved = 0;
    if (busiest->nr_running > 1) {
        /*
         * Attempt to move tasks. If find_busiest_group has found
         * an imbalance but busiest->nr_running <= 1, the group is
         * still unbalanced. ld_moved simply stays zero, so it is
         * correctly treated as an imbalance.
         */

        local_irq_save(flags);
        double_rq_lock(this_rq, busiest);

/*
 * move_tasks tries to move up to max_load_move weighted load from busiest to
 * this_rq, as part of a balancing operation within domain "sd".
 * Returns 1 if successful and 0 otherwise.
 *
 * Called with both runqueues locked.
 */

        ld_moved = move_tasks(this_rq, this_cpu, busiest,
                 imbalance, sd, idle, &all_pinned);
        double_rq_unlock(this_rq, busiest);
        local_irq_restore(flags);

        /*
         * Tasks landed on this_cpu but we may be running on another
         * cpu on its behalf — make this_cpu reschedule to notice them.
         */

        if (ld_moved && this_cpu != smp_processor_id())
            resched_cpu(this_cpu);

        /* All tasks on this runqueue were pinned by CPU affinity */
        if (unlikely(all_pinned)) {
            /* Drop the fully-pinned cpu and retry with the rest. */
            cpumask_clear_cpu(cpu_of(busiest), cpus);
            if (!cpumask_empty(cpus))
                goto redo;
            goto out_balanced;
        }
    }


/* move_tasks() moved nothing: record the failure, maybe go active. */

    if (!ld_moved) {
        schedstat_inc(sd, lb_failed[idle]);
        sd->nr_balance_failed++;

        if (need_active_balance(sd, sd_idle, idle)) {
            raw_spin_lock_irqsave(&busiest->lock, flags);

            /* don't kick the migration_thread, if the curr
             * task on busiest cpu can't be moved to this_cpu
             */

            if (!cpumask_test_cpu(this_cpu,
                     &busiest->curr->cpus_allowed)) {
                raw_spin_unlock_irqrestore(&busiest->lock,
                             flags);
                all_pinned = 1;
                goto out_one_pinned;
            }


/*
 * move_tasks() pulls; active balancing pushes instead. Mark the busiest
 * runqueue as actively balancing towards push_cpu (= this_cpu). The
 * !active_balance test ensures only one cpu kicks it at a time.
 */

            if (!busiest->active_balance) {
                busiest->active_balance = 1;
                busiest->push_cpu = this_cpu;
                active_balance = 1;
            }
            raw_spin_unlock_irqrestore(&busiest->lock, flags);


/*
 * Each runqueue has a dedicated migration thread that performs the
 * actual push; wake it up (outside busiest->lock).
 */
            if (active_balance)
                wake_up_process(busiest->migration_thread);

            /*
             * We've kicked active balancing, reset the failure
             * counter.
             */

            sd->nr_balance_failed = sd->cache_nice_tries+1;
        }
    } else
        sd->nr_balance_failed = 0;

    if (likely(!active_balance)) {
        /* We were unbalanced, so reset the balancing interval */
        sd->balance_interval = sd->min_interval;
    } else {
        /*
         * If we've begun active balancing, start to back off. This
         * case may not be covered by the all_pinned logic if there
         * is only 1 task on the busy runqueue (because we don't call
         * move_tasks).
         */

        if (sd->balance_interval < sd->max_interval)
            sd->balance_interval *= 2;
    }

    /*
     * SMT power-savings: return -1 so the caller does not treat this
     * cpu as idle at higher domain levels.
     */
    if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
     !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;

    goto out;

out_balanced:
    schedstat_inc(sd, lb_balanced[idle]);

    sd->nr_balance_failed = 0;

out_one_pinned:
    /* tune up the balancing interval */
    if ((all_pinned && sd->balance_interval < MAX_PINNED_INTERVAL) ||
            (sd->balance_interval < sd->max_interval))
        sd->balance_interval *= 2;

    if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
     !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
        ld_moved = -1;
    else
        ld_moved = 0;
out:
    /* Shares changed if load actually moved; propagate the update. */
    if (ld_moved)
        update_shares(sd);
    return ld_moved;
}


Related posts

转载自blog.csdn.net/zjy900507/article/details/80667974