1: Análisis de la entrada de inicio a partir del script de enlace
arch/arm/kernel/vmlinux.lds
/*
 * arch/arm/include/asm/page.h
*
* Copyright (C) 1995-2003 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
OUTPUT_ARCH(arm)
ENTRY(stext)
jiffies = jiffies_64;
SECTIONS
{
... ...
En el script de enlace se puede ver que el punto de entrada es "ENTRY(stext)", definido en arch/arm/kernel/head.S.
2: Análisis del proceso de arranque de Linux
1. arch/arm/kernel/head.S
El comentario indica que, antes de que arranque el kernel de Linux, la MMU y la D-cache deben estar desactivadas, el estado de la I-cache es indiferente, r0 = 0, r1 = número de máquina y r2 = puntero a atags o al dtb.
/*
* Kernel startup entry point.
* ---------------------------
*
* This is normally called from the decompressor code. The requirements
* are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
* r1 = machine nr, r2 = atags or dtb pointer.
*/
ENTRY(stext)
... ...
safe_svcmode_maskall r9 @ ensure the CPU is in SVC mode with all interrupts masked
mrc p15, 0, r9, c0, c0 @ get processor id
(1) bl __lookup_processor_type @ r5=procinfo r9=cpuid
movs r10, r5 @ invalid processor (r5=0)?
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p @ yes, error 'p'
... ...
/*
* r1 = machine no, r2 = atags or dtb,
* r8 = phys_offset, r9 = cpuid, r10 = procinfo
*/
(2) bl __vet_atags
... ...
bl __create_page_tables @ create the page tables
/*
* The following calls CPU specific code in a position independent
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
* xxx_proc_info structure selected by __lookup_processor_type
* above. On return, the CPU will be ready for the MMU to be
* turned on, and r0 will hold the CPU control register value.
*/
(3) ldr r13, =__mmap_switched @ address to jump to after
@ mmu has been enabled
adr lr, BSYM(1f) @ return (PIC) address
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ldr r12, [r10, #PROCINFO_INITFUNC] @ r12 = word stored at PROCINFO_INITFUNC inside procinfo
add r12, r12, r10 @ add the procinfo base, so the stored word acts as an offset from r10
ret r12 @ jump to the computed address; lr (set above) is where it returns
@ NOTE(review): the "(4)" marker below presumably stands where the local
@ label "1:" referenced by "adr lr, BSYM(1f)" lives in head.S -- confirm
(4) b __enable_mmu
(1) bl __lookup_processor_type: comprueba si el kernel admite la CPU actual (cuyo identificador está en r9). Si es así, devuelve en r5 la dirección de su entrada procinfo, que es una estructura proc_info_list; si no la encuentra, devuelve r5 = 0.
/*
 * __lookup_processor_type - find the processor-info entry for the running CPU.
 *
 * In:   r9 = processor id (loaded by the caller before the call)
 * Out:  r5 = address of the matching entry, or 0 if no entry matches
 * Uses: r3, r4 and r6 as scratch; r9 is left unchanged
 *
 * Three words are read from __lookup_processor_type_data: r4 is subtracted
 * from the run-time address of that data to obtain the virt->phys offset,
 * and r5/r6 are the begin/end addresses of the table, which are adjusted by
 * that offset before use. Each entry starts with a (value, mask) pair; an
 * entry matches when (r9 & mask) == value.
 */
__lookup_processor_type:
adr r3, __lookup_processor_type_data @ r3 = run-time (PC-relative) address of the data
ldmia r3, {r4 - r6} @ r4 = address to subtract below, r5 = table begin, r6 = table end
sub r3, r3, r4 @ get offset between virt&phys
add r5, r5, r3 @ convert virt addresses to
add r6, r6, r3 @ physical address space
1: ldmia r5, {r3, r4} @ value, mask
and r4, r4, r9 @ mask wanted bits
teq r3, r4 @ does the masked id equal this entry's value?
beq 2f @ yes: return with r5 pointing at the entry
add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
cmp r5, r6 @ reached the end of the table?
blo 1b @ no (r5 < r6, unsigned): try the next entry
mov r5, #0 @ unknown processor
2: ret lr
ENDPROC(__lookup_processor_type)
/*
 * One entry of the table searched by __lookup_processor_type. That routine
 * reads the first two words of each entry as (value, mask) and accepts the
 * entry when (cpuid & cpu_mask) == cpu_val. The field order matters because
 * the assembly code addresses this structure by offset.
 */
struct proc_info_list {
unsigned int cpu_val; /* expected processor id after masking */
unsigned int cpu_mask; /* mask applied to the processor id before comparing */
unsigned long __cpu_mm_mmu_flags; /* used by head.S */
unsigned long __cpu_io_mmu_flags; /* used by head.S */
/* NOTE(review): presumably the word head.S reads at PROCINFO_INITFUNC -- confirm */
unsigned long __cpu_flush; /* used by head.S */
const char *arch_name;
const char *elf_name;
unsigned int elf_hwcap;
const char *cpu_name;
struct processor *proc;
struct cpu_tlb_fns *tlb;
struct cpu_user_fns *user;
struct cpu_cache_fns *cache;
};
(2) bl __vet_atags: verifica la validez del puntero a los atags o al árbol de dispositivos (dtb) recibido en r2; si no es válido, pone r2 a 0.
/*
 * __vet_atags - sanity-check the atags/dtb pointer passed by the boot loader.
 *
 * In:   r2 = atags or dtb pointer
 * Out:  r2 = unchanged if the pointer looks valid, 0 otherwise
 * Uses: r5 and r6 as scratch
 *
 * The pointer is accepted when it is 4-byte aligned and either
 *  - (CONFIG_OF_FLATTREE only) its first word equals OF_DT_MAGIC, or
 *  - its first word is ATAG_CORE_SIZE or ATAG_CORE_SIZE_EMPTY and its
 *    second word equals ATAG_CORE.
 */
__vet_atags:
tst r2, #0x3 @ aligned?
bne 1f @ no: reject
ldr r5, [r2, #0] @ r5 = first word at the pointer
#ifdef CONFIG_OF_FLATTREE
ldr r6, =OF_DT_MAGIC @ is it a DTB?
cmp r5, r6
beq 2f @ yes: accept
#endif
cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE?
cmpne r5, #ATAG_CORE_SIZE_EMPTY @ if not that size, try the empty-core size
bne 1f @ neither size matched: reject
ldr r5, [r2, #4] @ r5 = second word at the pointer
ldr r6, =ATAG_CORE
cmp r5, r6 @ must equal ATAG_CORE
bne 1f @ otherwise reject
2: ret lr @ atag/dtb pointer is ok
1: mov r2, #0 @ invalid pointer: hand 0 back in r2
ret lr
ENDPROC(__vet_atags)
(3) ldr r13, =__mmap_switched: guarda en el registro r13 la dirección de la función __mmap_switched, a la que se saltará una vez activada la MMU y que termina saltando a start_kernel.
/*
 * __mmap_switched - final assembly step before the C entry point.
 *
 * In:   r0 = control register value, r1 = machine type,
 *       r2 = atags pointer, r9 = processor id
 * Does not return: ends by branching to start_kernel.
 *
 * Steps, all driven by the address table at __mmap_switched_data:
 *  1. if r4 != r5, copy words from [r4] to [r5] until r5 reaches r6;
 *  2. store zero to every word from r6 up to (not including) r7;
 *  3. load four more pointers and the stack pointer from the table, then
 *     store r9, r1 and r2 through the first three pointers, and r0 through
 *     the fourth if it is non-zero.
 */
__mmap_switched:
adr r3, __mmap_switched_data @ r3 = address of the table
ldmia r3!, {r4, r5, r6, r7} @ first four words; r3 now points past them
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6 @ (only when r4 != r5) has the destination reached r6?
ldrne fp, [r4], #4 @ not yet: load one word, post-increment source
strne fp, [r5], #4 @ store it, post-increment destination
bne 1b @ and loop
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7
strcc fp, [r6],#4 @ while r6 < r7 (unsigned): store zero, post-increment r6
bcc 1b
ARM( ldmia r3, {r4, r5, r6, r7, sp}) @ next four pointers plus the stack pointer
THUMB( ldmia r3, {r4, r5, r6, r7} ) @ Thumb variant: same four pointers ...
THUMB( ldr sp, [r3, #16] ) @ ... and sp loaded separately from the fifth word
str r9, [r4] @ Save processor ID
str r1, [r5] @ Save machine type
str r2, [r6] @ Save atags pointer
cmp r7, #0 @ is there a place to save the control register?
strne r0, [r7] @ Save control register values
b start_kernel @ enter the C code; no return expected
ENDPROC(__mmap_switched)
(4) b __enable_mmu: termina saltando a __turn_mmu_on, que activa la MMU y finalmente salta a la dirección guardada en el registro r13, es decir, a la función __mmap_switched cargada en (3), la cual acaba saltando a start_kernel.
/*
 * __enable_mmu - the body is elided in this excerpt ("... ..."); the only
 * part shown is the final branch to __turn_mmu_on.
 */
__enable_mmu:
... ...
b __turn_mmu_on @ continue in __turn_mmu_on, which writes the control register
ENDPROC(__enable_mmu)
/*
 * __turn_mmu_on - write the control register and jump to the address in r13.
 *
 * In:   r0  = value to write to the CP15 control register
 *       r13 = address to continue at (head.S loads __mmap_switched there)
 * Uses: r3 as scratch
 * Does not return to the caller: it leaves via "ret r3" with r3 = r13.
 */
ENTRY(__turn_mmu_on)
mov r0, r0 @ register-to-itself move (no effect on r0)
instr_sync @ NOTE(review): presumably an instruction-synchronisation macro -- confirm
mcr p15, 0, r0, c1, c0, 0 @ write control reg
mrc p15, 0, r3, c0, c0, 0 @ read id reg
instr_sync
mov r3, r3 @ register-to-itself move (no effect on r3)
mov r3, r13 @ r3 = continuation address saved in r13
ret r3 @ jump to it
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
2. init/main.c, función start_kernel(): inicialización de los distintos módulos funcionales
/*
 * start_kernel() - C entry point of the kernel, reached from __mmap_switched.
 *
 * Calls the subsystem initializers in the order listed below. Interrupts
 * are disabled near the top (local_irq_disable) and re-enabled only after
 * the timer/IRQ infrastructure is set up (local_irq_enable). The function
 * ends by calling rest_init(). Takes no arguments and returns nothing.
 *
 * The trailing comments are the article's per-call annotations, translated
 * to English and written as real C comments ("@" is not a C comment).
 */
asmlinkage __visible void __init start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	/*
	 * Need to run as early as possible, to initialize the
	 * lockdep hash:
	 */
	lockdep_init();				/* deadlock-detection (lockdep) module; sets up its hash tables, must run first */
	set_task_stack_end_magic(&init_task);	/* write the stack-end magic number, used for stack-overflow detection */
	smp_setup_processor_id();		/* multi-core: set up the processor ID */
	debug_objects_early_init();		/* early debug-objects initialization */

	/*
	 * Set up the initial canary ASAP:
	 */
	boot_init_stack_canary();		/* initialize the stack canary (stack-overflow detection) */

	cgroup_init_early();			/* early init of cgroups (control of system resources) */

	local_irq_disable();			/* disable interrupts on the current CPU */
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */
	boot_cpu_init();			/* boot CPU initialization */
	page_address_init();			/* page-address initialization */
	pr_notice("%s", linux_banner);		/* print the Linux banner (version, build time, ...) */
	setup_arch(&command_line);		/* architecture setup; parses the atags or device tree passed in */
	mm_init_cpumask(&init_mm);		/* memory-related init: set up the cpumask of init_mm */
	setup_command_line(command_line);	/* store the command-line arguments */
	setup_nr_cpu_ids();			/* determine the number of CPU ids */
	setup_per_cpu_areas();			/* set up the per-CPU data areas */
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */

	build_all_zonelists(NULL, NULL);	/* build the memory zone lists */
	page_alloc_init();			/* handle pages used for CPU hotplug */

	pr_notice("Kernel command line: %s\n", boot_command_line);
	parse_early_param();			/* parse early command-line parameters (e.g. console) */
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, &unknown_bootoption);
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   set_init_arg);

	jump_label_init();

	/*
	 * These use large bootmem allocations and must precede
	 * kmem_cache_init()
	 */
	setup_log_buf(0);			/* set up the buffer used by the kernel log */
	pidhash_init();				/* build the PID hash table */
	vfs_caches_init_early();		/* early init of the VFS dentry and inode caches */
	sort_main_extable();			/* sort the kernel exception table */
	trap_init();				/* initialize the system's reserved trap/exception vectors */
	mm_init();				/* memory-management initialization */

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();				/* initialize the scheduler */
	/*
	 * Disable preemption - early bootup scheduling is extremely
	 * fragile until we cpu_idle() for the first time.
	 */
	preempt_disable();			/* disable preemption */
	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	idr_init_cache();			/* IDR initialization */
	rcu_init();				/* initialize RCU */

	/* trace_printk() and trace points may be used after this */
	trace_init();				/* tracing/debug initialization */

	context_tracking_init();
	radix_tree_init();			/* initialize radix-tree data structures */
	/* init some links before init_ISA_irqs() */
	early_irq_init();			/* interrupt initialization */
	init_IRQ();
	tick_init();				/* tick initialization */
	rcu_init_nohz();
	init_timers();				/* initialize timers */
	hrtimers_init();			/* initialize high-resolution timers */
	softirq_init();				/* softirq initialization */
	timekeeping_init();
	time_init();				/* initialize system time */
	sched_clock_postinit();
	perf_event_init();
	profile_init();
	call_function_init();
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
	early_boot_irqs_disabled = false;
	local_irq_enable();			/* enable interrupts */

	kmem_cache_init_late();			/* late init of the slab memory allocator */

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();				/* console initialization */
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_info();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();			/* lock self-test */

#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
			page_to_pfn(virt_to_page((void *)initrd_start)),
			min_low_pfn);
		initrd_start = 0;
	}
#endif
	page_ext_init();
	debug_objects_mem_init();
	kmemleak_init();			/* memory-leak detector initialization */
	setup_per_cpu_pageset();
	numa_policy_init();
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();
	pidmap_init();				/* PID bitmap initialization */
	anon_vma_init();			/* create the anon_vma slab cache */
	acpi_early_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
#ifdef CONFIG_X86_ESPFIX64
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
#endif
	thread_info_cache_init();
	cred_init();
	fork_init();				/* initialize the structures needed by fork */
	proc_caches_init();			/* allocate caches for the resource-management structures */
	buffer_init();				/* initialize the buffer cache */
	key_init();				/* initialize the key subsystem */
	security_init();
	dbg_late_init();
	vfs_caches_init(totalram_pages);	/* create the caches used by the VFS */
	signals_init();				/* initialize signals */
	/* rootfs populating might need page-writeback */
	page_writeback_init();			/* page-writeback initialization */
	proc_root_init();			/* register and mount the proc filesystem */
	nsfs_init();
	cpuset_init();				/* initialize cpusets */
	cgroup_init();				/* initialize cgroups */
	taskstats_init_early();			/* early task-statistics initialization */
	delayacct_init();

	check_bugs();				/* check write-buffer coherency */

	acpi_subsystem_init();
	sfi_init_late();

	if (efi_enabled(EFI_RUNTIME_SERVICES)) {
		efi_late_init();
		efi_free_boot_services();
	}

	ftrace_init();

	/* Do the rest non-__init'ed, we're now alive */
	rest_init();				/* hand over to rest_init() */
}
Introducción a la función rest_init(): creación de los hilos del kernel kernel_init y kthreadd
/*
 * rest_init() - last part of start_kernel(): spawn the first kernel threads.
 *
 * Creates the kernel_init thread first (so that it obtains pid 1) and then
 * the kthreadd thread, records kthreadd's task in kthreadd_task, signals
 * kthreadd_done, and finally turns the boot thread into the idle loop via
 * cpu_startup_entry(). Takes no arguments and returns nothing.
 *
 * The trailing comments are the article's annotations, translated to
 * English and written as real C comments ("@" is not a C comment).
 */
static noinline void __init_refok rest_init(void)
{
	int pid;

	rcu_scheduler_starting();	/* start the RCU scheduler */
	smpboot_thread_init();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	kernel_thread(kernel_init, NULL, CLONE_FS);	/* create the kernel_init thread, i.e. the init process (PID 1) */
	numa_default_policy();
	/* create the kthreadd kernel thread (PID 2), which manages and schedules all kernel threads */
	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();
	complete(&kthreadd_done);	/* lets kernel_init_freeable() proceed past its wait */

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	init_idle_bootup_task(current);
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);	/* become the idle task (PID 0); other tasks run by preempting it */
}
Introducción a la función kernel_init(): busca y ejecuta el programa init, con lo que se completa el arranque de Linux
/*
 * kernel_init() - body of the thread created by rest_init() to become init.
 *
 * After kernel_init_freeable() and the release of init memory, it tries to
 * run an init program, in this order:
 *   1. ramdisk_execute_command, if set;
 *   2. execute_command, if set (panics if it cannot be run);
 *   3. "/sbin/init", "/etc/init", "/bin/init", "/bin/sh".
 *
 * @unused: not used.
 *
 * Returns 0 as soon as one program was started successfully; panics if
 * none could be run.
 *
 * The trailing comments are the article's annotations, translated to
 * English and written as real C comments ("@" is not a C comment).
 */
static int __ref kernel_init(void *unused)
{
	int ret;

	kernel_init_freeable();		/* finish the initialization needed by the init process */
	/* need to finish all async __init code before freeing the memory */
	async_synchronize_full();
	free_initmem();
	mark_rodata_ro();
	system_state = SYSTEM_RUNNING;
	numa_default_policy();

	flush_delayed_fput();

	/* kernel_init_freeable() defaults this to "/init", the init program in the root directory */
	if (ramdisk_execute_command) {
		/* if "/init" exists, run it through run_init_process() */
		ret = run_init_process(ramdisk_execute_command);
		if (!ret)
			return 0;
		pr_err("Failed to execute %s (error %d)\n",
		       ramdisk_execute_command, ret);
	}

	/*
	 * We try each of these until one succeeds.
	 *
	 * The Bourne shell can be used instead of init if we are
	 * trying to recover a really broken machine.
	 */
	/* as above: try to run the init program named by execute_command */
	if (execute_command) {
		ret = run_init_process(execute_command);
		if (!ret)
			return 0;
		panic("Requested init %s failed (error %d).",
		      execute_command, ret);
	}
	/* if both variables above are unset, try these defaults in turn */
	if (!try_to_run_init_process("/sbin/init") ||
	    !try_to_run_init_process("/etc/init") ||
	    !try_to_run_init_process("/bin/init") ||
	    !try_to_run_init_process("/bin/sh"))
		return 0;

	/* none of them could be run: the Linux boot fails */
	panic("No working init found.  Try passing init= option to kernel. "
	      "See Linux Documentation/init.txt for guidance.");
}
kernel_init_freeable(): completa la inicialización previa a la ejecución del proceso init
/*
 * kernel_init_freeable() - initialization done by the init thread before it
 * executes an init program.
 *
 * Waits for kthreadd_done, brings up SMP and the drivers, opens
 * "/dev/console" and duplicates descriptor 0 twice, then decides whether an
 * early-userspace init exists: ramdisk_execute_command defaults to "/init",
 * and if that path is not accessible the variable is reset to NULL and
 * prepare_namespace() is called. Takes no arguments and returns nothing.
 *
 * Part of the body is omitted in this excerpt (marked below). The trailing
 * comments are the article's annotations, translated to English and written
 * as real C comments ("@" is not a C comment).
 */
static noinline void __init kernel_init_freeable(void)
{
	/*
	 * Wait until kthreadd is all set-up.
	 */
	wait_for_completion(&kthreadd_done);	/* wait until kthreadd is ready */

	/* ... (statements omitted in this excerpt) ... */

	smp_init();			/* SMP initialization */
	sched_init_smp();		/* multi-core (SMP) scheduler initialization */

	do_basic_setup();		/* device-driver init; calls driver_init to set up the driver-model subsystems */

	/* open the "/dev/console" device as file descriptor 0 (standard input) */
	/* Open the /dev/console on the rootfs, this should never fail */
	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
		pr_err("Warning: unable to open an initial console.\n");

	(void) sys_dup(0);		/* duplicate fd 0: standard output (1) ... */
	(void) sys_dup(0);		/* ... and standard error (2) */
	/*
	 * check if there is an early userspace init.  If yes, let it do all
	 * the work
	 */

	if (!ramdisk_execute_command)
		ramdisk_execute_command = "/init";

	if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
		ramdisk_execute_command = NULL;
		prepare_namespace();	/* mount the root filesystem */
	}

	/*
	 * Ok, we have completed the initial bootup, and
	 * we're essentially up and running. Get rid of the
	 * initmem segments and start the user-mode stuff..
	 *
	 * rootfs is available now, try loading the public keys
	 * and default modules
	 */

	integrity_load_keys();
	load_default_modules();
}