mips架构linux启动分析(六)(arch_mem_init(node内存)内存初始化)

这里开始就是各个node的内存的初始化了。
OK,直接看代码.
/*
 * arch_mem_init - early, architecture-level memory bring-up.
 * @cmdline_p: out-parameter; on return *cmdline_p points at command_line.
 *
 * Registers the kernel image and the init section in boot_mem_map, builds
 * the kernel command line, parses the early parameters and then runs the
 * bootmem, crashkernel, device-tree, sparse, swiotlb and paging setup
 * steps, in that order.
 */
static void __init arch_mem_init(char **cmdline_p)
{
	extern void plat_mem_setup(void);

	/* Board-level setup hook. */
	plat_mem_setup();

	/*
	 * Register the kernel image (_text .. _edata) as RAM in boot_mem_map.
	 * The range is rounded outward to whole pages.
	 */
	arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT,
			 PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT,
			 BOOT_MEM_RAM);

	/*
	 * Register the init section as well; here the range is rounded inward
	 * (start up, end down) so only whole pages are covered.
	 */
	arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
			 PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
			 BOOT_MEM_INIT_RAM);

	/* Append a space and then builtin_cmdline to arcs_cmdline, if set. */
	if (builtin_cmdline[0]) {
		strlcat(arcs_cmdline, " ", COMMAND_LINE_SIZE);
		strlcat(arcs_cmdline, builtin_cmdline, COMMAND_LINE_SIZE);
	}

	/* arcs_cmdline -> boot_command_line -> command_line. */
	strlcpy(boot_command_line, arcs_cmdline, COMMAND_LINE_SIZE);
	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);

	/* Hand the assembled command line back to the caller. */
	*cmdline_p = command_line;

	/* Parse the early command-line parameters. */
	parse_early_param();

	/* Boot-time memory allocator setup. */
	bootmem_init();

	/* Parse the crashkernel description on MIPS ... */
	mips_parse_crashkernel();

	/* ... and, if a non-empty range was given, mark it reserved. */
	if (crashk_res.start != crashk_res.end)
		reserve_bootmem(crashk_res.start,
				crashk_res.end - crashk_res.start + 1,
				BOOTMEM_DEFAULT);

	/* NOTE(review): the original notes call this a no-op here - confirm. */
	device_tree_init();

	/*
	 * NOTE(review): the original author was unsure what sparse_init()
	 * does beyond setting up a non-linear (sparse) memory section.
	 */
	sparse_init();

	/* Platform swiotlb setup hook (DMA related, per its name - confirm). */
	plat_swiotlb_setup();

	/* Set up the memory zones. */
	paging_init();
}

定义板级screen的信息:

/*
 * plat_mem_setup - pick the console driver for this board.
 *
 * Only does anything when CONFIG_VT is enabled:
 *  - with CONFIG_VGA_CONSOLE, selects vga_con and fills screen_info with an
 *    80x25 text-mode description;
 *  - otherwise, with CONFIG_DUMMY_CONSOLE, selects dummy_con.
 */
void __init plat_mem_setup(void)
{
#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
	conswitchp = &vga_con;

	/* Fields not named here are zeroed by the compound literal. */
	screen_info = (struct screen_info) {
		.orig_x            = 0,
		.orig_y            = 25,
		.orig_video_cols   = 80,
		.orig_video_lines  = 25,
		.orig_video_isVGA  = VIDEO_TYPE_VGAC,
		.orig_video_points = 16,
	};
#elif defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
	conswitchp = &dummy_con;
#endif
}

怎么把一个段加入到boot_mem_map中进行管理呢?

/*
 * arch_mem_addpart - add [mem, end) to boot_mem_map unless already covered.
 * @mem:  start address of the range
 * @end:  end address of the range (exclusive)
 * @type: region type handed on to add_memory_region()
 *
 * Only the start address is tested: if @mem lies inside any existing entry
 * the function returns without adding anything.
 */
static void __init arch_mem_addpart(phys_t mem, phys_t end, int type)
{
	phys_t size = end - mem;
	int idx = 0;

	while (idx < boot_mem_map.nr_map) {
		struct boot_mem_map_entry *ent = &boot_mem_map.map[idx];

		/* Start address already falls inside this entry: nothing to do. */
		if (mem >= ent->addr && mem < (ent->addr + ent->size))
			return;
		idx++;
	}

	/* Not covered yet: record the range. */
	add_memory_region(mem, size, type);
}

/*
 * add_memory_region - record a physical memory range in boot_mem_map.
 * @start: first byte of the range
 * @size:  length of the range in bytes
 * @type:  region type; only entries of the same type are merged
 *
 * If the range overlaps or touches an existing entry of the same type, the
 * two are merged in place. Otherwise a new entry is appended. A range whose
 * end wraps around the address space, or an append to a full table, is
 * rejected with an error message instead of corrupting the map.
 */
void __init add_memory_region(phys_t start, phys_t size, long type)
{
	int x = boot_mem_map.nr_map;
	int i;

	/* Sanity check: start + size must not wrap around. */
	if (start + size < start) {
		pr_err("Trying to add an invalid memory region, skipped\n");
		return;
	}

	/* Try to merge with an existing entry, if any. */
	for (i = 0; i < boot_mem_map.nr_map; i++) {
		struct boot_mem_map_entry *entry = boot_mem_map.map + i;
		/* phys_t rather than unsigned long, so wide addresses survive. */
		phys_t top;

		/* Different type: never merged. */
		if (entry->type != type)
			continue;

		/* New range ends below this entry. */
		if (start + size < entry->addr)
			continue;			/* no overlap */

		/* New range starts above this entry. */
		if (entry->addr + entry->size < start)
			continue;			/* no overlap */

		/* Overlapping or adjacent: grow the entry to the union. */
		top = max(entry->addr + entry->size, start + size);
		entry->addr = min(entry->addr, start);
		entry->size = top - entry->addr;

		return;
	}

	/* Refuse to write past the end of the map array. */
	if (boot_mem_map.nr_map >= BOOT_MEM_MAP_MAX) {
		pr_err("Ooops! Too many entries in the memory map!\n");
		return;
	}

	/* Nothing to merge with: append a new entry. */
	boot_mem_map.map[x].addr = start;
	boot_mem_map.map[x].size = size;
	boot_mem_map.map[x].type = type;
	boot_mem_map.nr_map++;
}

bootmem的初始化:

/*
 * bootmem_init - boot-time memory setup; in this excerpt it only handles
 * the initial ramdisk.
 */
static void __init bootmem_init(void)
{
	/* Validate the initrd range; the returned page frame is not used here. */
	(void)init_initrd();

	/* Reserve the initrd memory, or disable the initrd if it is unusable. */
	finalize_initrd();
}

/*
 * init_initrd - validate and normalise the initrd address range.
 *
 * Returns the page frame number just past the end of the initrd
 * (PFN_UP of its physical end address), or 0 when the initrd is missing
 * or rejected. On rejection initrd_start and initrd_end are cleared.
 */
static unsigned long __init init_initrd(void)
{
	unsigned long end_phys;

	/* A usable initrd has a non-zero start and a positive length. */
	if (initrd_start == 0 || initrd_start >= initrd_end)
		goto reject;

	/* The start address must sit on a page boundary. */
	if ((initrd_start & ~PAGE_MASK) != 0) {
		pr_err("initrd start must be page aligned\n");
		goto reject;
	}

	if (initrd_start < PAGE_OFFSET) {
		pr_err("initrd start < PAGE_OFFSET\n");
		goto reject;
	}

	/* Physical address of the initrd end. */
	end_phys = __pa(initrd_end);

	/* Re-derive both bounds as virtual addresses via their physical ones. */
	initrd_end = (unsigned long)__va(end_phys);
	initrd_start = (unsigned long)__va(__pa(initrd_start));

	ROOT_DEV = Root_RAM0;

	/* First page frame past the end of the initrd. */
	return PFN_UP(end_phys);

reject:
	initrd_start = 0;
	initrd_end = 0;
	return 0;
}

/*
 * finalize_initrd - reserve the initrd memory or disable the initrd.
 *
 * An empty initrd, or one that extends past max_low_pfn, is disabled by
 * clearing initrd_start and initrd_end. Otherwise its range is reserved
 * in bootmem and its location is logged.
 */
static void __init finalize_initrd(void)
{
	unsigned long initrd_len = initrd_end - initrd_start;

	/* Zero length: there is no initrd to keep. */
	if (!initrd_len) {
		printk(KERN_INFO "Initrd not found or empty");
		goto give_up;
	}

	/* The initrd must end at or below the top of low memory. */
	if (PFN_PHYS(max_low_pfn) < __pa(initrd_end)) {
		printk(KERN_ERR "Initrd extends beyond end of memory");
		goto give_up;
	}

	/* Mark the initrd pages as reserved. */
	reserve_bootmem(__pa(initrd_start), initrd_len, BOOTMEM_DEFAULT);
	initrd_below_start_ok = 1;

	pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n",
		initrd_start, initrd_len);
	return;

give_up:
	/* Continues the message started by the failing check above. */
	printk(KERN_CONT " - disabling initrd\n");
	initrd_start = 0;
	initrd_end = 0;
}

把内存设置为reserve内存的方法

/* Byte address -> page frame number, rounding up if any low bits are set. */
#define PFN_UP(x)	(((x) + (PAGE_SIZE - 1)) >> PAGE_SHIFT)
/* Byte address -> page frame number, discarding the low bits. */
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)

/*
 * reserve_bootmem - mark a physical byte range as reserved.
 * @addr:  start address of the range
 * @size:  length of the range in bytes
 * @flags: passed through to mark_bootmem()
 *
 * The byte range is widened to whole pages (start rounded down, end
 * rounded up) before being marked. Returns mark_bootmem()'s result.
 */
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
	unsigned long first_pfn = PFN_DOWN(addr);
	unsigned long last_pfn = PFN_UP(addr + size);

	/* reserve == 1: set, rather than clear, the pages in [first, last). */
	return mark_bootmem(first_pfn, last_pfn, 1, flags);
}

/*
 * mark_bootmem - reserve or free the page frame range [start, end).
 * @start:   first page frame number
 * @end:     one past the last page frame number
 * @reserve: non-zero to reserve, zero to free
 * @flags:   passed through to mark_bootmem_node()
 *
 * Walks bdata_list and handles the part of the range that falls into each
 * node. If a reservation fails part-way, the part already reserved is
 * freed again and the error is returned. Returns 0 once @end is reached.
 * Calls BUG() if the list is exhausted before that.
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
				int reserve, int flags)
{
	bootmem_data_t *node;
	unsigned long cursor = start;

	list_for_each_entry(node, &bdata_list, list) {
		unsigned long stop;
		int rc;

		/*
		 * The cursor is outside this node. That is only legal while
		 * we are still searching for the node that holds @start.
		 */
		if (cursor < node->node_min_pfn ||
		    cursor >= node->node_low_pfn) {
			BUG_ON(cursor != start);
			continue;
		}

		/* Clamp this step to the end of the current node. */
		stop = min(node->node_low_pfn, end);

		rc = mark_bootmem_node(node, cursor, stop, reserve, flags);
		if (reserve && rc) {
			/* Undo what has been reserved so far. */
			mark_bootmem(start, cursor, 0, 0);
			return rc;
		}

		if (stop == end)
			return 0;

		/* Continue at the first frame past this node. */
		cursor = node->node_low_pfn;
	}
	BUG();
}

/*
 * mark_bootmem_node - reserve or free [start, end) within a single node.
 * @bdata:   bootmem data of the node
 * @start:   first page frame number; must not be below node_min_pfn
 * @end:     one past the last page frame number; must not exceed node_low_pfn
 * @reserve: non-zero to reserve, zero to free
 * @flags:   passed to __reserve()
 *
 * Returns __reserve()'s result when reserving, 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
				unsigned long start, unsigned long end,
				int reserve, int flags)
{
	unsigned long first_idx, last_idx;

	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
		bdata - bootmem_node_data, start, end, reserve, flags);

	/* The range has to lie entirely inside this node. */
	BUG_ON(start < bdata->node_min_pfn);
	BUG_ON(end > bdata->node_low_pfn);

	/* Convert page frame numbers to offsets relative to the node start. */
	first_idx = start - bdata->node_min_pfn;
	last_idx = end - bdata->node_min_pfn;

	if (!reserve) {
		__free(bdata, first_idx, last_idx);
		return 0;
	}

	return __reserve(bdata, first_idx, last_idx, flags);
}

/*
 * __free - clear the bitmap bits [sidx, eidx) of a node.
 * @bdata: bootmem data of the node
 * @sidx:  first bit index
 * @eidx:  one past the last bit index
 *
 * Calls BUG() if any bit in the range was not set beforehand.
 */
static void __init __free(bootmem_data_t *bdata,
			unsigned long sidx, unsigned long eidx)
{
	unsigned long bit;

	/* Pull hint_idx down to the start of the freed range if it was above it. */
	if (sidx < bdata->hint_idx)
		bdata->hint_idx = sidx;

	/* Clear each bit; a bit that was already clear is a fatal error. */
	for (bit = sidx; bit < eidx; bit++) {
		if (!test_and_clear_bit(bit, bdata->node_bootmem_map))
			BUG();
	}
}

/*
 * __reserve - set the bitmap bits [sidx, eidx) of a node.
 * @bdata: bootmem data of the node
 * @sidx:  first bit index
 * @eidx:  one past the last bit index
 * @flags: if BOOTMEM_EXCLUSIVE is set, an already-set bit is an error
 *
 * Returns 0 on success. In exclusive mode, hitting an already-set bit
 * frees the bits set so far by this call and returns -EBUSY.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			unsigned long eidx, int flags)
{
	int exclusive = flags & BOOTMEM_EXCLUSIVE;
	unsigned long bit;

	for (bit = sidx; bit < eidx; bit++) {
		/* Bit was clear and is now set: move on. */
		if (!test_and_set_bit(bit, bdata->node_bootmem_map))
			continue;

		/* Bit was already set; that only matters in exclusive mode. */
		if (exclusive) {
			__free(bdata, sidx, bit);
			return -EBUSY;
		}
	}
	return 0;
}

具体的各个node初始化函数:

扫描二维码关注公众号,回复: 1810818 查看本文章

/*
 * paging_init - set up the page tables and hand the zone limits to the
 * generic node/zone initialisation.
 *
 * max_low_pfn is raised to the highest end PFN found on any online node
 * before the zone limits are filled in.
 */
void __init paging_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
	unsigned nid;

	/* Page table initialisation. */
	pagetable_init();

	/* Look at the PFN range of every online node. */
	for_each_online_node(nid) {
		unsigned long range_start, range_end;

		get_pfn_range_for_nid(nid, &range_start, &range_end);

		if (max_low_pfn < range_end)
			max_low_pfn = range_end;
	}

#ifdef CONFIG_ZONE_DMA32
	/* Limit for ZONE_DMA32. */
	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
	/* Limit for ZONE_NORMAL. */
	zones_size[ZONE_NORMAL] = max_low_pfn;

	/* Initialise the zones of every node from these limits. */
	free_area_init_nodes(zones_size);
}

/*
 * free_area_init_nodes - compute the PFN bounds of each zone and initialise
 * every online node.
 * @max_zone_pfn: array of MAX_NR_ZONES entries, indexed by zone, giving the
 *                highest PFN requested for each zone
 */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
	unsigned long start_pfn, end_pfn;
	int zone_idx, nid;

	/* Start from all-zero lowest/highest PFN tables. */
	memset(arch_zone_lowest_possible_pfn, 0,
	       sizeof(arch_zone_lowest_possible_pfn));
	memset(arch_zone_highest_possible_pfn, 0,
	       sizeof(arch_zone_highest_possible_pfn));

	/* Zone 0 starts at the lowest PFN of the active regions ... */
	arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
	/* ... and ends at the limit supplied by the caller. */
	arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];

	for (zone_idx = 1; zone_idx < MAX_NR_ZONES; zone_idx++) {
		if (zone_idx != ZONE_MOVABLE) {
			/* Each zone begins where the previous one ended. */
			arch_zone_lowest_possible_pfn[zone_idx] =
				arch_zone_highest_possible_pfn[zone_idx - 1];
			/* Its end is never below its own start. */
			arch_zone_highest_possible_pfn[zone_idx] =
				max(max_zone_pfn[zone_idx],
				    arch_zone_lowest_possible_pfn[zone_idx]);
		}
	}

	/* ZONE_MOVABLE gets no range of its own in these tables. */
	arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
	arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;

	/* Find the PFNs that ZONE_MOVABLE begins at in each node */
	memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
	find_zone_movable_pfns_for_nodes();

	/* Initialise every node */
	mminit_verify_pageflags_layout();
	setup_nr_node_ids();
	for_each_online_node(nid) {
		pg_data_t *pgdat = NODE_DATA(nid);

		free_area_init_node(nid, NULL,
				    find_min_pfn_for_node(nid), NULL);

		/* Any memory on that node */
		if (pgdat->node_present_pages)
			node_set_state(nid, N_MEMORY);
		check_for_memory(pgdat, nid);
	}
}


看一下是怎么初始化每个node的内存区域的:

/*
 * free_area_init_node - initialise the memory description of one node.
 * @nid:            node id
 * @zones_size:     passed through to calculate_node_totalpages()
 * @node_start_pfn: first page frame number of the node
 * @zholes_size:    passed through to calculate_node_totalpages()
 */
void __init_refok free_area_init_node(int nid, unsigned long *zones_size,
		unsigned long node_start_pfn, unsigned long *zholes_size)
{
	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long range_start = 0, range_end = 0;

	/* Reset the deferred-meminit state of this node. */
	reset_deferred_meminit(pgdat);

	/* Record the node id and its first page frame. */
	pgdat->node_id = nid;
	pgdat->node_start_pfn = node_start_pfn;

	/* Look up the PFN range that belongs to this node. */
	get_pfn_range_for_nid(nid, &range_start, &range_end);

	/* Work out the node's page totals (spanned and present, per the original notes). */
	calculate_node_totalpages(pgdat, range_start, range_end,
				  zones_size, zholes_size);

	/* NOTE(review): the original notes say this is empty without a flat node mem_map - confirm. */
	alloc_node_mem_map(pgdat);

	/* Core step: set up the node data and its zones. */
	free_area_init_core(pgdat, range_start, range_end);
}

zone的结构体:
/*
 * struct zone - per-zone memory management state.
 *
 * The two ZONE_PADDING() markers split the structure into three groups of
 * fields. Fields without a comment are left undocumented here because this
 * excerpt does not show how they are used.
 */
struct zone {
	unsigned long watermark[NR_WMARK];  //watermarks; used during memory reclaim
	unsigned long percpu_drift_mark;
	unsigned long		lowmem_reserve[MAX_NR_ZONES];
	unsigned long		dirty_balance_reserve;

#ifdef CONFIG_NUMA
	int node;              //node id
	unsigned long		min_unmapped_pages;
	unsigned long		min_slab_pages;
#endif
	struct per_cpu_pageset __percpu *pageset;
	spinlock_t		lock;
	int                     all_unreclaimable; /* All pages pinned */
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
	bool			compact_blockskip_flush;

	unsigned long		compact_cached_free_pfn;
	unsigned long		compact_cached_migrate_pfn;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
	seqlock_t		span_seqlock;
#endif
	struct free_area	free_area[MAX_ORDER];

#ifndef CONFIG_SPARSEMEM
	unsigned long		*pageblock_flags;
#endif /* CONFIG_SPARSEMEM */

#ifdef CONFIG_COMPACTION
	unsigned int		compact_considered;
	unsigned int		compact_defer_shift;
	int			compact_order_failed;
#endif

	ZONE_PADDING(_pad1_)

	spinlock_t		lru_lock;
	struct lruvec		lruvec;

	atomic_long_t		inactive_age;

	unsigned long		pages_scanned;	   /* since last reclaim */
	unsigned long		flags;		   /*zone flags*/
	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];

	unsigned int inactive_ratio;


	ZONE_PADDING(_pad2_)
//wait queue
	wait_queue_head_t	* wait_table;
	unsigned long		wait_table_hash_nr_entries; //number of wait entries
	unsigned long		wait_table_bits;

	struct pglist_data	*zone_pgdat;    //the pgdat this zone belongs to

	unsigned long		zone_start_pfn; //start of the zone (a page frame number, per the name)

	unsigned long		spanned_pages; //total size, including holes
	unsigned long		present_pages; //size excluding holes
	unsigned long		managed_pages; 

	int			nr_migrate_reserve_block;
	const char		*name;   //zone name

} ____cacheline_internodealigned_in_smp;

zone结构的初始化(这里很多初始化的具体作用还不是很了解,只要先知道这里是在初始化zone就好了;等了解了内存回收机制之后,对这里的作用就清楚了)

/*
 * free_area_init_core - initialise the per-node data and every zone of a node.
 * @pgdat:          node descriptor being set up
 * @node_start_pfn: not referenced in this body
 * @node_end_pfn:   not referenced in this body
 *
 * Many of the zone members set here are used later by memory reclaim.
 * For each zone the function works out how many pages remain once the
 * memmap (and, for zone 0, dma_reserve) are accounted for, updates the
 * global page counters, initialises locks and bookkeeping, and for a
 * non-empty zone sets up its usemap and memmap.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
		unsigned long node_start_pfn, unsigned long node_end_pfn)
{
	enum zone_type j;
	int nid = pgdat->node_id;
	int ret;

	/* Initialise the node-level members. */
	pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
	spin_lock_init(&pgdat->numabalancing_migrate_lock);
	pgdat->numabalancing_migrate_nr_pages = 0;
	pgdat->numabalancing_migrate_next_window = jiffies;
#endif
	init_waitqueue_head(&pgdat->kswapd_wait);
	init_waitqueue_head(&pgdat->pfmemalloc_wait);
	pgdat_page_cgroup_init(pgdat);

	/* Walk all zones of this node and fill in their information. */
	for (j = 0; j < REAL_MAX_ZONES; j++) {
		struct zone *zone = pgdat->node_zones + j;
		unsigned long size, realsize, freesize, memmap_pages;
		unsigned long zone_start_pfn;

		zone_start_pfn = zone->zone_start_pfn;

		/* size spans holes; realsize/freesize start from the present pages. */
		size = zone->spanned_pages;
		realsize = freesize = zone->present_pages;

		/* Subtract the pages needed for the memmap, if they fit. */
		memmap_pages = calc_memmap_size(size, realsize);
		if (freesize >= memmap_pages) {
			freesize -= memmap_pages;
			if (memmap_pages)
				printk(KERN_DEBUG
				       "%s zone: %lu pages used for memmap\n",
				       zone_names[j], memmap_pages);
		} else
			printk(KERN_WARNING
			       "%s zone: %lu pages exceeds freesize %lu\n",
			       zone_names[j], memmap_pages, freesize);

		/* Account for reserved pages */
		if (j == 0 && freesize > dma_reserve) {
			freesize -= dma_reserve;
			printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
			       zone_names[0], dma_reserve);
		}

		/* Update the global page counters. */
		if (!is_highmem_idx(j))
			nr_kernel_pages += freesize;
		else if (nr_kernel_pages > memmap_pages * 2)
			nr_kernel_pages -= memmap_pages;
		nr_all_pages += freesize;

		/* Highmem zones count all present pages as managed. */
		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
		zone->node = nid;
		zone->min_unmapped_pages = (freesize * sysctl_min_unmapped_ratio) / 100;
		zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
#endif
		/* Zone name. */
		zone->name = zone_names[j];

		/* Lock initialisation. */
		spin_lock_init(&zone->lock);
		spin_lock_init(&zone->lru_lock);
		zone_seqlock_init(zone);

		/* Back-pointer to the owning node. */
		zone->zone_pgdat = pgdat;
		zone_pcp_init(zone);

		/* For bootup, initialized properly in watermark setup */
		mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);

		lruvec_init(&zone->lruvec);

		/* An empty zone needs no usemap or memmap. */
		if (!size)
			continue;

		set_pageblock_order();
		setup_usemap(pgdat, zone, zone_start_pfn, size);
		ret = init_currently_empty_zone(zone, zone_start_pfn,
						size, MEMMAP_EARLY);
		BUG_ON(ret);
		memmap_init(size, nid, j, zone_start_pfn);
	}
}

好了,现在node中的zone就填写完毕了。


OK,运行到这里,每个node对应的内存区域就已经初始化完毕了。

下面还有几个板级相关的点,记录一下,比较重要,需要进行分析:

1,paging_init中的pagetable_init函数,这是初始化pgd,pud,pmd,pte的表项,也就是虚拟地址到物理地址的转换
2,arch_mem_init函数中的sparse_init,功能是初始化一段非线性区域,具体作用还不是很清楚?
3,arch_mem_init函数中的plat_swiotlb_setup函数, 这是关于DMA操作,涉及到具体板卡的地址映射问题.
4,setup_arch函数中的plat_smp_setup,是mips多核cpu之间的IPI初始化,也就是核间互连寄存器的初始化(手册说的比较少,单看代码也不了解具体功能)
5,setup_arch函数中的cpu_cache_init函数,这是cpu的cache初始化。




猜你喜欢

转载自blog.csdn.net/u010383937/article/details/78599256
今日推荐