文章 | 内容 |
---|---|
Linux内存管理:Bootmem的率先登场 | Bootmem 启动过程内存分配器 |
Linux内存管理:Buddy System姗姗来迟 | Buddy System 伙伴系统内存分配器 |
Linux内存管理:Slab闪亮登场 | Slab 内存分配器 |
这是源码剖析专栏的第一篇文章
主要分成四大模块来剖析:内存管理、设备管理、系统启动和其他部分
其中内存管理分为Bootmem
、Buddy System
和Slab
三部分来阐述,本文主要阐述的是Bootmem
启动流程
Bootmem
分配器是Linux boot-time
阶段管理物理内存,并提供物理内存分配和回收的分配器。其作为内核初始化过程中第一个真正意义上的内存分配器,为内核早期的初始化活动提供了物理内存的分配和回收,以及为Buddy
分配器的创建提供了基础,Bootmem
分配器将自己管理的物理内存移交给Buddy
分配器之后,其使命已经完成,内核正式启用Buddy
分配器管理系统物理内存。
参考环境
- 源码版本:
Linux 2.6.30.4
- 编辑器:
VsCode
- 系统架构:
ARM/Linux
前面的先略过,此篇重在剖析
bootmem
初始化流程,在此之前确定了swapper_pg_dir
内核页表的位置
void __init paging_init(struct machine_desc *mdesc)
{
// ..
bootmem_init();
bootmem_init
void __init bootmem_init(void)
{
struct meminfo *mi = &meminfo; // 获取内存信息
通过meminfo
获得内存信息,其中struct meminfo
表示的是每个bank
的信息
/* Boot-time description of physical memory: a set of banks. */
struct meminfo {
int nr_banks; // number of memory banks present
struct membank bank[NR_BANKS]; // per-bank descriptors
};
而对于每个struct membank
/* One contiguous region (bank) of physical memory. */
struct membank {
unsigned long start; // physical start address of the bank
unsigned long size; // size of the bank in bytes
int node; // node this bank belongs to
};
其中有一步
initrd_node = check_initrd(mi);
获取虚拟磁盘文件所在节点,后面再来看看这个有啥用,此处不做讲解
for_each_node(node) {
unsigned long end_pfn = bootmem_init_node(node, mi);
遍历节点node
,宏定义为
/* Iterate over every possible node; with one node this collapses to node 0 only. */
#define for_each_node(node) for_each_node_state(node, N_POSSIBLE)
#define for_each_node_state(__node, __state) \
for_each_node_mask((__node), node_states[__state])
#define for_each_node_mask(node, mask) \
if (!nodes_empty(mask)) \
for ((node) = 0; (node) < 1; (node)++)
宏展开为
if (!nodes_empty(node_states[N_POSSIBLE]))
{
for (((node)) = 0; ((node)) < 1; ((node))++)
{
// ...
}
}
node_states
如下,这个后面再看看
/* Indices into the node_states[] array of node masks. */
enum node_states {
N_POSSIBLE, /* The node could become online at some point */
N_ONLINE, /* The node is online */
N_NORMAL_MEMORY, /* The node has regular memory */
#ifdef CONFIG_HIGHMEM
N_HIGH_MEMORY, /* The node has regular or high memory */
#else
N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
N_CPU, /* The node has one or more cpus */
NR_NODE_STATES
};
bootmem_init_node
// 节点获得`end_fn`信息
unsigned long end_pfn = bootmem_init_node(node, mi);
pg_data_t *pgdat; // 表示的是一个node的信息
pg_data_t
表示一个node
for_each_nodebank(i, mi, node)
宏定义展开为
/* Iterate over the banks of 'mi' that belong to node 'no'. */
#define for_each_nodebank(iter,mi,no) \
for (iter = 0; iter < (mi)->nr_banks; iter++) \
if ((mi)->bank[iter].node == no)
for (i = 0; i < (mi)->nr_banks; i++)
{
if ((mi)->bank[i].node == node)
{
// banks belonging to this node; node numbering starts at 0
}
}
struct membank *bank = &mi->bank[i]; // 获取该bank
获得起始页帧号和结束页帧号(用的是物理地址)
start = bank_pfn_start(bank); // 起始页帧号
end = bank_pfn_end(bank); // 结束页帧号
有时间可以看看这两个宏
/* Convert a bank's physical start/end address into page frame numbers. */
#define bank_pfn_start(bank) __phys_to_pfn((bank)->start)
#define bank_pfn_end(bank) __phys_to_pfn((bank)->start + (bank)->size)
#define __phys_to_pfn(paddr) ((paddr) >> PAGE_SHIFT)
这个PAGE_SHIFT
右移的作用是:对地址右移PAGE_SHIFT
位,得到该地址所在页的页帧号
与之相关的另一种操作是与PAGE_MASK
相与,它得到的是该页的页对齐起始地址(即清除页内偏移),而不是页帧号,两者不要混淆
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1)) /* AND with this clears the in-page offset bits */
map_memory_bank
map_memory_bank(bank);
即对bank
做映射
/* Create the kernel linear mapping for one memory bank (MMU builds only). */
static inline void map_memory_bank(struct membank *bank)
{
#ifdef CONFIG_MMU
struct map_desc map;
map.pfn = bank_pfn_start(bank); // first page frame of the bank
map.virtual = __phys_to_virt(bank_phys_start(bank)); // linear-map virtual start
map.length = bank_phys_size(bank); // bank size in bytes
map.type = MT_MEMORY; // normal memory attributes
create_mapping(&map);
#endif
}
其中map_desc
结构体为
/* One mapping request handed to create_mapping(). */
struct map_desc {
unsigned long virtual; // starting virtual address
unsigned long pfn; // starting page frame number
unsigned long length; // length in bytes
unsigned int type; // memory type (index into mem_types[])
};
其中对于map.virtual
的获取为
/* Linear-map translation: physical address -> kernel virtual address. */
#define __phys_to_virt(x) ((x) - PHYS_OFFSET + PAGE_OFFSET)
#define bank_phys_start(bank) (bank)->start
create_mapping
进行映射
void __init create_mapping(struct map_desc *md)
{
unsigned long phys, addr, length, end;
const struct mem_type *type;
pgd_t *pgd;
下面的检查针对的是:内存类型为IO(MT_DEVICE
)或ROM(MT_ROM
),但虚拟地址却落在内核低端映射区间(PAGE_OFFSET
到VMALLOC_END
,即3G到3G+768MB附近)的情况——这样的映射会与vmalloc空间重叠,因此打印警告
if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
"overlaps vmalloc space\n",
__pfn_to_phys((u64)md->pfn), md->virtual);
}
此处的md->pfn >= 0x100000
是为了判断物理地址是否超过4GB(页帧号0x100000
左移12位恰好是4GB),若超过则走create_36bit_mapping
,不做概述
type = &mem_types[md->type]; // 获取内存类型
if (md->pfn >= 0x100000) {
create_36bit_mapping(md, type);
return;
}
addr
为虚拟地址,phys
为物理地址,length
为长度(注意:这里的md->virtual & PAGE_MASK
得到的是页对齐后的虚拟地址,即清除页内偏移,而不是页号)
addr = md->virtual & PAGE_MASK; // 页号,同上
phys = (unsigned long)__pfn_to_phys(md->pfn);
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
获取该虚拟地址在页目录表swapper_pg_dir
中的页目录项
pgd = pgd_offset_k(addr);
end = addr + length; // 页号的结束位置
/* Locate the page-directory entry for a kernel virtual address in init_mm. */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr))
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT) // index of addr's entry in the page directory
填充页目录表,进行映射
do {
unsigned long next = pgd_addr_end(addr, end); // 以2MB为单位排列的地址
// 根据type变量中保存的内存类型,更改对应于被指定BANK的各目录
alloc_init_section(pgd, addr, next, phys, type);
phys += next - addr;
addr = next;
} while (pgd++, addr != end); // 只要不超过end,就获取下一个2MB的虚拟起始地址
alloc_init_section
一个
section
是1MB
/*
 * Return the end of the PGD entry covering 'addr', clamped to 'end'.
 * NOTE: the transcription had lost the '\' line continuation after "({",
 * which breaks this multi-line macro; restored to match the kernel source.
 */
#define pgd_addr_end(addr, end) \
({ \
unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
#define PGDIR_SIZE (1UL << PGDIR_SHIFT) // 2MB on ARM (PGDIR_SHIFT == 21)
下面进行映射
/* ARM 2-level paging: the pmd level is folded; one section is 1MB. */
#define pmd_offset(a, b) ((void *)0)
#define __pmd(x) (x)
#define SECTION_SIZE (1UL << SECTION_SHIFT)
#define SECTION_SHIFT 20
/* Map [addr, end) -> phys: use 1MB section entries when everything is
 * section-aligned, otherwise fall back to page-table (pte) mappings. */
static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
unsigned long end, unsigned long phys,
const struct mem_type *type)
{
pmd_t *pmd = pmd_offset(pgd, addr);
// section-map only if addr, end and phys are all 1MB-aligned
if (((addr | end | phys) & ~SECTION_MASK) == 0) {
pmd_t *p = pmd;
if (addr & SECTION_SIZE)
pmd++;
do {
*pmd = __pmd(phys | type->prot_sect); // section entry: base | attribute bits
phys += SECTION_SIZE; // advance one section (1MB per iteration, not 2MB)
} while (pmd++, addr += SECTION_SIZE, addr != end);
flush_pmd_entry(p);
} else {
/*
* No need to loop; pte's aren't interested in the
* individual L1 entries.
*/
alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
}
}
因此,对某一个节点的bank
就都进行了一一映射flat model
,即
map_memory_bank(bank);
}
回到bootmem_init_node
来,接下来是
start = bank_pfn_start(bank); // 起始页帧号
end = bank_pfn_end(bank); // 结束页帧号
if (start_pfn > start)
start_pfn = start;
if (end_pfn < end)
end_pfn = end;
可以得到起始页帧号和结束页帧号
bootmem_bootmap_pages
boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
此处的end_pfn
和start_pfn
表示的是一个node
的页帧号
/* Bytes of bitmap needed for 'pages' page frames, rounded to sizeof(long). */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
unsigned long bytes = (pages + 7) / 8; // 1 bit represents 1 page frame
return ALIGN(bytes, sizeof(long));
}
unsigned long __init bootmem_bootmap_pages(unsigned long pages) // whole pages needed to hold the bitmap
{
unsigned long bytes = bootmap_bytes(pages);
return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
}
其中的宏定义为
/* Round x up to a multiple of a (a must be a power of two). */
#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
该函数的主要作用是计算用位标识相应节点页数所需字节数(以4字节为一单位)
宏展开
unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
unsigned long bytes = bootmap_bytes(pages);
// round the byte count up to a whole page, then convert to a page count
return (((bytes)+((typeof(bytes))((1UL << 12))-1))&~((typeof(bytes))((1UL << 12))-1)) >> 12;
}
static unsigned long __init bootmap_bytes(unsigned long pages)
{
unsigned long bytes = (pages + 7) / 8; // 1 bit per page frame, rounded up to bytes
return (((bytes)+((typeof(bytes))(sizeof(long))-1))&~((typeof(bytes))(sizeof(long))-1)); // align to sizeof(long)
}
这算法有点复杂,不过可以直接拿来用。若用一页位图来标记页帧,则一页可以表示
4KB = 4*1024*8 = 32768
个页帧
find_bootmap_pfn
根据bootmem_bootmap_pages
求出bootmap
页数,现在需要决定求出的页该放在何处
static unsigned int __init
find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
{
unsigned int start_pfn, i, bootmap_pfn;
start_pfn
指的是内核代码段_end
的下一个位置,__pa
指的是转换成物理地址
start_pfn = PAGE_ALIGN(__pa(_end)) >> PAGE_SHIFT;
bootmap_pfn = 0;
/* Kernel linear-map conversions: virtual <-> physical address. */
#define __pa(x) __virt_to_phys((unsigned long)(x))
#define __va(x) ((void *)__phys_to_virt((unsigned long)(x)))
下面来看具体过程
for_each_nodebank(i, mi, node) {
struct membank *bank = &mi->bank[i];
unsigned int start, end;
start = bank_pfn_start(bank); // 求出起始页帧号
end = bank_pfn_end(bank); // 求出末尾页帧号
if (end < start_pfn) //
continue;
if (start < start_pfn)
start = start_pfn; // 起始页帧号要比bss页帧号大
if (end <= start)
continue;
if (end - start >= bootmap_pages) {
bootmap_pfn = start; // 最后页帧号要能包含bootmap页
break;
}
}
if (bootmap_pfn == 0)
BUG();
return bootmap_pfn;
}
具体的要求是
- 起始位置要在
bss
区域之后 - 并且整个
bank
要比bootmap
大
应该很好找
node_set_online
这个没啥可说的,就是将当前节点状态设置为ONLINE
/*
* Initialise the bootmem allocator for this node, handing the
* memory banks over to bootmem.
*/
node_set_online(node);
相关函数和宏定义为
#define node_set_online(node) node_set_state((node), N_ONLINE)
/* Mark 'node' as being in 'state' by setting its bit in node_states[]. */
static inline void node_set_state(int node, enum node_states state)
{
__node_set(node, &node_states[state]);
}
static inline void __node_set(int node, volatile nodemask_t *dstp)
{
set_bit(node, dstp->bits); // atomically set the node's bit in the mask
}
#define set_bit(nr,p) ATOMIC_BITOP_LE(set_bit,nr,p)
#define ATOMIC_BITOP_LE(name,nr,p) \
(__builtin_constant_p(nr) ? \
____atomic_##name(nr, p) : \
_##name##_le(nr,p))
#define ATOMIC_BITOP_BE(name,nr,p) \
(__builtin_constant_p(nr) ? \
____atomic_##name(nr, p) : \
_##name##_be(nr,p))
#else
#define ATOMIC_BITOP_LE(name,nr,p) _##name##_le(nr,p)
#define ATOMIC_BITOP_BE(name,nr,p) _##name##_be(nr,p)
#endif
/* Atomically set bit 'bit' in the bitmap at 'p' (32-bit word granularity). */
static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long *p)
{
unsigned long flags;
unsigned long mask = 1UL << (bit & 31); // position of the bit inside its 32-bit word
p += bit >> 5; // advance to the word containing the bit
raw_local_irq_save(flags); // atomicity via IRQ-disable on ARM
*p |= mask;
raw_local_irq_restore(flags);
}
节点状态有
enum node_states {
N_POSSIBLE, /* node that may come online at some point */
N_ONLINE, /* node that is currently online */
N_NORMAL_MEMORY, /* node that has regular memory */
#ifdef CONFIG_HIGHMEM
N_HIGH_MEMORY, /* node that has regular or high memory */
#else
N_HIGH_MEMORY = N_NORMAL_MEMORY,
#endif
N_CPU, /* node that has one or more cpus */
NR_NODE_STATES
};
NODE_DATA
pgdat = NODE_DATA(node);
其中宏定义为
/*
* Return a pointer to the node data for node n.
*/
#define NODE_DATA(nid) (&discontig_node_data[nid])
其中discontig_node_data
为,则返回相应的节点描述符,即pg_data_t
类型
/* Statically allocated node descriptors; .bdata links each node
 * to its bootmem allocator state in bootmem_node_data[]. */
pg_data_t discontig_node_data[MAX_NUMNODES] = {
{
.bdata = &bootmem_node_data[0] },
{
.bdata = &bootmem_node_data[1] },
{
.bdata = &bootmem_node_data[2] },
{
.bdata = &bootmem_node_data[3] },
#if MAX_NUMNODES == 16
{
.bdata = &bootmem_node_data[4] },
{
.bdata = &bootmem_node_data[5] },
{
.bdata = &bootmem_node_data[6] },
{
.bdata = &bootmem_node_data[7] },
{
.bdata = &bootmem_node_data[8] },
{
.bdata = &bootmem_node_data[9] },
{
.bdata = &bootmem_node_data[10] },
{
.bdata = &bootmem_node_data[11] },
{
.bdata = &bootmem_node_data[12] },
{
.bdata = &bootmem_node_data[13] },
{
.bdata = &bootmem_node_data[14] },
{
.bdata = &bootmem_node_data[15] },
#endif
};
pg_data_t
类型如下
/* Per-node descriptor: zones, page array slice and bootmem state of one node. */
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES]; // the zones of this node
struct zonelist node_zonelists[MAX_ZONELISTS]; // fallback zone lists
int nr_zones; // number of populated zones
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map; // first struct page of this node within the page array
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
struct page_cgroup *node_page_cgroup;
#endif
#endif
struct bootmem_data *bdata; // boot-time allocator state for this node
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Must be held any time you expect node_start_pfn, node_present_pages
* or node_spanned_pages stay constant. Holding this will also
* guarantee that any pfn_valid() stays that way.
*
* Nests above zone->lock and zone->size_seqlock.
*/
spinlock_t node_size_lock;
#endif
unsigned long node_start_pfn; // first page frame number of the node
unsigned long node_present_pages; /* total number of physical pages */
unsigned long node_spanned_pages; /* total pages, including holes */
int node_id; // node number
wait_queue_head_t kswapd_wait; // wait queue for the swap daemon
struct task_struct *kswapd; // task_struct of the swap daemon
int kswapd_max_order; // largest order kswapd should try to free
} pg_data_t;
init_bootmem_node
对节点描述符的成员变量执行初始化
init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
具有用于管理位图信息的
bdata
是bootmem_data
结构体变量,将bdata
连接到链表之后,将所有位图设置为正在使用
/* Per-node bootmem allocator state: bitmap plus allocation hints. */
typedef struct bootmem_data {
unsigned long node_min_pfn; // first page frame of the node
unsigned long node_low_pfn; // last page frame of the node
void *node_bootmem_map; // bitmap used for page management (1 bit per frame)
unsigned long last_end_off; // offset of the end of the last allocation
unsigned long hint_idx; // hint: next page to try
struct list_head list; // linkage on the sorted bdata_list
} bootmem_data_t;
就是
/* Initialise the bootmem allocator of one node; its bitmap lives at 'freepfn'. */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
unsigned long startpfn, unsigned long endpfn)
{
return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
}
/* Fill in 'bdata', link it into bdata_list and mark every page reserved. */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
unsigned long mapstart, unsigned long start, unsigned long end)
{
unsigned long mapsize;
mminit_validate_memmodel_limits(&start, &end);
bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart)); // virtual address of the bitmap
bdata->node_min_pfn = start;
bdata->node_low_pfn = end;
link_bootmem(bdata); // insert this node's bootmem data into the list
/*
* Initially all pages are reserved - setup_arch() has to
* register free RAM areas explicitly.
*/
mapsize = bootmap_bytes(end - start);
// fill the bitmap with 0xff: every bit set means every page starts out unavailable
memset(bdata->node_bootmem_map, 0xff, mapsize);
bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
bdata - bootmem_node_data, start, mapstart, end, mapsize);
return mapsize;
}
/* Insert 'bdata' into bdata_list, keeping the list sorted by start pfn. */
static void __init link_bootmem(bootmem_data_t *bdata)
{
struct list_head *iter;
list_for_each(iter, &bdata_list) {
bootmem_data_t *ent;
ent = list_entry(iter, bootmem_data_t, list);
if (bdata->node_min_pfn < ent->node_min_pfn)
break;
}
list_add_tail(&bdata->list, iter);
}
这里有一个地方有点奇怪
phys_to_virt(PFN_PHYS(mapstart))
,易知boot_pfn
是一个物理页帧号,而#define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT)
,先是获得对应物理页帧的起始地址,然后将物理地址转为虚拟地址即可
free_bootmem_node
for_each_nodebank(i, mi, node) {
struct membank *bank = &mi->bank[i];
free_bootmem_node(pgdat, bank_phys_start(bank), bank_phys_size(bank));
memory_present(node, bank_pfn_start(bank), bank_pfn_end(bank));
}
在前面管理节点的位图node_bootmem_map
被设置为1
,因此在此处设置为0
/* Mark [physaddr, physaddr+size) free by clearing its bits in the bitmap. */
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
unsigned long start, end;
start = PFN_UP(physaddr); // round up: only whole pages may be freed
end = PFN_DOWN(physaddr + size); // round down for the same reason
mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
}
reserve_bootmem_node
在请求内存分配时,必须将实际包含用于管理的位图从分配对象中排除
在用于管理的位图pgdt->bdata->node_bootmem_map
中,将相应位均设置为1
,表示不可用(即boot_map
页)
这是一个“鸡生蛋、蛋生鸡”的问题
reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
此处的boot_pages << PAGE_SHIFT
用意相信大家已经看出来了吧,是为了和boot_pfn << PAGE_SHIFT
一起确定end
地址的,所有才转换成字节地址
一页是
4KB
,此处boot_pages *2^12B=boot_pages *4KB
,即可求出字节数
#define BOOTMEM_DEFAULT 0
函数体
/* Mark [physaddr, physaddr+size) reserved by setting its bits in the bitmap. */
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
unsigned long start, end;
start = PFN_DOWN(physaddr); // round down: partially covered pages are reserved too
end = PFN_UP(physaddr + size); // round up for the same reason
return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
}
宏定义为
// 页帧的地址上界
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
// 页帧的地址下界
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define BOOTMEM_DEFAULT 0
再次峰回路转
reserve_node_zero
排除0
号节点
/*
* Reserve any special node zero regions.
*/
if (node == 0)
reserve_node_zero(NODE_DATA(node));
void __init reserve_node_zero(pg_data_t *pgdat)
{
unsigned long res_size = 0;
在节点用于管理页的位图中,该函数将内核及其页目录区域所在的0
号节点的相应位默认设置为正在使用
reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
BOOTMEM_DEFAULT);
在内核位置设置为1
reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
BOOTMEM_DEFAULT);
在页目录表位置设为1
reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
BOOTMEM_DEFAULT);
此处要注意
swapper_pg_dir
和内核的位置
bootmem_reserve_initrd
排除虚拟内存盘节点
if (node == initrd_node)
bootmem_reserve_initrd(node);
函数如下,将对应于虚拟内存盘区域的页设置为正在使用
/* Reserve the initrd's pages on 'node'; if they overlap an in-use region,
 * disable the initrd instead. */
static void __init bootmem_reserve_initrd(int node)
{
#ifdef CONFIG_BLK_DEV_INITRD
pg_data_t *pgdat = NODE_DATA(node);
int res;
res = reserve_bootmem_node(pgdat, phys_initrd_start,
phys_initrd_size, BOOTMEM_EXCLUSIVE);
if (res == 0) {
initrd_start = __phys_to_virt(phys_initrd_start); // record the initrd's virtual range
initrd_end = initrd_start + phys_initrd_size;
} else {
printk(KERN_ERR
"INITRD: 0x%08lx+0x%08lx overlaps in-use "
"memory region - disabling initrd\n",
phys_initrd_start, phys_initrd_size);
}
#endif
}
if (end_pfn > memend_pfn)
memend_pfn = end_pfn;
求出本节点的结束页帧号
bootmem_free_node
/*
* Now free memory in each node - free_area_init_node needs
* the sparse mem_map arrays initialized by sparse_init()
* for memmap_init_zone(), otherwise all PFNs are invalid.
*/
for_each_node(node)
bootmem_free_node(node, mi);
来分析函数体
pg_data_t *pgdat = NODE_DATA(node); // 获得节点描述符
int i;
start_pfn = pgdat->bdata->node_min_pfn; // 起始页帧
end_pfn = pgdat->bdata->node_low_pfn; // 结束页帧
// 对保存节点所属的zone的大小zone_size和保存zone中包含的空洞大小的zhole_size进行初始化
memset(zone_size, 0, sizeof(zone_size));
memset(zhole_size, 0, sizeof(zhole_size));
一个节点的划分是
zone
,主要有zone_dma
、zone_normal
和zone_highmem
等区域
zone_size[0] = end_pfn - start_pfn;
zhole_size[0] = zone_size[0];
将全部页帧保存在zone_size[0]
,zone_size
用于保存不同zone
类型具有的页数,zhole_size
数组用于保存不同zone
类型具体的空洞页数
zone_size
和zhole_size
数组均为3
个元素,每个元素记录了当前节点的页帧数和空洞数
for_each_nodebank(i, mi, node)
zhole_size[0] -= bank_pfn_size(&mi->bank[i]);
通过该循环将对应于zone
的空洞的页数保存在zhole_size
,bank_pfn_size
用于查找bank
的页帧数,不可能存在空洞,因此,即可求出空洞数
// node是节点
// zhole_size是空洞数
// zone_size是加上空洞数的总页数
// start_pfn是开始页帧号
free_area_init_node(node, zone_size, start_pfn, zhole_size);
free_area_init_node
该函数初始化了NUMA
系统中所有结点的pg_data_t
和zone
、page
的数据
先来看zone
结构体
/* Per-zone descriptor: watermarks, per-CPU page caches and buddy free lists. */
struct zone {
unsigned long pages_min, pages_low, pages_high; // watermarks used by the page allocator
unsigned long lowmem_reserve[MAX_NR_ZONES]; // pages kept back per lower zone to survive pressure
#ifdef CONFIG_NUMA
int node;
unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
// per-CPU page cache: single pages served without touching the buddy lists
struct per_cpu_pageset *pageset[NR_CPUS];
#else
struct per_cpu_pageset pageset[NR_CPUS];
#endif
spinlock_t lock;
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
struct free_area free_area[MAX_ORDER]; // backbone of the buddy system: free lists per order
#ifndef CONFIG_SPARSEMEM
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
ZONE_PADDING(_pad1_)
spinlock_t lru_lock;
struct {
struct list_head list;
unsigned long nr_scan;
} lru[NR_LRU_LISTS];
struct zone_reclaim_stat reclaim_stat;
unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /* zone flags, see below */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
int prev_priority;
unsigned int inactive_ratio;
ZONE_PADDING(_pad2_)
wait_queue_head_t * wait_table;
unsigned long wait_table_hash_nr_entries;
unsigned long wait_table_bits;
struct pglist_data *zone_pgdat;
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn; // first page frame number of the zone
unsigned long spanned_pages; /* total pages including holes */
unsigned long present_pages; /* pages actually present (holes excluded) */
const char *name;
} ____cacheline_internodealigned_in_smp;
struct free_area {
struct list_head free_list[MIGRATE_TYPES]; // one free list per migrate type
unsigned long nr_free;
};
其中有一个重要的宏定义,先拿出来
/* Page migrate types: pages are grouped by mobility to limit fragmentation. */
#define MIGRATE_UNMOVABLE 0
#define MIGRATE_RECLAIMABLE 1
#define MIGRATE_MOVABLE 2
#define MIGRATE_RESERVE 3
#define MIGRATE_ISOLATE 4 /* can't allocate from here */
#define MIGRATE_TYPES 5
来看函数体
/* Initialise one node: page counts, node_mem_map and all of its zones. */
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
unsigned long node_start_pfn, unsigned long *zholes_size)
{
pg_data_t *pgdat = NODE_DATA(nid); // look up the node descriptor
pgdat->node_id = nid; // set the node number
pgdat->node_start_pfn = node_start_pfn; // set the starting page frame number
// compute node_spanned_pages (holes included) and node_present_pages (holes excluded)
calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat); // point node_mem_map at this node's slice of the page array
#ifdef CONFIG_FLAT_NODE_MEM_MAP
printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
nid, (unsigned long)pgdat,
(unsigned long)pgdat->node_mem_map);
#endif
/* initialise each of the node's zones [DMA, NORMAL, HIGH] */
free_area_init_core(pgdat, zones_size, zholes_size); // fill in the struct zone data
}
可以略微看一下alloc_node_mem_map
和calculate_node_totalpages
,比较浅显易懂
/* Sum the zone sizes into node_spanned_pages; subtract holes for node_present_pages. */
static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
unsigned long realtotalpages, totalpages = 0;
enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++)
totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
zones_size);
pgdat->node_spanned_pages = totalpages;
realtotalpages = totalpages;
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -=
zone_absent_pages_in_node(pgdat->node_id, i,
zholes_size);
pgdat->node_present_pages = realtotalpages;
printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
realtotalpages);
}
/* Allocate the node's struct page array (from bootmem if needed) and
 * point node_mem_map at the entry for node_start_pfn. */
static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
/* Skip empty nodes */
if (!pgdat->node_spanned_pages)
return;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
if (!pgdat->node_mem_map) {
unsigned long size, start, end;
struct page *map;
// align the covered range to MAX_ORDER_NR_PAGES so buddy merging never runs off the map
start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
end = ALIGN(end, MAX_ORDER_NR_PAGES);
size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size); // the page array itself comes from bootmem
pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
/*
* With no DISCONTIG (discontiguous memory model), the global mem_map is just set as node 0's
*/
if (pgdat == NODE_DATA(0)) {
mem_map = NODE_DATA(0)->node_mem_map;
#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
}
#endif
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
}
为什么此处又要指定
pgdat->node_mem_map
,因为前面指向的是一个所有节点位图的总起始地址,此处要指向各自页帧位图的起始地址
free_area_init_core
初始化内存域数据结构涉及的繁重工作由free_area_init_core
执行,它会依次遍历结点的所有内存域
/* 对该节点的每个区[DMA,NORMAL,HIGH]的的结构进行初始化 */
free_area_init_core(pgdat, zones_size, zholes_size); // 填充zone结构体
/*
* Set up the zone data structures:
* - mark all pages reserved
* - mark all memory queues empty
* - clear the memory bitmaps
*/
static void __paginginit free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
enum zone_type j;
int nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
int ret;
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, memmap_pages;
enum lru_list l;
// size为该管理区中的页框数,包括洞
size = zone_spanned_pages_in_node(nid, j, zones_size);
// realsize为管理区中的页框数,不包括洞
realsize = size - zone_absent_pages_in_node(nid, j,
zholes_size);
此处有个前提是在前期已经设置好了各个内存域的页帧数
是什么时候进行设置内存域zone
的页帧的?
即在此处得到了size
值,那么该值从哪里来?
size = zone_spanned_pages_in_node(nid, j, zones_size);
深究一下
/* Pages (holes included) that zone 'zone_type' spans inside node 'nid'. */
static unsigned long __meminit zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *ignored)
{
unsigned long node_start_pfn, node_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
/* Get the start and end of the node and zone */
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
// zone boundaries come from the arch_zone_*_possible_pfn[] arrays
zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
// carve out the part that belongs to the movable zone
adjust_zone_range_for_zone_movable(nid, zone_type,
node_start_pfn, node_end_pfn,
&zone_start_pfn, &zone_end_pfn);
/* Check that this node has pages within the zone's required range */
if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
return 0;
/* Move the zone boundaries inside the node if necessary */
zone_end_pfn = min(zone_end_pfn, node_end_pfn);
zone_start_pfn = max(zone_start_pfn, node_start_pfn);
/* Return the spanned pages */
return zone_end_pfn - zone_start_pfn;
}
#else
static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
unsigned long *zones_size)
{
return zones_size[zone_type];
}
可见是从arch_zone_lowest_possible_pfn[]
和arch_zone_highest_possible_pfn[]
两个数组得到zone_start_pfn
和zone_end_pfn
的
而关于arch_zone_lowest_possible_pfn
和arch_zone_highest_possible_pfn
这两个数组
static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
因此,会在zone_sizes_init-> free_area_init_nodes
进行初始化
/* Record the upper pfn of each zone and hand them to free_area_init_nodes(). */
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
/* collect the page-frame limit of each of the three zones */
max_zone_pfns[ZONE_DMA] =
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
#endif
free_area_init_nodes(max_zone_pfns);
}
/* Compute the global zone boundaries from max_zone_pfn[], then
 * initialise every online node via free_area_init_node(). */
void __init free_area_init_nodes(unsigned long *max_zone_pfn)
{
unsigned long nid;
int i;
/* Sort early_node_map as initialisation assumes it is sorted */
sort_node_map();/* sort all nodes by their start page frame number */
/* Record where the zone boundaries are */
memset(arch_zone_lowest_possible_pfn, 0,
sizeof(arch_zone_lowest_possible_pfn));
memset(arch_zone_highest_possible_pfn, 0,
sizeof(arch_zone_highest_possible_pfn));
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE) /* ZONE_MOVABLE is computed separately below */
continue;
/* each zone starts where the previous one ends */
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
}
arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
/* Find the PFNs that ZONE_MOVABLE begins at in each node */
memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
find_zone_movable_pfns_for_nodes(zone_movable_pfn);
/* Print out the zone ranges */
printk("Zone PFN ranges:\n");
for (i = 0; i < MAX_NR_ZONES; i++) {
if (i == ZONE_MOVABLE)
continue;
printk(" %-8s %0#10lx -> %0#10lx\n",
zone_names[i],
arch_zone_lowest_possible_pfn[i],
arch_zone_highest_possible_pfn[i]);
}
/* Print out the PFNs ZONE_MOVABLE begins at in each node */
printk("Movable zone start PFN for each node\n");
for (i = 0; i < MAX_NUMNODES; i++) {
if (zone_movable_pfn[i])
printk(" Node %d: %lu\n", i, zone_movable_pfn[i]);
}
/* Print out the early_node_map[] */
printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries);
for (i = 0; i < nr_nodemap_entries; i++)
printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid,
early_node_map[i].start_pfn,
early_node_map[i].end_pfn);
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
for_each_online_node(nid) {
/* walk every online node */
pg_data_t *pgdat = NODE_DATA(nid);
/* initialise this node */
free_area_init_node(nid, NULL,
find_min_pfn_for_node(nid), NULL);
/* Any memory on that node */
if (pgdat->node_present_pages)
node_set_state(nid, N_HIGH_MEMORY);
check_for_regular_memory(pgdat);
}
}
初始化struct zone
结构体
nr_all_pages += realsize;
zone->spanned_pages = size;
zone->present_pages = realsize;
#ifdef CONFIG_NUMA
zone->node = nid;
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
/ 100;
zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
spin_lock_init(&zone->lru_lock);
zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
zone->prev_priority = DEF_PRIORITY;
除此之外
zone_pcp_init(zone);
函数体为,主要作用是初始化zone
的结构体变量pageset
/* Initialise the per-CPU pageset(s) of 'zone'. */
static __meminit void zone_pcp_init(struct zone *zone)
{
int cpu;
unsigned long batch = zone_batchsize(zone);
for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_NUMA
/* Early boot. Slab allocator not functional yet */
zone_pcp(zone, cpu) = &boot_pageset[cpu]; // use the static boot_pageset during early boot
setup_pageset(&boot_pageset[cpu],0);
#else
setup_pageset(zone_pcp(zone,cpu), batch);
#endif
}
if (zone->present_pages)
printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
zone->name, zone->present_pages, batch);
}
如果只分配一个页帧,可以直接从per_cpu
缓存中分配,而不用经过伙伴系统,可以提高分配效率;如果per_cpu
中没有可分配的页帧就从伙伴系统中分配batch
个页帧到缓存中来,其缓存中的页帧的page
就挂接在struct list_head lists
中
static struct per_cpu_pageset boot_pageset[NR_CPUS];
/* Per-CPU cache of single pages, refilled from / drained to the buddy
 * lists in chunks of 'batch' pages. */
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
struct list_head list; /* the list of pages */
};
struct per_cpu_pageset {
struct per_cpu_pages pcp;
#ifdef CONFIG_NUMA
s8 expire;
#endif
#ifdef CONFIG_SMP
s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
} ____cacheline_aligned_in_smp;
理解了per_cpu
高速缓存这个便变得好理解,什么是per_cpu
高速缓存呢?为何内核可以知道cpu
高速缓存的结构并分配呢?首先来看一些概念
什么是冷热页:在Linux Kernel
的物理内存管理的buddy system
中,引入了冷热页的概念。冷页表示该空闲页已经不再高速缓存中了(一般是指L2 Cache
),热页表示该空闲页仍然在高速缓存中。冷热页是针对于CPU
缓存而言的,每个zone
中,都会针对于所有的CPU
初始化一个冷热页的per_cpu_pageset
per_cpu
变量主要是为了解决SMP
或者NUMA
系统对变量访问问题,当建立一个Per-CPU
变量时,系统中的每个处理器都会拥有该变量的特有副本;Per-CPU
变量还可以保存在对应的处理器的高速缓存中,这样,在频繁更新时可以获得更好的性能
Linux
内核中的 Per-CPU
变量本质上是一个数组,数组的每个元素对应一个处理器,每个处理器都使用自己的变量副本
最后是这个
/* 设置pgdat->nr_zones和zone->zone_start_pfn成员
* 初始化zone->free_area成员
* 初始化zone->wait_table相关成员
*/
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
函数体为
/* Final zone setup: wait table, nr_zones, start pfn and empty free lists. */
__meminit int init_currently_empty_zone(struct zone *zone,
unsigned long zone_start_pfn,
unsigned long size,
enum memmap_context context)
{
struct pglist_data *pgdat = zone->zone_pgdat; // owning node descriptor
int ret;
ret = zone_wait_table_init(zone, size);
if (ret)
return ret;
// record the node's zone count and this zone's starting page frame
pgdat->nr_zones = zone_idx(zone) + 1;
zone->zone_start_pfn = zone_start_pfn;
mminit_dprintk(MMINIT_TRACE, "memmap_init",
"Initialising map node %d zone %lu pfns %lu -> %lu\n",
pgdat->node_id,
(unsigned long)zone_idx(zone),
zone_start_pfn, (zone_start_pfn + size));
// initialise every free_list and set nr_free = 0
zone_init_free_lists(zone);
return 0;
}
/* Empty every buddy free list of the zone: no free pages yet. */
static void __meminit zone_init_free_lists(struct zone *zone)
{
int order, t;
for_each_migratetype_order(order, t) {
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
}
相关宏及其展开
/* Iterate over every (order, migrate type) pair. */
#define for_each_migratetype_order(order, type) \
for (order = 0; order < MAX_ORDER; order++) \
for (type = 0; type < MIGRATE_TYPES; type++)
/* Make 'list' an empty circular list pointing at itself. */
static inline void INIT_LIST_HEAD(struct list_head *list)
{
list->next = list;
list->prev = list;
}
#define MIGRATE_TYPES 5
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 11
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
// macro-expanded form
static void __meminit zone_init_free_lists(struct zone *zone)
{
int order, t;
for (order = 0; order < MAX_ORDER; order++)
{
for (t = 0; t < MIGRATE_TYPES; t++)
{
INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
zone->free_area[order].nr_free = 0;
}
}
}
最后来看看memmap_init
函数,它的作用是:在与页帧1:1
映射的页数组中,将相应页帧的page
结构体的flags
成员置上PG_reserved
标志位
// size
// nid 节点id
// zone_start_pfn
memmap_init(size, nid, j, zone_start_pfn);
每个页的结构体为
/* Descriptor of one physical page frame.
 * NOTE: the transcription had truncated the comment on 'flags', leaving an
 * unterminated block comment; it is closed here to keep the code well-formed. */
struct page {
unsigned long flags; /* Atomic flags, some possibly updated asynchronously */
atomic_t _count; // usage count; 0 means free / about to be freed
union {
atomic_t _mapcount; // number of page-table mappings; -1 when unmapped
struct { /* SLUB */
u16 inuse;
u16 objects;
};
};
union {
struct {
unsigned long private;
struct address_space *mapping;
};
#if USE_SPLIT_PTLOCKS
spinlock_t ptl;
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct page *first_page; /* Compound tail pages */
};
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* SLUB: freelist req. slab lock */
};
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
*/
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
unsigned long debug_flags; /* Use atomic bitops on this */
#endif
};
函数体为
#ifndef __HAVE_ARCH_MEMMAP_INIT
/* Default memmap_init: boot-time initialisation of a zone's page array. */
#define memmap_init(size, nid, zone, start_pfn) \
memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
#endif
enum memmap_context {
MEMMAP_EARLY,
MEMMAP_HOTPLUG,
};
/* Initialise the struct page of every frame in [start_pfn, start_pfn+size):
 * link it to its zone/node/section, set _count=1, _mapcount=-1 and PG_reserved. */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn, enum memmap_context context)
{
struct page *page;
unsigned long end_pfn = start_pfn + size;
unsigned long pfn;
struct zone *z;
if (highest_memmap_pfn < end_pfn - 1) // highest_memmap_pfn is a global variable
highest_memmap_pfn = end_pfn - 1;
z = &NODE_DATA(nid)->node_zones[zone]; // the zone being initialised
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (context == MEMMAP_EARLY) {
if (!early_pfn_valid(pfn))
continue;
if (!early_pfn_in_nid(pfn, nid))
continue;
}
page = pfn_to_page(pfn); // struct page of this frame
set_page_links(page, zone, nid, pfn);
mminit_verify_page_links(page, zone, nid, pfn);
init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page); // set PG_reserved in page->flags
if ((z->zone_start_pfn <= pfn)
&& (pfn < z->zone_start_pfn + z->spanned_pages)
&& !(pfn & (pageblock_nr_pages - 1)))
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
INIT_LIST_HEAD(&page->lru); // start with an empty lru list head
#ifdef WANT_PAGE_VIRTUAL
/* The shift won't overflow because ZONE_NORMAL is below 4G. */
if (!is_highmem_idx(zone))
set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
}
}
可以看看几个函数
set_page_links
主要用于设置与zone
、node
和section
相关的位
/* Encode the zone, node and section numbers into page->flags. */
static inline void set_page_links(struct page *page, enum zone_type zone,
unsigned long node, unsigned long pfn)
{
set_page_zone(page, zone);
set_page_node(page, node);
set_page_section(page, pfn_to_section_nr(pfn));
}
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); // clear the zone bits
page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; // store the zone number
}
init_page_count
设置page->_count=1
,reset_page_mapcount
设置page->_mapcount=-1
,
static inline void init_page_count(struct page *page)
{
atomic_set(&page->_count, 1); // page starts with a reference count of 1
}
static inline void reset_page_mapcount(struct page *page)
{
atomic_set(&(page)->_mapcount, -1); // -1 means "no page-table mappings yet"
}
有个疑惑的地方,为什么此时page->_count=1
?
因为当page->_count=0
时表示该页被换出,没有产生映射
当_count
的值为0
时,表示该page页面为空闲或即将要被释放的页面。当_count
的值大于0
时,表示该page
页面已经被分配且内核正在使用,暂时不会被释放
_count+=1
时存在多种情况,不仅仅是对该页面的引用而已
注意与
_mapcount
的不同,_mapcount
用法较为单一,即表示这个页面被进程映射的个数
可参考12.2 _count和_mapcount的区别
内核中物理内存的管理机制主要有buddy system
,slab
高速缓存和vmalloc
机制
其中伙伴算法和slab
高速缓存都在物理内存映射区分配物理内存,而vmalloc
机制则在高端内存映射区分配物理内存
bootmem allocator
该api
参考《深入Linux
内核架构》 Wolfgang Mauerer
对内核的接口include/linux/bootmem.h
/* Bootmem allocator API (include/linux/bootmem.h): the alloc_* macros differ
 * only in alignment (cache line vs page), lower bound (DMA limit vs 0),
 * target node, and whether allocation failure panics. */
#define alloc_bootmem(x) \
__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_nopanic(x) \
__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages(x) \
__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_nopanic(x) \
__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
extern void *__alloc_bootmem(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern unsigned long init_bootmem_node(pg_data_t *pgdat,
unsigned long freepfn,
unsigned long startpfn,
unsigned long endpfn);
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
extern unsigned long free_all_bootmem(void);
extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long addr,
unsigned long size);
extern void free_bootmem(unsigned long addr, unsigned long size);