linux 内存管理---bootmem(三)

转载地址:https://blog.csdn.net/whuzm08/article/details/80135358

为什么要使用bootmem分配器,内存管理不是有buddy系统和slab分配器吗?由于在系统初始化的时候需要执行一些内存管理,内存分配的任务,这个时候buddy系统,slab分配器等并没有被初始化好,此时就引入了一种内存管理器bootmem分配器在系统初始化的时候进行内存管理与分配,当buddy系统和slab分配器初始化好后,在mem_init()中对bootmem分配器进行释放,内存管理与分配由buddy系统,slab分配器等进行接管。

bootmem分配器使用一个bitmap来标记物理页是否被占用,分配的时候按照首次适应(first-fit)的原则,从bitmap中进行查找:如果某一位为1,表示对应的页已经被占用,否则表示未被占用。为什么系统运行的时候不使用bootmem分配器呢?bootmem分配器每次都在bitmap中进行线性搜索,效率非常低,而且会在内存的起始端留下许多小的空闲碎片,在需要非常大的内存块的时候,检查位图这一过程就显得代价很高。bootmem分配器是用于在启动阶段分配内存的,对该分配器的需求集中于简单性方面,而不是性能和通用性。

memblock算法是linux内核初始化阶段的一个内存分配器,本质上是取代了原来的bootmem算法. memblock实现比较简单,而它的作用就是在page allocator初始化之前来管理内存,完成分配和释放请求.

为了保证系统的兼容性, 内核为bootmem和memblock提供了相同的API接口.

这样在编译Kernel的时候可以选择nobootmem或者bootmem 来在buddy system起来之前管理memory. 这两种机制对提供的API是一致的,因此对用户是透明的

参见mm/Makefile

ifdef CONFIG_NO_BOOTMEM

obj-y += nobootmem.o

else

obj-y += bootmem.o

endif

由于接口是一致的, 它们共用同一份头文件, 只是实现文件不同:

头文件:          include/linux/bootmem.h
bootmem实现:     mm/bootmem.c
nobootmem实现:   mm/nobootmem.c

Memblock是在早期引导过程中管理内存的方法之一,此时内核内存分配器还没运行. Memblock以前被定义为Logical Memory Block( 逻辑内存块), 但根据Yinghai Lu的补丁, 它被重命名为memblock.

    +-------------------------------------------------------+
    |                   外部模块申请内存                    |
    +-------------------------------------------------------+
               |                                |
               ↓                                ↓
    +------------------------+      +------------------------+
    |       bootmem.c        |      |      nobootmem.c       |
    |   __alloc_bootmem()    |      |   __alloc_bootmem()    |
    +------------------------+      +------------------------+
                                                |
                                                ↓
                                +-------------------------------+
                                |          memblock.c           |
                                | memblock_find_in_range_node() |
                                +-------------------------------+

这里仅仅介绍bootmem。

前面一节《 linux 内存管理---物理内存探测(二)》记录了物理内存的分布,那么之后就交由bootmem来管理了。

/*
 * bootmem_init() - bring up the boot-time (bootmem) allocator from the
 * firmware-provided boot_mem_map.
 *
 * Steps visible in this excerpt:
 *   1. compute reserved_end from the initrd and the kernel image end (_end);
 *   2. scan all BOOT_MEM_RAM entries to find min_low_pfn, max_low_pfn and
 *      mapstart, the page frame where the bootmem bitmap will be stored;
 *   3. clamp low memory at HIGHMEM_START (recording the highmem range when
 *      CONFIG_HIGHMEM is set);
 *   4. initialise the bitmap through init_bootmem_node();
 *   5. free the usable low RAM ranges, then reserve the bitmap itself and
 *      the initrd, and finally report every RAM range via memory_present().
 *
 * Part of the original function is elided ("...") in this excerpt.
 */
static void __init bootmem_init(void)

{

    unsigned long reserved_end;

    unsigned long mapstart = ~0UL;

    unsigned long bootmap_size;

    int i;

    /*

     * Init any data related to initrd. It's a nop if INITRD is

     * not selected. Once that done we can determine the low bound

     * of usable memory.

     */

    reserved_end = max(init_initrd(),

               (unsigned long) PFN_UP(__pa_symbol(&_end)));    /* page frame at the end of the kernel image or the initrd, whichever is higher */

    /*

     * max_low_pfn is not a number of pages. The number of pages

     * of the system is given by 'max_low_pfn - min_low_pfn'.

     */

    min_low_pfn = ~0UL;

    max_low_pfn = 0;

    /*

     * Find the highest page frame number we have available.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)

            continue;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end = PFN_DOWN(boot_mem_map.map[i].addr

                + boot_mem_map.map[i].size);

        if (end > max_low_pfn)

            max_low_pfn = end;

        if (start < min_low_pfn)

            min_low_pfn = start;

        /* Ranges that lie entirely below reserved_end cannot hold the bitmap. */
        if (end <= reserved_end)

            continue;

        /* Keep the lowest candidate found so far. */
        if (start >= mapstart)

            continue;

        /*
         * Page frame where the bootmem bitmap will be stored: the first
         * page frame right behind the kernel image.
         */
        mapstart = max(reserved_end, start);

    }

    if (min_low_pfn >= max_low_pfn)

        panic("Incorrect memory mapping !!!");

    if (min_low_pfn > ARCH_PFN_OFFSET) {

        pr_info("Wasting %lu bytes for tracking %lu unused pages\n",

            (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),

            min_low_pfn - ARCH_PFN_OFFSET);

    } else if (min_low_pfn < ARCH_PFN_OFFSET) {

        pr_info("%lu free pages won't be used\n",

            ARCH_PFN_OFFSET - min_low_pfn);

    }

    min_low_pfn = ARCH_PFN_OFFSET;   /* ARCH_PFN_OFFSET is defined as PFN_UP(PHYS_OFFSET) */

    /*

     * Determine low and high memory ranges

     */

    max_pfn = max_low_pfn;

    if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) {         /* low memory is capped at HIGHMEM_START (0x20000000 + 768M) */

#ifdef CONFIG_HIGHMEM

        highstart_pfn = PFN_DOWN(HIGHMEM_START);

        highend_pfn = max_low_pfn;

#endif

        max_low_pfn = PFN_DOWN(HIGHMEM_START);

    }

    /*

     * Initialize the boot-time allocator with low memory only.

     */

    /*
     * The bitmap spans min_low_pfn..max_low_pfn, including any holes in
     * between; the return value is the bitmap size in bytes.
     */
    bootmap_size = init_bootmem_node(NODE_DATA(0), mapstart,

                     min_low_pfn, max_low_pfn);

    ...

    /*

     * Register fully available low RAM pages with the bootmem allocator.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end, size;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end   = PFN_DOWN(boot_mem_map.map[i].addr

                    + boot_mem_map.map[i].size);

        /*

         * Reserve usable memory.

         */

        switch (boot_mem_map.map[i].type) {

        case BOOT_MEM_RAM:

            break;

        case BOOT_MEM_INIT_RAM:

            memory_present(0, start, end);

            continue;

        default:

            /* Not usable memory */

            continue;

        }

        /*

         * We are rounding up the start address of usable memory

         * and at the end of the usable range downwards.

         */

        if (start >= max_low_pfn)

            continue;

        if (start < reserved_end)   /* only pages from the end of the kernel image onwards are marked usable */

            start = reserved_end;

        if (end > max_low_pfn)

            end = max_low_pfn;

        /*

         * ... finally, is the area going away?

         */

        if (end <= start)

            continue;

        size = end - start;

        /* Register lowmem ranges */

#ifdef CONFIG_BRCMSTB

        /* carve out space for bmem */

        /* Excludes bmem; per the article, bmem is memory kept back for device DMA. */
        brcm_free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);

#else

        /*
         * Mark the page frames from the end of the kernel image up to the
         * end of this contiguous range as free.
         */
        free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);

#endif

    }

    /*

     * Reserve the bootmap memory.

     */

    reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);  /* the pages holding the bootmem bitmap itself stay reserved */

    /*

     * Reserve initrd memory if needed.

     */

    finalize_initrd();   /* marks the pages occupied by the initrd as reserved */

    /*

     * Call memory_present() on all valid ranges, for SPARSEMEM.

     * This must be done after setting up bootmem, since memory_present()

     * may allocate bootmem.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)

            continue;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end   = PFN_DOWN(boot_mem_map.map[i].addr

                    + boot_mem_map.map[i].size);

        /*
         * This mainly matters because of physical memory holes. Per the
         * article, on this MIPS platform the low 256M is DRAM, the next
         * 256M is register space, and a further 768M of DRAM follows, so
         * any system with more than 256M of memory necessarily has a hole.
         */
        memory_present(0, start, end);

    }

}

/*
 * init_bootmem_node() - set up the bootmem bitmap for one memory node.
 * @pgdat:    node descriptor; its ->bdata is the bootmem state to initialise
 * @freepfn:  page frame where the bitmap itself is stored
 * @startpfn: first page frame covered by the bitmap
 * @endpfn:   end page frame of the covered range
 *
 * Thin wrapper that forwards to init_bootmem_core() and returns its result,
 * the size of the bitmap in bytes.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
                unsigned long startpfn, unsigned long endpfn)
{
    bootmem_data_t *node_bdata = pgdat->bdata;

    return init_bootmem_core(node_bdata, freepfn, startpfn, endpfn);
}

在include/asm-mips/mach-generic/spaces.h中:

/* Default physical base address: 0 unless the platform overrides it. */
#ifndef PHYS_OFFSET

#define PHYS_OFFSET  _AC(0, UL)

#endif

/* 32-bit kernels only: CAC_BASE is 0x80000000. */
#ifdef CONFIG_32BIT

#define CAC_BASE  _AC(0x80000000, UL)

#endif

/* Size, in megabytes, of the upper memory region. */
#define BRCM_MAX_UPPER_MB    _AC(768, UL)

/* Physical address where the upper memory region begins. */
#define UPPERMEM_START        _AC(0x20000000, UL)

/*
 * Boundary between low and high memory:
 * 0x20000000 + (768 << 20) = 0x50000000.
 */
#define HIGHMEM_START        (UPPERMEM_START + (BRCM_MAX_UPPER_MB << 20))

/* With the defaults above this is 0x80000000 + 0. */
#ifndef PAGE_OFFSET

#define PAGE_OFFSET  (CAC_BASE + PHYS_OFFSET)

#endif

/*
 * init_bootmem_core() - initialise one bootmem descriptor and its bitmap.
 * @bdata:    bootmem descriptor to fill in
 * @mapstart: page frame at which the bitmap is stored
 * @start:    first page frame covered by the bitmap
 * @end:      end page frame of the covered range
 *
 * Records the covered range and the bitmap location in @bdata, links the
 * descriptor via link_bootmem(), and sets every bit of the bitmap so that
 * all pages start out reserved; free RAM has to be registered explicitly
 * afterwards.
 *
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
    unsigned long mapstart, unsigned long start, unsigned long end)
{
    unsigned long bitmap_bytes;

    /* May adjust start/end, so it must run before they are used below. */
    mminit_validate_memmodel_limits(&start, &end);

    bdata->node_min_pfn = start;
    bdata->node_low_pfn = end;
    /* Virtual address of the bitmap that tracks allocation state. */
    bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
    link_bootmem(bdata);

    /*
     * Initially all pages are reserved - setup_arch() has to
     * register free RAM areas explicitly.
     */
    bitmap_bytes = bootmap_bytes(end - start);   /* one bit per page frame */
    memset(bdata->node_bootmem_map, 0xff, bitmap_bytes);

    bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
        bdata - bootmem_node_data, start, mapstart, end, bitmap_bytes);

    return bitmap_bytes;
}

/*
 * bootmap_bytes() - size in bytes of a bitmap that tracks @pages pages.
 *
 * One bit is needed per page, so the page count is divided by 8 (rounding
 * up) and the result is then aligned up to a multiple of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
    unsigned long nbytes;

    nbytes = DIV_ROUND_UP(pages, 8);
    nbytes = ALIGN(nbytes, sizeof(long));

    return nbytes;
}

一个byte有8个bit,每个bit用来记录一个页的状态:bit为1表示该页已被占用(保留),为0表示该页空闲可用(初始化时bitmap被memset为0xff,即所有页都先标记为已占用),因此一个byte可以记录8个页。

/*
 * free_bootmem() - mark a physical address range as free in bootmem.
 * @addr: physical start address of the range
 * @size: length of the range in bytes
 *
 * The range is converted to page frames with PFN_UP() on the start and
 * PFN_DOWN() on the end before being handed to mark_bootmem() with
 * reserve == 0. kmemleak is told about the freed range as well.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
    unsigned long first_pfn = PFN_UP(addr);
    unsigned long end_pfn = PFN_DOWN(addr + size);

    kmemleak_free_part(__va(addr), size);

    mark_bootmem(first_pfn, end_pfn, 0, 0);
}

/*
 * mark_bootmem() - reserve or free the page frames [start, end).
 * @start:   first page frame of the range
 * @end:     end page frame of the range
 * @reserve: non-zero to reserve the range, zero to free it
 * @flags:   passed through to mark_bootmem_node()
 *
 * Walks bdata_list and applies the operation node by node, since the range
 * may span several nodes. If a reservation fails part-way, the page frames
 * reserved so far ([start, current position)) are freed again and the error
 * is returned. Reaching the end of the list without having covered the
 * whole range is a BUG().
 *
 * Returns 0 on success or the error from mark_bootmem_node().
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
                int reserve, int flags)
{
    unsigned long cur = start;
    bootmem_data_t *bdata;

    list_for_each_entry(bdata, &bdata_list, list) {
        unsigned long stop;
        int err;

        if (cur < bdata->node_min_pfn ||
            cur >= bdata->node_low_pfn) {
            /* Skipping a node is only legal before any progress was made. */
            BUG_ON(cur != start);
            continue;
        }

        /* Do not run past this node's last page frame. */
        stop = min(bdata->node_low_pfn, end);

        err = mark_bootmem_node(bdata, cur, stop, reserve, flags);
        if (reserve && err) {
            /* Undo the part that had already been reserved. */
            mark_bootmem(start, cur, 0, 0);
            return err;
        }

        if (stop == end)
            return 0;

        /* Continue with the remainder in the next node. */
        cur = bdata->node_low_pfn;
    }

    BUG();
}

/*
 * mark_bootmem_node() - reserve or free page frames within a single node.
 * @bdata:   bootmem descriptor of the node
 * @start:   first page frame of the range
 * @end:     end page frame of the range
 * @reserve: non-zero to reserve, zero to free
 * @flags:   forwarded to __reserve() when reserving
 *
 * Converts the page frame numbers into bitmap indices relative to the
 * node's first page frame and dispatches to __reserve() or __free().
 *
 * Returns the result of __reserve() when reserving, 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
                unsigned long start, unsigned long end,
                int reserve, int flags)
{
    unsigned long first_idx = start - bdata->node_min_pfn;
    unsigned long end_idx = end - bdata->node_min_pfn;

    if (!reserve) {
        __free(bdata, first_idx, end_idx);
        return 0;
    }

    return __reserve(bdata, first_idx, end_idx, flags);
}

/*
 * __free() - clear the bitmap bits for indices [sidx, eidx) of one node.
 * @bdata: bootmem descriptor whose bitmap is updated
 * @sidx:  first bitmap index to clear
 * @eidx:  end bitmap index
 *
 * Also pulls bdata->hint_idx back to @sidx when the hint currently lies
 * beyond it. Clearing a bit that was not set triggers BUG().
 *
 * Part of the original function is elided ("...") in this excerpt.
 */
static void __init __free(bootmem_data_t *bdata,

            unsigned long sidx, unsigned long eidx)

{

    unsigned long idx;

    ...

    if (bdata->hint_idx > sidx)

        bdata->hint_idx = sidx;

    for (idx = sidx; idx < eidx; idx++)

        /* A bit that was already clear means a page is being freed twice. */
        if (!test_and_clear_bit(idx, bdata->node_bootmem_map))

            BUG();

}

/*
 * __reserve() - set the bitmap bits for indices [sidx, eidx) of one node.
 * @bdata: bootmem descriptor whose bitmap is updated
 * @sidx:  first bitmap index to set
 * @eidx:  end bitmap index
 * @flags: if BOOTMEM_EXCLUSIVE is set, hitting an already-set bit is an
 *         error
 *
 * In exclusive mode, finding a bit that is already set rolls back the bits
 * set so far ([sidx, idx)) and returns -EBUSY. Otherwise the double
 * reservation is only reported through bdebug() and the loop carries on.
 *
 * Returns 0 on success, -EBUSY on an exclusive conflict.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
            unsigned long eidx, int flags)
{
    int exclusive = flags & BOOTMEM_EXCLUSIVE;
    unsigned long idx;

    for (idx = sidx; idx < eidx; idx++) {
        /* Bit was clear before: the page is now reserved, move on. */
        if (!test_and_set_bit(idx, bdata->node_bootmem_map))
            continue;

        if (exclusive) {
            /* Page was already reserved: undo our part and fail. */
            __free(bdata, sidx, idx);
            return -EBUSY;
        }

        bdebug("silent double reserve of PFN %lx\n",
            idx + bdata->node_min_pfn);
    }

    return 0;
}

调用bootmem_init()函数之后,bootmem就初始化完成了。可能有人会问:在bootmem初始化之前内核要分配内存怎么办?bootmem初始化过程本身要用到的内存又从哪里来?这是一个先有鸡还是先有蛋的问题。内核采取的办法是:在bootmem可用之前(包括bootmem自身的初始化过程),内核的一切内存需求都使用静态内存,即以全局变量的形式提供,比如bootmem的初始化过程中:

NODE_DATA(0) 宏展开为:

/* Every node id maps to the single statically allocated node descriptor. */
#define NODE_DATA(nid)        (&contig_page_data)

/*
 * The one global node descriptor. Its bdata pointer is wired at build time
 * to the first element of the static bootmem_node_data[] array, so no
 * dynamic allocation is needed to reach the bootmem state.
 */
struct pglist_data __refdata contig_page_data = {

    .bdata = &bootmem_node_data[0]

};

contig_page_data就是定义为一个全局结构体变量,其中bdata为它的成员变量指针,直接指向另外一个全局变量:

/* Statically allocated bootmem descriptors, MAX_NUMNODES entries. */
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

bootmem初始化完成后就可以通过下列函数分配内存了:

/*
 * Convenience wrappers around the __alloc_bootmem*() family. All of them
 * pass BOOTMEM_LOW_LIMIT as the goal; they differ in the alignment used
 * (SMP_CACHE_BYTES, PAGE_SIZE or caller-supplied), in whether a node is
 * given, and in whether the *_nopanic backend is called.
 * NOTE(review): the _nopanic variants presumably return NULL on failure
 * instead of panicking - confirm against mm/bootmem.c.
 */

/* x bytes, SMP_CACHE_BYTES alignment. */
#define alloc_bootmem(x) \

    __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes, caller-supplied alignment. */
#define alloc_bootmem_align(x, align) \

    __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)

/* x bytes, SMP_CACHE_BYTES alignment, nopanic backend. */
#define alloc_bootmem_nopanic(x) \

    __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes, PAGE_SIZE alignment. */
#define alloc_bootmem_pages(x) \

    __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* x bytes, PAGE_SIZE alignment, nopanic backend. */
#define alloc_bootmem_pages_nopanic(x) \

    __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* x bytes for node pgdat, SMP_CACHE_BYTES alignment. */
#define alloc_bootmem_node(pgdat, x) \

    __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes for node pgdat, SMP_CACHE_BYTES alignment, nopanic backend. */
#define alloc_bootmem_node_nopanic(pgdat, x) \

    __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes for node pgdat, PAGE_SIZE alignment. */
#define alloc_bootmem_pages_node(pgdat, x) \

    __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* x bytes for node pgdat, PAGE_SIZE alignment, nopanic backend. */
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \

    __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

下面简单进行说明:

/*
 * alloc_bootmem_core() - allocate @size bytes from one node's bootmem bitmap.
 * @bdata: bootmem descriptor of the node to allocate from
 * @size:  number of bytes requested
 * @align: requested alignment in bytes
 * @goal:  preferred physical start address (0 for none)
 * @limit: physical address the allocation must stay below (0 for none)
 *
 * Searches the bitmap for a run of PFN_UP(size) clear bits starting at an
 * index aligned to the page-granular step derived from @align. The search
 * begins at the goal (or the node start), or at bdata->hint_idx if that is
 * further along; in the latter case a second pass from the original start
 * index is made if the first pass finds nothing.
 *
 * On success the pages are reserved with BOOTMEM_EXCLUSIVE, the memory is
 * zeroed and its virtual address is returned. Returns NULL if no suitable
 * run exists.
 *
 * Part of the original function is elided ("...") in this excerpt.
 */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,

                    unsigned long size, unsigned long align,

                    unsigned long goal, unsigned long limit)

{

    unsigned long fallback = 0;

    unsigned long min, max, start, sidx, midx, step;

    ...

    min = bdata->node_min_pfn;

    max = bdata->node_low_pfn;

    /* From here on goal and limit are page frame numbers, not addresses. */
    goal >>= PAGE_SHIFT;

    limit >>= PAGE_SHIFT;

    if (limit && max > limit)

        max = limit;

    if (max <= min)

        return NULL;

    /* Search stride in pages; at least one page. */
    step = max(align >> PAGE_SHIFT, 1UL);

    /* Honour the goal only if it lies strictly inside (min, max). */
    if (goal && min < goal && goal < max)

        start = ALIGN(goal, step);

    else

        start = ALIGN(min, step);

    /* Convert to bitmap indices relative to the node's first page frame. */
    sidx = start - bdata->node_min_pfn;

    midx = max - bdata->node_min_pfn;

    if (bdata->hint_idx > sidx) {

        /*

         * Handle the valid case of sidx being zero and still

         * catch the fallback below.

         */

        /* Remember the original start (+1 so that 0 means "no fallback"). */
        fallback = sidx + 1;

        sidx = align_idx(bdata, bdata->hint_idx, step);

    }

    while (1) {

        int merge;

        void *region;

        unsigned long eidx, i, start_off, end_off;

find_block:

        sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);      /* find the next free page frame as a candidate start */

        sidx = align_idx(bdata, sidx, step);

        eidx = sidx + PFN_UP(size);

        if (sidx >= midx || eidx > midx)

            break;

        /* Verify that every page of the candidate run is free. */
        for (i = sidx; i < eidx; i++)

            if (test_bit(i, bdata->node_bootmem_map)) {

                /* Restart from the next aligned index past the busy page. */
                sidx = align_idx(bdata, i, step);

                if (sidx == i)

                    sidx += step;

                goto find_block;

            }

        /*
         * If the previous allocation ended in the middle of the page
         * directly before sidx, start this one in the unused tail of
         * that page instead of at the page boundary.
         */
        if (bdata->last_end_off & (PAGE_SIZE - 1) &&

                PFN_DOWN(bdata->last_end_off) + 1 == sidx)

            start_off = align_off(bdata, bdata->last_end_off, align);

        else

            start_off = PFN_PHYS(sidx);

        /* merge is 1 when the allocation begins in the previous page. */
        merge = PFN_DOWN(start_off) < sidx;

        end_off = start_off + size;

        bdata->last_end_off = end_off;

        bdata->hint_idx = PFN_UP(end_off);

        /*

         * Reserve the area now:

         */

        /*
         * When merging, the first page is skipped (+ merge); it was
         * already reserved by the previous allocation, and an exclusive
         * reserve would fail on it.
         */
        if (__reserve(bdata, PFN_DOWN(start_off) + merge,

                PFN_UP(end_off), BOOTMEM_EXCLUSIVE))         /* mark the allocated page frames as reserved */

            BUG();

        region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +

                start_off);

        memset(region, 0, size);

        /*

         * The min_count is set to 0 so that bootmem allocated blocks

         * are never reported as leaks.

         */

        kmemleak_alloc(region, size, 0, 0);

        return region;

    }

    /* Nothing found from the hint onwards: retry once from the original start. */
    if (fallback) {

        sidx = align_idx(bdata, fallback - 1, step);

        fallback = 0;

        goto find_block;

    }

    return NULL;

}

参考文档:

http://winfred-lu.blogspot.com/2011/03/linux-boot-memory-allocator-mips.html

https://github.com/gatieme/LDD-LinuxDeviceDrivers/tree/master/study/kernel/02-memory/03-initialize

--------------------- 本文来自 淡泊的猪 的CSDN 博客 ,全文地址请点击:https://blog.csdn.net/whuzm08/article/details/80135358?utm_source=copy

猜你喜欢

转载自blog.csdn.net/kunkliu/article/details/82966133