这篇文章将讲述f2fs的node的管理结构f2fs_nm_info的构建和恢复。
build_node_manager:首先分配f2fs_nm_info的空间,然后调用init_node_manager初始化f2fs_nm_info并分配一些位图的空间。最后调用build_free_nids读取一定page中的f2fs_nat_entry对free_nid进行初始化,然后根据curseg_info中的journal来进行最新的更新。
int build_node_manager(struct f2fs_sb_info *sbi)
{
int err;
sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
if (!sbi->nm_info)
return -ENOMEM;
err = init_node_manager(sbi);
if (err)
return err;
build_free_nids(sbi);
return 0;
}
init_node_manager:对f2fs_nm_info中的一些字段的初始化,特别是关于缓存的一些字段,这些字段以后会专门来讲述的,然后分配f2fs_nm_info中的nat_bitmap的空间。
/*
 * init_node_manager - initialize the fields of the node manager.
 * @sbi: filesystem instance
 *
 * Derives max_nid/available_nids from the NAT area geometry recorded in
 * the raw superblock, resets counters and tuning knobs, sets up the
 * free-nid and nat-entry caches (radix trees, lists, locks), and takes
 * a private copy of the NAT version bitmap from the checkpoint area.
 *
 * Returns 0 on success, -EFAULT when the checkpoint bitmap pointer is
 * unavailable, or -ENOMEM when the bitmap copy cannot be allocated.
 */
static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_segs, nat_blocks;
	unsigned char *bitmap_src;

	nm_i->nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);

	/* Only half of the NAT segments hold live entries (two copies kept). */
	nat_segs = le32_to_cpu(raw_super->segment_count_nat) >> 1;
	nat_blocks = nat_segs << le32_to_cpu(raw_super->log_blocks_per_seg);

	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
	/* A few nids are reserved for internal nodes and never handed out. */
	nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;

	nm_i->fcnt = 0;
	nm_i->nat_cnt = 0;
	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;

	/* Free-nid cache: radix tree keyed by nid plus an LRU-style list. */
	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->free_nid_list);

	/* nat_entry cache and the per-set tree used at checkpoint time. */
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
	INIT_LIST_HEAD(&nm_i->nat_entries);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->free_nid_list_lock);
	init_rwsem(&nm_i->nat_tree_lock);

	/* Resume free-nid scanning where the previous mount left off. */
	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);

	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	bitmap_src = __bitmap_ptr(sbi, NAT_BITMAP);
	if (bitmap_src == NULL)
		return -EFAULT;

	nm_i->nat_bitmap = kmemdup(bitmap_src, nm_i->bitmap_size, GFP_KERNEL);
	if (nm_i->nat_bitmap == NULL)
		return -ENOMEM;

	return 0;
}
build_free_nids:f2fs_nm_info中的next_scan_nid这个字段保存着上次浏览到的nid,本次接着上次的进行。首先预读以nid所在的f2fs_nat_block开始的FREE_NID_PAGES个f2fs_nat_block,然后对这些f2fs_nat_block进行遍历,对每一个f2fs_nat_block调用scan_nat_page遍历其中的f2fs_nat_entry,如果检查到nid对应的地址是空的那就加入到free_nid中来,然后nid增加到下一个f2fs_nat_block的起始nid。遍历完之后更新next_scan_nid。由于f2fs_nat_block可能还不是最新的f2fs_nat_entry,最新的可能存放在curseg_info中,所以对curseg_info中的f2fs_nat_entry进行遍历,如果检测到地址为空,那就调用add_free_nid将nid加入到free_nid中,否则,将其从free_nid中删除。
/*
 * build_free_nids - refill the in-memory free-nid cache.
 * @sbi: filesystem instance
 *
 * Resumes scanning at nm_i->next_scan_nid.  Readaheads FREE_NID_PAGES
 * NAT blocks, scans each with scan_nat_page() to collect nids whose
 * block address is NULL_ADDR, then overlays the newer NAT entries still
 * sitting in the CURSEG_HOT_DATA journal: a journalled NULL_ADDR adds
 * the nid as free, any other address removes it from the cache.
 * Finishes by starting readahead for the next scan window.
 */
void build_free_nids(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
int i = 0;
nid_t nid = nm_i->next_scan_nid;
/* Enough free nids cached already — skip the disk scan entirely. */
if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
return;
/* Readahead the NAT blocks we are about to walk. */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true);
down_read(&nm_i->nat_tree_lock);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
/* Advance to the first nid of the next NAT block. */
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
if (unlikely(nid >= nm_i->max_nid))
nid = 0; /* wrap around and continue from the first block */
if (++i >= FREE_NID_PAGES)
break;
}
/* The next call picks up the scan from here. */
nm_i->next_scan_nid = nid;
/*
 * The current-segment journal may hold NAT updates newer than the
 * on-disk blocks just scanned, so let it override the scan results.
 */
down_read(&curseg->journal_rwsem);
for (i = 0; i < nats_in_cursum(journal); i++) {
block_t addr;
addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
nid = le32_to_cpu(nid_in_journal(journal, i));
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
/* Kick off readahead for the window the next scan will cover. */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false);
}
scan_nat_page:对f2fs_nat_block中从start_nid的f2fs_nat_entry进行遍历,首先检查nid不能大于f2fs_nm_info中的max_nid,然后获取f2fs_nat_entry中的地址,如果为空,那就加入到free_nid中,否则什么都不做。
/*
 * scan_nat_page - collect free nids from one on-disk NAT block.
 * @sbi:       filesystem instance
 * @nat_page:  page holding the f2fs_nat_block to scan
 * @start_nid: first nid to examine (may start mid-block)
 *
 * Walks the NAT entries from @start_nid to the end of the block and
 * hands every nid whose block address is NULL_ADDR to add_free_nid().
 * Stops early at max_nid, or when add_free_nid() returns a negative
 * value signalling that the free-nid memory budget is exhausted.
 */
static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	int idx;

	for (idx = start_nid % NAT_ENTRY_PER_BLOCK;
	     idx < NAT_ENTRY_PER_BLOCK; idx++, start_nid++) {
		if (unlikely(start_nid >= nm_i->max_nid))
			break;

		blk_addr = le32_to_cpu(nat_blk->entries[idx].block_addr);
		/* NEW_ADDR must never appear in an on-disk NAT entry. */
		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
		if (blk_addr != NULL_ADDR)
			continue;
		if (add_free_nid(sbi, start_nid, true) < 0)
			break;
	}
}
add_free_nid:首先如果nid==0那就直接返回。接着首先从nat_cache中查找nid的缓存的nat_entry,如果从缓存中查找到了nat_entry并且该nat_entry是没有check_point过的,或者地址不是NULL_ADDR空的,那就说明最新的nat_entry是在使用的,直接返回。如果没有满足的话那就分配一个free_nid的空间,然后调用radix_tree_insert将free_nid插入到以f2fs_nm_info中的free_nid_root为根的radix tree中,如果该项已经在free nid的radix tree中存在,那么释放空间返回就行。接着将free_nid插入到以f2fs_nm_info中的free_nid_list为头的双向链表中,最后更新当前的f2fs_nm_info中的free nid的数量。
/*
 * add_free_nid - try to cache @nid in the free-nid structures.
 * @sbi:   filesystem instance
 * @nid:   candidate node id
 * @build: true when called from the scan path; enables the nat-cache
 *         cross-check below
 *
 * Returns 1 when the nid was added, 0 when it was skipped (nid 0,
 * already present, known to be in use, or preload failure), and -1
 * when the free-nid memory budget is exhausted — which tells
 * scan_nat_page() to stop scanning.
 */
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
if (!available_free_memory(sbi, FREE_NIDS))
return -1;
/* nid 0 is never a valid allocatable node id. */
if (unlikely(nid == 0))
return 0;
if (build) {
/*
 * The cached nat_entry may be newer than the on-disk NAT block
 * we scanned: if it is not checkpointed yet, or records a real
 * block address, the nid is actually in use — skip it.
 */
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR))
return 0;
}
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
i->state = NID_NEW;
/* Preload before taking the spinlock so the insert cannot sleep. */
if (radix_tree_preload(GFP_NOFS)) {
kmem_cache_free(free_nid_slab, i);
return 0;
}
spin_lock(&nm_i->free_nid_list_lock);
if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
/* Already present in the tree — drop our duplicate object. */
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
kmem_cache_free(free_nid_slab, i);
return 0;
}
list_add_tail(&i->list, &nm_i->free_nid_list);
nm_i->fcnt++;
spin_unlock(&nm_i->free_nid_list_lock);
radix_tree_preload_end();
return 1;
}
remove_free_nid:首先调用__lookup_free_nid_list在维护free_nid的radix tree中查找nid对应的free_nid,如果找到了且state==NID_NEW,那就从radix tree和list中将该nid对应的free_nid进行删除并更新f2fs_nm_info中的free nid的数量。最后如果删除了就将相应的free_nid的空间释放。
/*
 * remove_free_nid - drop a nid from the free-nid cache.
 * @nm_i: node manager instance
 * @nid:  node id to remove
 *
 * Looks the nid up in the free-nid radix tree under the list lock; if
 * it is present and still in NID_NEW state it is unlinked from both
 * the tree and the list and the free-nid count is decremented.  The
 * slab object is released only after the spinlock has been dropped.
 */
static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct free_nid *fnid;
	bool drop = false;

	spin_lock(&nm_i->free_nid_list_lock);
	fnid = __lookup_free_nid_list(nm_i, nid);
	if (fnid != NULL && fnid->state == NID_NEW) {
		__del_from_free_nid_list(nm_i, fnid);
		nm_i->fcnt--;
		drop = true;
	}
	spin_unlock(&nm_i->free_nid_list_lock);

	if (drop)
		kmem_cache_free(free_nid_slab, fnid);
}