这篇文章分析f2fs的segment管理结构f2fs_sm_info的创建和恢复过程。
build_segment_manager:首先分配容纳f2fs_sm_info的空间,然后用f2fs_super_block和f2fs_checkpoint中的数据对f2fs_sm_info中关于segment数量和地址的字段进行初始化。接着初始化其中的三个链表discard_list、wait_list、sit_entry_set。如果设置了FLUSH_MERGE挂载选项并且文件系统不是只读的,还会调用create_flush_cmd_control。然后调用build_sit_info构建sit_info,主要是sit_info及其所管理的结构的空间分配。接着调用build_free_segmap构建free_segmap_info,这里主要完成空间的分配,并将所有的segment和section都标记为非空闲(位图全部置1),真正的空闲状态在后面的init_free_segmap中再恢复。然后调用build_curseg来构建各种curseg_info,并完成current segment的恢复。接着调用build_sit_entries来恢复所有的seg_entry和sec_entry,然后调用init_free_segmap来恢复free_segmap和free_secmap,接着调用build_dirty_segmap来构建dirty_seglist_info。最后调用init_min_max_mtime更新sit_info中的min和max的mtime。
int build_segment_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_sm_info *sm_info;
int err;
sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
if (!sm_info)
return -ENOMEM;
sbi->sm_info = sm_info;
sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100;
if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
if (!test_opt(sbi, LFS))
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
INIT_LIST_HEAD(&sm_info->discard_list);
INIT_LIST_HEAD(&sm_info->wait_list);
sm_info->nr_discards = 0;
sm_info->max_discards = 0;
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
return err;
}
err = build_sit_info(sbi);
if (err)
return err;
err = build_free_segmap(sbi);
if (err)
return err;
err = build_curseg(sbi);
if (err)
return err;
build_sit_entries(sbi);
init_free_segmap(sbi);
err = build_dirty_segmap(sbi);
if (err)
return err;
init_min_max_mtime(sbi);
return 0;
}
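build_sit_info:主要完成f2fs_sm_info中的sit_info的空间分配,以及其中几个字段和位图的空间分配。这些包括:所有f2fs_sit_entry对应的内存结构seg_entry的空间、标记脏seg_entry的位图dirty_sentries_bitmap、每个seg_entry中记录当前sit位图的cur_valid_map和上次cp的sit位图的ckpt_valid_map、记录discard块的位图discard_map(仅在f2fs_discard_en为真时分配)、临时位图tmp_map、记录所有section相关信息的sec_entries(仅在segs_per_sec大于1时分配)。最后从checkpoint中拷贝一份sit位图,并对sit_info的其余字段进行赋值。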
static int build_sit_info(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct sit_info *sit_i;
unsigned int sit_segs, start;
char *src_bitmap, *dst_bitmap;
unsigned int bitmap_size;
sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
if (!sit_i)
return -ENOMEM;
SM_I(sbi)->sit_info = sit_i;
sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry), GFP_KERNEL);
if (!sit_i->sentries)
return -ENOMEM;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
sit_i->sentries[start].cur_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].cur_valid_map || !sit_i->sentries[start].ckpt_valid_map)
return -ENOMEM;
if (f2fs_discard_en(sbi)) {
sit_i->sentries[start].discard_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->sentries[start].discard_map)
return -ENOMEM;
}
}
sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
if (!sit_i->tmp_map)
return -ENOMEM;
if (sbi->segs_per_sec > 1) {
sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) * sizeof(struct sec_entry), GFP_KERNEL);
if (!sit_i->sec_entries)
return -ENOMEM;
}
sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
if (!dst_bitmap)
return -ENOMEM;
sit_i->s_ops = &default_salloc_ops;
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
sit_i->sit_bitmap = dst_bitmap;
sit_i->bitmap_size = bitmap_size;
sit_i->dirty_sentries = 0;
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
mutex_init(&sit_i->sentry_lock);
return 0;
}
build_free_segmap:首先分配一个free_segmap_info的空间,然后分配记录所有segment的free_segmap位图,再分配记录所有section的free_secmap位图。接着将这两个位图初始化为全1,表示全部都不是空闲的。然后初始化free_segmap_info中记录main area起始segment号的start_segno,并将空闲的segment和section的个数赋值为0,最后初始化保护位图的自旋锁segmap_lock。
/*
 * build_free_segmap - allocate the free segment/section bitmaps.
 *
 * Both bitmaps are filled with 0xff and both free counters start at zero,
 * so every segment and section begins as "not free"; init_free_segmap()
 * later clears the bits of segments that really are free.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int seg_bytes, sec_bytes;

	free_i = kzalloc(sizeof(*free_i), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	/* One bit per main-area segment. */
	seg_bytes = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = f2fs_kvmalloc(seg_bytes, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	/* One bit per main-area section. */
	sec_bytes = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = f2fs_kvmalloc(sec_bytes, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* The buffers are not zeroed by the allocator; mark all bits set. */
	memset(free_i->free_segmap, 0xff, seg_bytes);
	memset(free_i->free_secmap, 0xff, sec_bytes);

	free_i->free_segments = 0;
	free_i->free_sections = 0;
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));

	spin_lock_init(&free_i->segmap_lock);
	return 0;
}
build_curseg:首先分配NR_CURSEG_TYPE个curseg_info的空间,然后对每个curseg_info进行初始化:先初始化curseg_mutex,分配一页大小(PAGE_SIZE)的summary block即sum_blk,然后初始化管理journal的读写信号量journal_rwsem,接着分配f2fs_journal,最后将segno和next_blkoff分别初始化为NULL_SEGNO和0。全部完成后调用函数restore_curseg_summaries对curseg_info进行恢复。
/*
 * build_curseg - allocate the array of current-segment descriptors.
 *
 * Each of the NR_CURSEG_TYPE slots gets its mutex and journal rwsem
 * initialised, a PAGE_SIZE summary block and a journal buffer allocated,
 * and its position reset to (NULL_SEGNO, offset 0).  The real contents are
 * then loaded by restore_curseg_summaries().
 *
 * Returns -ENOMEM on allocation failure, otherwise whatever
 * restore_curseg_summaries() returns.
 */
static int build_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *cursegs;
	int idx;

	cursegs = kcalloc(NR_CURSEG_TYPE, sizeof(*cursegs), GFP_KERNEL);
	if (!cursegs)
		return -ENOMEM;

	/* Attach first, so the slots stay reachable if a later step fails. */
	SM_I(sbi)->curseg_array = cursegs;

	for (idx = 0; idx < NR_CURSEG_TYPE; idx++) {
		struct curseg_info *cs = &cursegs[idx];

		mutex_init(&cs->curseg_mutex);

		cs->sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!cs->sum_blk)
			return -ENOMEM;

		init_rwsem(&cs->journal_rwsem);

		cs->journal = kzalloc(sizeof(struct f2fs_journal), GFP_KERNEL);
		if (!cs->journal)
			return -ENOMEM;

		/* No segment assigned yet. */
		cs->segno = NULL_SEGNO;
		cs->next_blkoff = 0;
	}

	return restore_curseg_summaries(sbi);
}
restore_curseg_summaries:根据do_checkpoint时curseg_info的两种写入方式,这里首先要判断采用哪种方式进行恢复。首先检查是否设置了CP_COMPACT_SUM_FLAG,如果设置了,那么就采用压缩(compacted)的方式调用read_compacted_summaries对data的summaries进行读取恢复。然后检查之前是否将node的summaries也写入设备了,如果是则先对这些summary块进行预读,接着对node采用普通的方式调用read_normal_summaries对summaries进行读取恢复。这里如果之前没有进行压缩方式的读取恢复,那么会将data和node一起以普通的方式调用read_normal_summaries读取恢复。
/*
 * restore_curseg_summaries - load the summaries of all current segments.
 *
 * If the checkpoint has CP_COMPACT_SUM_FLAG set, the data-type summaries
 * are read through read_compacted_summaries() and the per-type loop starts
 * at CURSEG_HOT_NODE; otherwise it starts at CURSEG_HOT_DATA.  Every type
 * from the starting one up to CURSEG_COLD_NODE is then read with
 * read_normal_summaries().
 *
 * Returns 0 on success, -EINVAL if the compacted read fails, or the error
 * from read_normal_summaries().
 */
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	int type = CURSEG_HOT_DATA;
	int ret;

	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
		int npages = npages_for_summary_flush(sbi, true);

		/* Readahead is only issued for two or more pages. */
		if (npages >= 2)
			ra_meta_pages(sbi, start_sum_block(sbi), npages,
				      META_CP, true);

		if (read_compacted_summaries(sbi))
			return -EINVAL;

		/* Data summaries are done; continue with the node types. */
		type = CURSEG_HOT_NODE;
	}

	/* Read ahead the remaining per-type summary blocks. */
	if (__exist_node_summaries(sbi))
		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
			      NR_CURSEG_TYPE - type, META_CP, true);

	while (type <= CURSEG_COLD_NODE) {
		ret = read_normal_summaries(sbi, type);
		if (ret)
			return ret;
		type++;
	}

	return 0;
}
build_sit_entries:首先将所有的f2fs_sit_entry读取出来恢复seg_entry。先对f2fs_sit_block进行预读,然后遍历所有的segment:先获取当前segment的seg_entry,然后获取当前段的f2fs_sit_entry,接着调用check_block_count检查f2fs_sit_entry中的有效块数不能大于512,还有就是segno不能大于总的段数。然后调用seg_info_from_raw_sit将f2fs_sit_entry的信息同步到seg_entry中,接着更新seg_entry的discard_map,这个map跟seg_entry的cur_valid_map一致,同时把该segment中空闲block的块数累加到sbi的discard_blks中;如果一个section包含多个segment,还要把有效块数累加到对应的sec_entry中。另外由于sit的最新数据可能是放置在curseg_info(CURSEG_COLD_DATA)的journal中的,所以还需要遍历journal中的sit记录来获取最新的f2fs_sit_entry,其恢复方式跟上面是一致的,只是discard_blks和sec_entry的有效块数按新旧valid_blocks的差值进行修正。
/*
 * build_sit_entries - rebuild the in-memory seg_entry array.
 *
 * Pass 1 walks the SIT blocks in readahead-sized batches and loads one raw
 * f2fs_sit_entry per main segment into its seg_entry.  When discard is
 * enabled, discard_map is seeded from cur_valid_map and the segment's
 * non-valid block count is added to sbi->discard_blks.  When a section
 * spans several segments, the section's valid_blocks is accumulated too.
 *
 * Pass 2 replays the SIT records held in the CURSEG_COLD_DATA journal on
 * top of pass 1, adjusting discard_blks and the section counter by the
 * difference between the old and new valid_blocks.
 *
 * The segment number in a journal record comes straight from disk.  It is
 * range-checked against MAIN_SEGS() before being used as an index into
 * sit_i->sentries[]; without that check a corrupted journal entry would
 * make seg_info_from_raw_sit() and the discard_map memcpy write outside
 * the array.  Out-of-range records are reported with f2fs_bug_on() and
 * skipped (the function returns void, so it cannot fail the mount itself).
 */
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int nrpages = MAX_BIO_BLOCKS(sbi) * 8;

	/* Pass 1: on-disk SIT blocks. */
	do {
		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);

		/* Segment range covered by the blocks just read ahead. */
		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct f2fs_sit_block *sit_blk;
			struct page *page;

			se = &sit_i->sentries[start];

			/* Copy the raw entry out before dropping the page. */
			page = get_current_sit_page(sbi, start);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);

			check_block_count(sbi, start, &sit);
			seg_info_from_raw_sit(se, &sit);

			if (f2fs_discard_en(sbi)) {
				memcpy(se->discard_map, se->cur_valid_map,
				       SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks += sbi->blocks_per_seg -
							se->valid_blocks;
			}

			if (sbi->segs_per_sec > 1)
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);

	/* Pass 2: SIT records in the journal override the on-disk entries. */
	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));

		/*
		 * Untrusted on-disk value: never index sentries[] with a
		 * segment number outside the main area.
		 */
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_bug_on(sbi, 1);
			continue;
		}

		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		/* Remember the pass-1 count so the totals can be corrected. */
		old_valid_blocks = se->valid_blocks;

		check_block_count(sbi, start, &sit);
		seg_info_from_raw_sit(se, &sit);

		if (f2fs_discard_en(sbi)) {
			memcpy(se->discard_map, se->cur_valid_map,
			       SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks += old_valid_blocks -
						se->valid_blocks;
		}

		if (sbi->segs_per_sec > 1)
			get_sec_entry(sbi, start)->valid_blocks +=
				se->valid_blocks - old_valid_blocks;
	}
	up_read(&curseg->journal_rwsem);
}
init_free_segmap:对所有的main area的segment进行遍历,检查其seg_entry中的有效块数valid_blocks是否为零,如果为零,则调用__set_free将相应的segment设置为free,同时如果该段所在的section中所有的segment都是free的,也将该section设置为free。然后再对所有的curseg_info进行遍历,调用__set_test_and_inuse将所有current segment对应的segno及其所在的section都从free的map中清除(即标记为已使用)。
static void init_free_segmap(struct f2fs_sb_info *sbi)
{
unsigned int start;
int type;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
struct seg_entry *sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
}
for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
__set_test_and_inuse(sbi, curseg_t->segno);
}
}
__set_free:将segno所对应的free_segmap_info中管理segment的free_segmap的位清零,表示这个segno是空闲的。同时对该segno所在的section在free_segmap中对应的所有位进行检查,如果都是空闲的,那么就将free_segmap_info中管理section的free_secmap的相应位也清零,表示这个section是空闲的。在这个过程中free_segmap_info中的相关数量统计free_segments和free_sections也随之更新。
/*
 * __set_free - mark @segno free, and its section too if now fully free.
 *
 * Clears the segment's bit in free_segmap and increments free_segments
 * unconditionally.  The section's segment range is then searched for a
 * set bit; if none is found, the section bit in free_secmap is cleared and
 * free_sections is incremented.  All of this runs under segmap_lock.
 */
static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int secno = segno / sbi->segs_per_sec;
	unsigned int first_segno = secno * sbi->segs_per_sec;
	unsigned int end_segno = first_segno + sbi->segs_per_sec;
	unsigned int in_use;

	spin_lock(&free_i->segmap_lock);

	clear_bit(segno, free_i->free_segmap);
	free_i->free_segments++;

	/* Look for a set bit in [first_segno, end_segno). */
	in_use = find_next_bit(free_i->free_segmap, end_segno, first_segno);
	if (in_use >= end_segno) {
		clear_bit(secno, free_i->free_secmap);
		free_i->free_sections++;
	}

	spin_unlock(&free_i->segmap_lock);
}
__set_test_and_inuse:检查segno对应的free_segmap_info中管理segment的free_segmap的位是否是空闲的(为0),如果是,就将该位置1,表示不再空闲;然后检查segno所在的section对应的free_segmap_info中管理section的free_secmap的位是否是空闲的,如果是,同样将其置1,表示不再空闲。在这个过程中free_segmap_info中的相关数量统计free_segments和free_sections也随之更新。如果segment的位原本就已经置1,则不做任何修改。
/*
 * __set_test_and_inuse - mark @segno in use if it is currently free.
 *
 * If the segment bit was already set, nothing changes.  Otherwise
 * free_segments is decremented and the section bit is tested-and-set as
 * well, decrementing free_sections only if the section had been free.
 * The update runs under segmap_lock.
 */
static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int secno = segno / sbi->segs_per_sec;

	spin_lock(&free_i->segmap_lock);

	/* Already in use: leave both maps and both counters untouched. */
	if (test_and_set_bit(segno, free_i->free_segmap))
		goto unlock;

	free_i->free_segments--;

	if (!test_and_set_bit(secno, free_i->free_secmap))
		free_i->free_sections--;

unlock:
	spin_unlock(&free_i->segmap_lock);
}
build_dirty_segmap:首先分配dirty_seglist_info的空间,然后分配NR_DIRTY_TYPE个用于管理不同类型的dirty的segment的位图。然后调用init_dirty_segmap利用free_segmap对dirty_segmap进行更新,最后调用init_victim_secmap分配dirty_seglist_info中的victim_secmap,初始化为全部都是零。
/*
 * build_dirty_segmap - allocate and fill the dirty segment bookkeeping.
 *
 * Allocates the dirty_seglist_info and one zeroed, MAIN_SEGS-bit bitmap
 * for each of the NR_DIRTY_TYPE dirty types, populates them with
 * init_dirty_segmap(), then allocates the victim section map.
 *
 * Returns 0 on success or -ENOMEM (including from init_victim_secmap()).
 */
static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int nbytes, type;

	dirty_i = kzalloc(sizeof(*dirty_i), GFP_KERNEL);
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

	/* Every per-type bitmap has one bit per main segment. */
	nbytes = f2fs_bitmap_size(MAIN_SEGS(sbi));

	for (type = 0; type < NR_DIRTY_TYPE; type++) {
		dirty_i->dirty_segmap[type] = f2fs_kvzalloc(nbytes, GFP_KERNEL);
		if (!dirty_i->dirty_segmap[type])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);

	return init_victim_secmap(sbi);
}
init_dirty_segmap:首先在free_segmap_info的free_segmap中查找到不是空闲的segment,如果对应的有效块数是512或者0,那就不是dirty的,其他情况下调用函数__locate_dirty_segment将该segno在dirty_seglist_info的位图中置位。
/*
 * init_dirty_segmap - register every partially valid in-use segment as dirty.
 *
 * Iterates over the in-use segments reported by find_next_inuse().  A
 * segment with zero valid blocks, or with exactly blocks_per_seg valid
 * blocks, is skipped.  A count above blocks_per_seg is reported through
 * f2fs_bug_on() and skipped as well.  Everything else is passed to
 * __locate_dirty_segment() with type DIRTY, under seglist_lock.
 */
static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno;
	unsigned short valid_blocks;

	for (segno = find_next_inuse(free_i, MAIN_SEGS(sbi), 0);
	     segno < MAIN_SEGS(sbi);
	     segno = find_next_inuse(free_i, MAIN_SEGS(sbi), segno + 1)) {
		valid_blocks = get_valid_blocks(sbi, segno, 0);

		/* Empty or completely full segments are not dirty. */
		if (!valid_blocks || valid_blocks == sbi->blocks_per_seg)
			continue;

		/* More valid blocks than a segment holds: inconsistent entry. */
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}

		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}
__locate_dirty_segment:首先检查segno是不是current segment,如果是就不进行操作了。然后再检查dirty_segmap[dirty_type]中对应的位是否已经置位,没有就置位并更新该类型的数量nr_dirty。如果dirty_type是DIRTY,那么还要获取segno对应的seg_entry,取得其type(如果type不小于DIRTY则视为异常,直接返回),然后检查相应的dirty_segmap[type]中是否已经置位,没有就置位并更新数量。
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
if (IS_CURSEG(sbi, segno))
return;
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
dirty_i->nr_dirty[dirty_type]++;
if (dirty_type == DIRTY) {
struct seg_entry *sentry = get_seg_entry(sbi, segno);
enum dirty_type t = sentry->type;
if (unlikely(t >= DIRTY)) {
f2fs_bug_on(sbi, 1);
return;
}
if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
dirty_i->nr_dirty[t]++;
}
}
init_victim_secmap:分配平衡gc时的victim_select的section的位图的空间。
/*
 * init_victim_secmap - allocate the zero-filled victim section bitmap.
 *
 * The bitmap has one bit per main-area section and is stored in
 * DIRTY_I(sbi)->victim_secmap.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int nbytes = f2fs_bitmap_size(MAIN_SECS(sbi));

	dirty_i->victim_secmap = f2fs_kvzalloc(nbytes, GFP_KERNEL);

	return dirty_i->victim_secmap ? 0 : -ENOMEM;
}