fsck_verify通过前面的检查结果来修正元数据。
首先是对nid的检查情况进行查看,f2fs_fsck中的nat_area_bitmap最初是通过读取f2fs_nat_block中所有的f2fs_nat_entry来记录所有有效的nid的,但是在遍历的过程中调用sanity_check_nid的时候,已经将所有正常的nid对应的位都clear掉了,所以在检查这个位图的时候,如果发现还有些位是置位的,那么证明有错误发生。然后是f2fs_fsck中记录硬链接链表的hard_link_list_head,正常情况下应该是NULL,如果不是,说明也出错了。接着,f2fs_fsck的main_area_bitmap记录了在遍历过程中所访问到的所有block,也就是记录了所有的有效块,所以理论上它应该跟f2fs_fsck的sit_area_bitmap一致,不一致就代表着错误的发生。还有就是f2fs_fsck的check_result中的valid_blk_cnt、valid_node_cnt、valid_nat_entry_cnt、valid_inode_cnt、sit_free_segs也要跟f2fs_checkpoint保持一致。然后是调用check_curseg_offset对current segment进行检查,next_blkoff对应的block必须是空闲的,对于LFS写的,该segment剩下的block必须全部都是空闲的。另外,在遍历过程中对seg_entry的type也进行了一定的修改,这里也要与原始的type进行比对。
/*
 * Excerpt from fsck_verify: reporting phase. The declarations of i, node,
 * nr_unref_nid, ret and force are outside this excerpt. Each check prints
 * [Ok..] or [Fail]; a failing check sets ret to EXIT_ERR_CODE and raises
 * c.bug_on.
 */

/* Report and count every NID whose bit is still set in the NAT bitmap. */
for (i = 0; i < fsck->nr_nat_entries; i++) {
if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0) {
printf("NID[0x%x] is unreachable\n", i);
nr_unref_nid++;
}
}
/* A non-empty hard-link list is reported node by node and flagged as a bug. */
if (fsck->hard_link_list_head != NULL) {
node = fsck->hard_link_list_head;
while (node) {
printf("NID[0x%x] has [0x%x] more unreachable links\n", node->nid, node->links);
node = node->next;
}
c.bug_on = 1;
}
/* Summary line for the NAT bitmap scan above. */
printf("[FSCK] Unreachable nat entries ");
if (nr_unref_nid == 0x0) {
printf(" [Ok..] [0x%x]\n", nr_unref_nid);
} else {
printf(" [Fail] [0x%x]\n", nr_unref_nid);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* The SIT bitmap and the main-area bitmap must be byte-for-byte equal. */
printf("[FSCK] SIT valid block bitmap checking ");
if (memcmp(fsck->sit_area_bitmap, fsck->main_area_bitmap, fsck->sit_area_bitmap_sz) == 0x0) {
printf("[Ok..]\n");
} else {
printf("[Fail]\n");
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* Hard-link check passes only when the hard-link list is empty. */
printf("[FSCK] Hard link checking for regular file ");
if (fsck->hard_link_list_head == NULL) {
printf(" [Ok..] [0x%x]\n", fsck->chk.multi_hard_link_files);
} else {
printf(" [Fail] [0x%x]\n", fsck->chk.multi_hard_link_files);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* Compare the counters collected in fsck->chk with the values held in sbi. */
printf("[FSCK] valid_block_count matching with CP ");
if (sbi->total_valid_block_count == fsck->chk.valid_blk_cnt) {
printf(" [Ok..] [0x%x]\n", (u32)fsck->chk.valid_blk_cnt);
} else {
printf(" [Fail] [0x%x]\n", (u32)fsck->chk.valid_blk_cnt);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* Node count is checked twice: against valid_node_cnt ... */
printf("[FSCK] valid_node_count matcing with CP (de lookup) ");
if (sbi->total_valid_node_count == fsck->chk.valid_node_cnt) {
printf(" [Ok..] [0x%x]\n", fsck->chk.valid_node_cnt);
} else {
printf(" [Fail] [0x%x]\n", fsck->chk.valid_node_cnt);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* ... and against valid_nat_entry_cnt. */
printf("[FSCK] valid_node_count matcing with CP (nat lookup) ");
if (sbi->total_valid_node_count == fsck->chk.valid_nat_entry_cnt) {
printf(" [Ok..] [0x%x]\n", fsck->chk.valid_nat_entry_cnt);
} else {
printf(" [Fail] [0x%x]\n", fsck->chk.valid_nat_entry_cnt);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
printf("[FSCK] valid_inode_count matched with CP ");
if (sbi->total_valid_inode_count == fsck->chk.valid_inode_cnt) {
printf(" [Ok..] [0x%x]\n", fsck->chk.valid_inode_cnt);
} else {
printf(" [Fail] [0x%x]\n", fsck->chk.valid_inode_cnt);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* Free-segment count is read straight from the checkpoint (little-endian). */
printf("[FSCK] free segment_count matched with CP ");
if (le32_to_cpu(F2FS_CKPT(sbi)->free_segment_count) == fsck->chk.sit_free_segs) {
printf(" [Ok..] [0x%x]\n", fsck->chk.sit_free_segs);
} else {
printf(" [Fail] [0x%x]\n", fsck->chk.sit_free_segs);
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* check_curseg_offset returns 0 when the current segments pass its checks. */
printf("[FSCK] next block offset is free ");
if (check_curseg_offset(sbi) == 0) {
printf(" [Ok..]\n");
} else {
printf(" [Fail]\n");
ret = EXIT_ERR_CODE;
c.bug_on = 1;
}
/* A non-zero result from check_sit_types forces the repair phase to run. */
printf("[FSCK] fixing SIT types\n");
if (check_sit_types(sbi) != 0)
force = 1;
/* Catch-all: c.bug_on may have been raised anywhere earlier in the run. */
printf("[FSCK] other corrupted bugs ");
if (c.bug_on == 0) {
printf(" [Ok..]\n");
} else {
printf(" [Fail]\n");
ret = EXIT_ERR_CODE;
}
以上只是对这些数据的一致性问题进行了检查和打印。下面开始真正的修复工作。硬链接的问题由fix_hard_links来完成,nat的问题由fix_nat_entries来完成,sit的问题是由函数rewrite_sit_area_bitmap解决。move_curseg_info、write_curseg_info、flush_curseg_sit_entries共同完成current segment的问题,最后fix_checkpoint完成上述的统计数据到f2fs_checkpoint的修复工作。
/*
 * Excerpt from fsck_verify: repair phase. Entered when force is set, or when
 * c.fix_on is set and c.ro is clear.
 */
if (force || (c.fix_on && !c.ro)) {
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
if (force || c.bug_on) {
/* Repair order: hard links, NAT entries, SIT bitmaps, current segments. */
fix_hard_links(sbi);
fix_nat_entries(sbi);
rewrite_sit_area_bitmap(sbi);
/* Current segments are relocated only if they still fail the offset check. */
if (check_curseg_offset(sbi)) {
move_curseg_info(sbi, SM_I(sbi)->main_blkaddr);
write_curseg_info(sbi);
flush_curseg_sit_entries(sbi);
}
/* Finally push the corrected counters and flags into the checkpoint. */
fix_checkpoint(sbi);
} else if (is_set_ckpt_flags(cp, CP_FSCK_FLAG)) {
/* No bug found, but the checkpoint carries CP_FSCK_FLAG: rewrite it. */
write_checkpoint(sbi);
}
}
fix_hard_links:如果f2fs_fsck的硬链接链表hard_link_list_head是NULL,那就直接返回,否则遍历这个链表的节点,对每个节点的ino进行基本的sanity_check_nid检查,然后将对应的f2fs_inode的链接数修复为记录在链表节点中的实际的链接数actual_links。最后将修改之后的f2fs_inode写回。
/*
 * fix_hard_links - rewrite i_links of every inode left on the hard-link list.
 * @sbi: filesystem instance being repaired
 *
 * Walks fsck->hard_link_list_head. For each node the inode block is re-read
 * through sanity_check_nid(); if that succeeds, i_links is replaced by the
 * node's actual_links and the block is written back to ni.blk_addr. If the
 * sanity check fails the inode is left untouched: neither node_blk nor ni can
 * be trusted at that point, so writing would risk clobbering an unrelated
 * block. Every list node is freed and the list head is reset to NULL so no
 * dangling pointer remains in fsck.
 */
static void fix_hard_links(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct hard_link_node *tmp, *node;
	struct f2fs_node *node_blk = NULL;
	struct node_info ni;
	int ret;

	if (fsck->hard_link_list_head == NULL)
		return;

	node_blk = (struct f2fs_node *)calloc(BLOCK_SZ, 1);
	ASSERT(node_blk != NULL);

	node = fsck->hard_link_list_head;
	while (node) {
		/* Start from a known state so a failed lookup leaves no garbage. */
		memset(&ni, 0, sizeof(ni));

		if (sanity_check_nid(sbi, node->nid, node_blk, F2FS_FT_MAX, TYPE_INODE, &ni)) {
			/* Block contents / address are unreliable: do not write. */
			FIX_MSG("Failed to fix, rerun fsck.f2fs");
		} else {
			node_blk->i.i_links = cpu_to_le32(node->actual_links);
			FIX_MSG("File: 0x%x i_links= 0x%x -> 0x%x", node->nid, node->links, node->actual_links);
			ret = dev_write_block(node_blk, ni.blk_addr);
			ASSERT(ret >= 0);
		}

		tmp = node;
		node = node->next;
		free(tmp);
	}

	/* All nodes were freed above; do not leave a dangling head pointer. */
	fsck->hard_link_list_head = NULL;
	free(node_blk);
}
fix_nat_entries:前面提过,运行到fsck_verify时,f2fs_fsck中的nat_area_bitmap正常情况下应该已经将所有正常的nid的bit全部clear掉了,所以剩下的仍然置位的bit所对应的nid都应该是无效的。这个函数就是处理这些nid的:它逐位检查f2fs_fsck中的nat_area_bitmap,发现置位了的,就调用nullify_nat_entry来将对应的nid无效掉。
/*
 * fix_nat_entries - invalidate every NID still marked in the NAT bitmap.
 * @sbi: filesystem instance being repaired
 *
 * Scans fsck->nat_area_bitmap bit by bit over nr_nat_entries and hands each
 * NID whose bit is set to nullify_nat_entry().
 */
static void fix_nat_entries(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	u32 nid = 0;

	while (nid < fsck->nr_nat_entries) {
		if (f2fs_test_bit(nid, fsck->nat_area_bitmap))
			nullify_nat_entry(sbi, nid);
		nid++;
	}
}
nullify_nat_entry:这个函数完成将特定的nid无效掉,这个需要将记录最新的nid对应的nat清空就行。记录最新nat可能存在两个地方,一个是在current segment的nat_journal中,还有一个就是记录在设备上的f2fs_nat_entry。所以nullify_nat_entry首先在nat_journal中查找相应的nid,如果找到了就将相应的nat_journal的f2fs_nat_entry清空。否则需要读取对应的f2fs_nat_block,找到nid的f2fs_nat_entry,将其清空并写回。
/*
 * nullify_nat_entry - wipe the NAT record of one NID.
 * @sbi: filesystem instance being repaired
 * @nid: node id whose NAT entry is to be cleared
 *
 * The NAT journal kept in the CURSEG_HOT_DATA summary block is searched
 * first; a match is zeroed in memory and the function returns. Otherwise the
 * NAT block returned by current_nat_addr() is read, the entry is modified and
 * the block is written back. For the node and meta inode numbers only
 * block_addr is changed (to 0x1); any other NID has its whole entry zeroed.
 */
void nullify_nat_entry(struct f2fs_sb_info *sbi, u32 nid)
{
	struct curseg_info *hot_data = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *jnl = &hot_data->sum_blk->journal;
	struct f2fs_nat_block *nat_block;
	struct f2fs_nat_entry *entry;
	pgoff_t nat_addr;
	int slot = 0;
	int ret;

	/* Pass 1: the in-memory NAT journal. */
	while (slot < nats_in_cursum(jnl)) {
		if (le32_to_cpu(nid_in_journal(jnl, slot)) == nid) {
			memset(&nat_in_journal(jnl, slot), 0, sizeof(struct f2fs_nat_entry));
			FIX_MSG("Remove nid [0x%x] in nat journal", nid);
			return;
		}
		slot++;
	}

	/* Pass 2: the NAT block on the device. */
	nat_block = (struct f2fs_nat_block *)calloc(BLOCK_SZ, 1);
	ASSERT(nat_block);

	nat_addr = current_nat_addr(sbi, nid);
	ret = dev_read_block(nat_block, nat_addr);
	ASSERT(ret >= 0);

	entry = &nat_block->entries[nid % NAT_ENTRY_PER_BLOCK];

	if (nid != F2FS_NODE_INO(sbi) && nid != F2FS_META_INO(sbi)) {
		memset(entry, 0, sizeof(struct f2fs_nat_entry));
		FIX_MSG("Remove nid [0x%x] in NAT", nid);
	} else {
		FIX_MSG("nid [0x%x] block_addr= 0x%x -> 0x1", nid,
			le32_to_cpu(entry->block_addr));
		entry->block_addr = cpu_to_le32(0x1);
	}

	ret = dev_write_block(nat_block, nat_addr);
	ASSERT(ret >= 0);
	free(nat_block);
}
rewrite_sit_area_bitmap:这个函数主要完成f2fs_fsck中记录遍历过程中的真实有效块的位图main_area_bitmap到f2fs_sit_entry的同步。首先遍历所有的segno,将segno对应的f2fs_sit_block读取进来,然后找到相应的f2fs_sit_entry,然后用main_area_bitmap中segno对应的位置的位图替代f2fs_sit_entry中的位图,然后根据这个位图更新其中的有效块数,还有就是将更新后的seg_entry中的segment的type也同步到f2fs_sit_entry,最后将修复后的f2fs_sit_entry写回。
/*
 * rewrite_sit_area_bitmap - rebuild every SIT entry from the main-area bitmap.
 * @sbi: filesystem instance being repaired
 *
 * Resets the SIT journal count in the CURSEG_COLD_DATA summary block, then,
 * for each segment, copies that segment's slice of fsck->main_area_bitmap
 * into both the on-disk SIT entry and the in-memory seg_entry, recounts the
 * valid blocks from the copied map, packs (type, valid_blocks) into vblocks
 * and writes the SIT page back. A seg_entry type >= NO_CHECK_TYPE is
 * reported and stored as type 0.
 */
void rewrite_sit_area_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct sit_info *sit_i = SIT_I(sbi);
	char *seg_map = fsck->main_area_bitmap;
	unsigned int segno;

	/* Drop any journalled SIT entries. */
	CURSEG_I(sbi, CURSEG_COLD_DATA)->sum_blk->journal.n_sits = 0;

	segno = 0;
	while (segno < TOTAL_SEGS(sbi)) {
		struct f2fs_sit_block *sit_blk = get_current_sit_page(sbi, segno);
		struct f2fs_sit_entry *sit = &sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, segno)];
		struct seg_entry *se = get_seg_entry(sbi, segno);
		u16 nr_valid = 0;
		u16 seg_type;
		int byte;

		/* On-disk entry: take the bitmap slice and recount set bits. */
		memcpy(sit->valid_map, seg_map, SIT_VBLOCK_MAP_SIZE);
		for (byte = 0; byte < SIT_VBLOCK_MAP_SIZE; byte++)
			nr_valid += get_bits_in_byte(sit->valid_map[byte]);

		/* In-memory entry mirrors the same map and count. */
		memcpy(se->cur_valid_map, seg_map, SIT_VBLOCK_MAP_SIZE);
		se->valid_blocks = nr_valid;

		seg_type = se->type;
		if (seg_type >= NO_CHECK_TYPE) {
			ASSERT_MSG("Invalide type and valid blocks=%x,%x", segno, nr_valid);
			seg_type = 0;
		}
		sit->vblocks = cpu_to_le16((seg_type << SIT_VBLOCKS_SHIFT) | nr_valid);

		rewrite_current_sit_page(sbi, segno, sit_blk);
		free(sit_blk);

		seg_map += SIT_VBLOCK_MAP_SIZE;
		segno++;
	}
}
之前提到过,函数check_curseg_offset用来检查current segment是否出现了问题,这里也是通过这个函数来检查是不是出了问题,有问题就通过move_curseg_info、write_curseg_info、flush_curseg_sit_entries进行修复。check_curseg_offset主要检查next_blkoff对应的block必须是空闲的。对于LFS写的,该segment剩下的block必须全部都是空闲的。
/*
 * check_curseg_offset - validate the write position of every current segment.
 * @sbi: filesystem instance being checked
 *
 * For each of the NO_CHECK_TYPE current segments:
 *   - next_blkoff must index inside the per-segment validity bitmap;
 *   - the block at next_blkoff must be free in the segment's cur_valid_map;
 *   - unless the segment is allocated in SSR mode, every block after
 *     next_blkoff up to blocks_per_seg must be free as well.
 *
 * An SSR segment only skips the third check for itself; the remaining
 * current segments are still examined.
 *
 * Return: 0 if all current segments pass, -EINVAL on the first violation.
 */
int check_curseg_offset(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NO_CHECK_TYPE; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		struct seg_entry *se;
		int j, nblocks;

		/* next_blkoff >> 3 is the byte index into the validity bitmap. */
		if ((curseg->next_blkoff >> 3) >= SIT_VBLOCK_MAP_SIZE)
			return -EINVAL;

		se = get_seg_entry(sbi, curseg->segno);
		if (f2fs_test_bit(curseg->next_blkoff, (const char *)se->cur_valid_map)) {
			ASSERT_MSG("Next block offset is not free, type:%d", i);
			return -EINVAL;
		}

		/*
		 * SSR segments may have used blocks after next_blkoff, so the
		 * tail check does not apply -- but keep checking the other
		 * current segments instead of returning success early.
		 */
		if (curseg->alloc_type == SSR)
			continue;

		nblocks = sbi->blocks_per_seg;
		for (j = curseg->next_blkoff + 1; j < nblocks; j++) {
			if (f2fs_test_bit(j, (const char *)se->cur_valid_map)) {
				ASSERT_MSG("LFS must have free section:%d", i);
				return -EINVAL;
			}
		}
	}
	return 0;
}
move_curseg_info:对NO_CHECK_TYPE种current segment进行遍历,然后是调用函数find_next_free_block在main area中找到相应的seg_entry与遍历的current segment有着相同类型的segment中的空闲块或者整个segment空闲的起始块,然后返回其segno,将这个segno替换该类型对应的current segment,然后修改current segment中的字段segno、next_blkoff、alloc_type改为SSR(洞写)、sum_blk。然后调用函数reset_curseg根据current segment的type来设置curseg_info中sum_blk中的summary_footer的类型,由于刚才找空闲块的时候如果是空闲segment,那么这个segment的type可能跟需要查找的类型是不对应的,所以reset_curseg也完成对seg_entry的类型的修改。
/*
 * move_curseg_info - relocate every current segment to a free position.
 * @sbi:  filesystem instance being repaired
 * @from: block address at which each search for a free block starts
 *
 * For each of the NO_CHECK_TYPE current segments: the current summary block
 * is first written to the summary address of the segment being left, then
 * find_next_free_block() picks a new block address starting from @from. The
 * curseg is pointed at that segment/offset with alloc_type SSR, the summary
 * entries of the new segment are loaded from disk, and reset_curseg() is
 * called for the type.
 */
void move_curseg_info(struct f2fs_sb_info *sbi, u64 from)
{
	int type, ret;

	for (type = 0; type < NO_CHECK_TYPE; type++) {
		struct curseg_info *curseg = CURSEG_I(sbi, type);
		struct f2fs_summary_block new_sum;
		u32 prev_segno = curseg->segno;
		u64 target = from;
		u64 sum_addr;

		/* Persist the summary of the segment we are about to leave. */
		sum_addr = GET_SUM_BLKADDR(sbi, curseg->segno);
		ret = dev_write_block(curseg->sum_blk, sum_addr);
		ASSERT(ret >= 0);

		/* Pick the new location for this curseg type. */
		ret = find_next_free_block(sbi, &target, 0, type);
		ASSERT(ret == 0);

		curseg->segno = GET_SEGNO(sbi, target);
		curseg->next_blkoff = OFFSET_IN_SEG(sbi, target);
		curseg->alloc_type = SSR;

		/* Load the summary entries that belong to the new segment. */
		sum_addr = GET_SUM_BLKADDR(sbi, curseg->segno);
		ret = dev_read_block(&new_sum, sum_addr);
		ASSERT(ret >= 0);
		memcpy(curseg->sum_blk, &new_sum, SUM_ENTRIES_SIZE);

		reset_curseg(sbi, type);
		DBG(1, "Move curseg[%d] %x -> %x after %"PRIx64"\n", type, prev_segno, curseg->segno, from);
	}
}
write_curseg_info:将修改后的current segment的segno和blkoff更新到f2fs_checkpoint中的cur_data_segno(cur_node_segno)、cur_data_blkoff(cur_node_blkoff),分配的类型alloc_type也一并进行更新。
void write_curseg_info(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
int i;
for (i = 0; i < NO_CHECK_TYPE; i++) {
cp->alloc_type[i] = CURSEG_I(sbi, i)->alloc_type;
if (i < CURSEG_HOT_NODE) {
set_cp(cur_data_segno[i], CURSEG_I(sbi, i)->segno);
set_cp(cur_data_blkoff[i], CURSEG_I(sbi, i)->next_blkoff);
} else {
int n = i - CURSEG_HOT_NODE;
set_cp(cur_node_segno[n], CURSEG_I(sbi, i)->segno);
set_cp(cur_node_blkoff[n], CURSEG_I(sbi, i)->next_blkoff);
}
}
}
flush_curseg_sit_entries:之前的move_curseg_info调用函数reset_curseg的过程中可能对seg_entry进行了修改,这个函数将current的seg_entry同步到f2fs_sit_entry中写回。
/*
 * flush_curseg_sit_entries - write back the SIT entry of each current segment.
 * @sbi: filesystem instance being repaired
 *
 * For every current segment, the SIT page holding its entry is fetched, the
 * entry's vblocks field is rebuilt from the in-memory seg_entry (type and
 * valid_blocks), and the page is rewritten.
 */
static void flush_curseg_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int type = 0;

	while (type < NO_CHECK_TYPE) {
		struct curseg_info *curseg = CURSEG_I(sbi, type);
		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
		struct f2fs_sit_block *sit_blk = get_current_sit_page(sbi, curseg->segno);
		struct f2fs_sit_entry *sit;

		sit = &sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, curseg->segno)];
		sit->vblocks = cpu_to_le16((se->type << SIT_VBLOCKS_SHIFT) | se->valid_blocks);

		rewrite_current_sit_page(sbi, curseg->segno, sit_blk);
		free(sit_blk);

		type++;
	}
}
fix_checkpoint:首先将f2fs_fsck中的check_result中的统计结果同步到f2fs_checkpoint中,这些数据包括ckpt_flags、free_segment_count、valid_block_count、valid_node_count、valid_inode_count。然后按照cp pack中的顺序跳过orphan inode进行写回。
static void fix_checkpoint(struct f2fs_sb_info *sbi)
{
struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
unsigned long long cp_blk_no;
u32 flags = CP_UMOUNT_FLAG;
block_t orphan_blks = 0;
u32 i;
int ret;
u_int32_t crc = 0;
if (is_set_ckpt_flags(cp, CP_ORPHAN_PRESENT_FLAG)) {
orphan_blks = __start_sum_addr(sbi) - 1;
flags |= CP_ORPHAN_PRESENT_FLAG;
}
set_cp(cp_pack_total_block_count, 8 + orphan_blks + get_sb(cp_payload));
flags = update_nat_bits_flags(sb, cp, flags);
flags |= CP_NOCRC_RECOVERY_FLAG;
set_cp(ckpt_flags, flags);
set_cp(free_segment_count, get_free_segments(sbi));
set_cp(valid_block_count, fsck->chk.valid_blk_cnt);
set_cp(valid_node_count, fsck->chk.valid_node_cnt);
set_cp(valid_inode_count, fsck->chk.valid_inode_cnt);
crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, cp, CHECKSUM_OFFSET);
*((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) = cpu_to_le32(crc);
cp_blk_no = get_sb(cp_blkaddr);
if (sbi->cur_cp == 2)
cp_blk_no += 1 << get_sb(log_blocks_per_seg);
ret = dev_write_block(cp, cp_blk_no++);
ASSERT(ret >= 0);
for (i = 0; i < get_sb(cp_payload); i++) {
ret = dev_write_block(((unsigned char *)cp) + i * F2FS_BLKSIZE, cp_blk_no++);
ASSERT(ret >= 0);
}
cp_blk_no += orphan_blks;
for (i = 0; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
ret = dev_write_block(curseg->sum_blk, cp_blk_no++);
ASSERT(ret >= 0);
}
ret = dev_write_block(cp, cp_blk_no++);
ASSERT(ret >= 0);
if (flags & CP_NAT_BITS_FLAG)
write_nat_bits(sbi, sb, cp, sbi->cur_cp);
}