ubi & ubifs学习笔记

drivers/mtd/ubi/ubi-media.h 

EC: Erase Count,记录块的擦除次数

/*
 * Erase counter (EC) header: @ec records how many times the eraseblock has
 * been erased. @magic is expected to be UBI_EC_HDR_MAGIC ("UBI#").
 */
struct ubi_ec_hdr { 
__be32 magic;
__u8 version;
__u8 padding1[3];
__be64 ec; /* Warning: the current limit is 31-bit anyway! */
__be32 vid_hdr_offset;
__be32 data_offset;
__be32 image_seq;
__u8 padding2[32];
__be32 hdr_crc;
} __packed;

执行ubiattach时需要指定一个mtd设备;如果某个PEB上没有EC头,则用平均的EC值写入

EC值只有在擦除的时候才会增加1

/* Erase counter header magic number (ASCII "UBI#") */
#define UBI_EC_HDR_MAGIC 0x55424923 
/* Volume identifier header magic number (ASCII "UBI!") */
#define UBI_VID_HDR_MAGIC 0x55424921

VID:volume identifier

/*
 * Volume identifier (VID) header. A new one is written to flash each time a
 * logical eraseblock is mapped to a physical eraseblock.
 */
struct ubi_vid_hdr {
__be32 magic;
__u8 version;
__u8 vol_type;           //volume type (%UBI_VID_DYNAMIC or %UBI_VID_STATIC)
__u8 copy_flag;
__u8 compat;
__be32 vol_id;           //ID of this volume
__be32 lnum;             //logical eraseblock number
__u8 padding1[4];
__be32 data_size;
__be32 used_ebs;
__be32 data_pad;
__be32 data_crc;
__u8 padding2[4];
__be64 sqnum;        // value of the global sequence counter when this header was created
__u8 padding3[12];
__be32 hdr_crc;
} __packed;

/*
* The @sqnum is the value of the global sequence counter at the time when this
* VID header was created. The global sequence counter is incremented each time
* UBI writes a new VID header to the flash, i.e. when it maps a logical
* eraseblock to a new physical eraseblock. The global sequence counter is an
* unsigned 64-bit integer and we assume it never overflows. The @sqnum
* (sequence number) is used to distinguish between older and newer versions of
* logical eraseblocks.
*/

scrub:   //发生bitflip时进行清洗

/*
 * Excerpt of ubi_io_read(): only the handling of a corrected bit-flip is
 * shown; local declarations and the rest of the body are left out.
 */
int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, 
int len)

{

err = mtd_read(ubi->mtd, addr, len, &read, buf);
if (err) {
const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : "";

if (mtd_is_bitflip(err)) {  // a bit-flip is something ECC can correct, so the data read back is still correct
/*
* -EUCLEAN is reported if there was a bit-flip which
* was corrected, so this is harmless.
*
* We do not report about it here unless debugging is
* enabled. A corresponding message will be printed
* later, when it is has been scrubbed.
*/
ubi_msg(ubi, "fixable bit-flip detected at PEB %d",
pnum);
ubi_assert(len == read);
return UBI_IO_BITFLIPS;
}

}

当发生bitflip时,调用ubi_wl_scrub_peb()函数进行scrub

找到volume table:

/*
* Starting ID of internal volumes: 0x7fffefff.
* There is reserved room for 4096 internal volumes.
*/
#define UBI_INTERNAL_VOL_START (0x7FFFFFFF - 4096)

/* The layout volume contains the volume table */

#define UBI_LAYOUT_VOLUME_ID UBI_INTERNAL_VOL_START 
#define UBI_LAYOUT_VOLUME_TYPE UBI_VID_DYNAMIC
#define UBI_LAYOUT_VOLUME_ALIGN 1
#define UBI_LAYOUT_VOLUME_EBS 2
#define UBI_LAYOUT_VOLUME_NAME "layout volume"
#define UBI_LAYOUT_VOLUME_COMPAT UBI_COMPAT_REJECT

/* The maximum number of volumes per one UBI device */
#define UBI_MAX_VOLUMES 128

/* The maximum volume name length */
#define UBI_VOL_NAME_MAX 127

/* Size of the volume table record */
#define UBI_VTBL_RECORD_SIZE sizeof(struct ubi_vtbl_record)



/*
 * One record of the volume table kept in the layout volume. Per the notes in
 * this file, each record describes one user volume.
 */
struct ubi_vtbl_record {
__be32 reserved_pebs;
__be32 alignment;
__be32 data_pad;
__u8 vol_type;
__u8 upd_marker;
__be16 name_len;
__u8 name[UBI_VOL_NAME_MAX+1];   /* room for UBI_VOL_NAME_MAX bytes plus one more */
__u8 flags;
__u8 padding[23];
__be32 crc;
} __packed;

ubi_read_volume_table()读取volume table

process_lvol()

读取vid,如果里面的vol_id==UBI_LAYOUT_VOLUME_ID,则说明这是一个layout volume

err = ubi_io_read_data(ubi, leb[aeb->lnum], aeb->pnum, 0,
ubi->vtbl_size);

读取对应的LEB,存放的是结构化的struct ubi_vtbl_record 数据,这个结构记录的就是用户的分卷信息

ubi设备:

/*
 * In-memory description of one UBI device (abridged: the "..." line marks
 * members that were left out of this excerpt).
 */
struct ubi_device {
struct cdev cdev;
struct device dev;
int ubi_num;          // UBI device number
char ubi_name[sizeof(UBI_NAME_STR)+5];   // UBI device name
int vol_count;  // number of volumes on this device
struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; // the volume devices that live on this UBI device
spinlock_t volumes_lock;
int ref_count;
int image_seq;

int rsvd_pebs;
int avail_pebs;
int beb_rsvd_pebs;
int beb_rsvd_level;
int bad_peb_limit;

int autoresize_vol_id;
int vtbl_slots;
int vtbl_size;
struct ubi_vtbl_record *vtbl;  // volume table records
struct mutex device_mutex;

int max_ec;
/* Note, mean_ec is not updated run-time - should be fixed */
int mean_ec;

...

/* Wear-leveling sub-system's stuff */
struct rb_root used;
struct rb_root erroneous;
struct rb_root free;
int free_count;
struct rb_root scrub;
struct list_head pq[UBI_PROT_QUEUE_LEN];
int pq_head;
spinlock_t wl_lock;
struct mutex move_mutex;
struct rw_semaphore work_sem;
int wl_scheduled;
struct ubi_wl_entry **lookuptbl;
struct ubi_wl_entry *move_from;
struct ubi_wl_entry *move_to;
int move_to_put;
struct list_head works;
int works_count;
struct task_struct *bgt_thread;
int thread_enabled;
char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2];

/* I/O sub-system's stuff */
long long flash_size;
int peb_count;  // number of physical eraseblocks on the attached MTD device
int peb_size;    // size of a physical eraseblock
int bad_peb_count;    // number of bad eraseblocks
int good_peb_count;
int corr_peb_count;
int erroneous_peb_count;
int max_erroneous;
int min_io_size;
int hdrs_min_io_size;
int ro_mode;
int leb_size;  // size of a logical eraseblock
int leb_start;
int ec_hdr_alsize;
int vid_hdr_alsize;
int vid_hdr_offset;
int vid_hdr_aloffset;
int vid_hdr_shift;
unsigned int bad_allowed:1;
unsigned int nor_flash:1;
int max_write_size;
struct mtd_info *mtd;        // the MTD device this UBI device is attached to

void *peb_buf;
struct mutex buf_mutex;
struct mutex ckvol_mutex;

struct ubi_debug_info dbg;
};

ubi卷设备:

/* In-memory description of one UBI volume. */
struct ubi_volume {
struct device dev;
struct cdev cdev;
struct ubi_device *ubi;         // the UBI device this volume belongs to
int vol_id;                              // ID of this volume
int ref_count;
int readers;
int writers;
int exclusive;
int metaonly;

int reserved_pebs;
int vol_type;                        // whether the volume is static or dynamic
int usable_leb_size;
int used_ebs;
int last_eb_bytes;
long long used_bytes;
int alignment;
int data_pad;
int name_len;
char name[UBI_VOL_NAME_MAX + 1];     // name of this volume

int upd_ebs;
int ch_lnum;
long long upd_bytes;
long long upd_received;
void *upd_buf;

struct ubi_eba_table *eba_tbl;   // LEB-to-PEB mapping table
unsigned int checked:1;
unsigned int corrupted:1;
unsigned int upd_marker:1;
unsigned int updating:1;
unsigned int changing_leb:1;
unsigned int direct_writes:1;
};

ubifs和ubi直接交互的接口 fs/ubifs/io.c :

/*
 * Excerpt of ubifs_leb_read(): the read is forwarded to UBI through
 * ubi_read(). Declarations, error handling and the return are left out.
 */
int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
int len, int even_ebadmsg)
{ 
     err = ubi_read(c->ubi, lnum, buf, offs, len);
}



/*
 * Write @len bytes from @buf to LEB @lnum at offset @offs.
 *
 * Returns -EROFS straight away when c->ro_error is set. Otherwise the write
 * goes through ubi_leb_write(), or through dbg_leb_write() when
 * dbg_is_tst_rcvry(c) is true. On failure the error is logged, the file
 * system is switched via ubifs_ro_mode() and the stack is dumped. The error
 * code of the write is returned.
 */
int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
int len)
{
int err;

/* Writing is only legal on a writable medium and a read-write mount. */
ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->ro_error)
return -EROFS;
if (!dbg_is_tst_rcvry(c))
err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
else
err = dbg_leb_write(c, lnum, buf, offs, len);
if (err) {
ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d",
len, lnum, offs, err);
ubifs_ro_mode(c, err);
dump_stack();
}
return err;
}

ubi模块对外提供的接口drivers/mtd/ubi/kapi.c:

int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, ubi_leb_unmap 
int len, int check)

{
     err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check); 
}

//drivers/mtd/ubi/eba.c

/* 
* The UBI Eraseblock Association (EBA) sub-system.
*
* This sub-system is responsible for I/O to/from logical eraseblock.
*/

int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, +entries
 void *buf, int offset, int len, int check)
{
     pnum = vol->eba_tbl->entries[lnum].pnum;   //由leb到peb的映射
    ...
     err = ubi_io_read_data(ubi, buf, pnum, offset, len);   //读取映射的peb块
}

/*
 * Read from a PEB at an offset given relative to ubi->leb_start: the helper
 * adds ubi->leb_start to @offset and forwards to ubi_io_read().
 */
static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf, 
int pnum, int offset, int len) 
{
    ubi_assert(offset >= 0);
    return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len);
}

/*
 * Excerpt of ubi_io_read(): shows how the flash address is derived and passed
 * to mtd_read(). Declarations and error handling are left out.
 */
int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
int len) 
{
     /* Address on the MTD device: start of PEB @pnum plus @offset; the cast widens before multiplying. */
     addr = (loff_t)pnum * ubi->peb_size + offset;
     err = mtd_read(ubi->mtd, addr, len, &read, buf);

}

ubifs存储的基本单位是node:

/**
* struct ubifs_ch - common header node.
* @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC)
* @crc: CRC-32 checksum of the node header
* @sqnum: sequence number
* @len: full node length
* @node_type: node type
* @group_type: node group type
* @padding: reserved for future, zeroes
*
* Every UBIFS node starts with this common part. If the node has a key, the
* key always goes next.
*/
struct ubifs_ch { 
 __le32 magic;          //#define UBIFS_NODE_MAGIC  0x06101831
 __le32 crc;
 __le64 sqnum;
 __le32 len;
 __u8 node_type;
 __u8 group_type;
 __u8 padding[2];
} __packed;

所有的node都包含这个公共的node 头

superblock node,存放在LEB0:     //#define UBIFS_SB_LNUM 0

/* Superblock node; per the notes above it is stored in LEB 0 (UBIFS_SB_LNUM). */
struct ubifs_sb_node {                   //#define UBIFS_SB_LEBS 1
struct ubifs_ch ch;
__u8 padding[2];
__u8 key_hash;
__u8 key_fmt;
__le32 flags;
__le32 min_io_size;         // minimal I/O unit
__le32 leb_size;
__le32 leb_cnt;        // size of the whole file system (presumably counted in LEBs, going by the name)
__le32 max_leb_cnt;      // author's note: LEBs left in the mounted volume after excluding bad blocks. NOTE(review): upstream kernel-doc calls this the maximum LEB count the file system may use - confirm
__le64 max_bud_bytes;
__le32 log_lebs;
__le32 lpt_lebs;
__le32 orph_lebs;
__le32 jhead_cnt;
__le32 fanout; 
__le32 lsave_cnt;
__le32 fmt_version;
__le16 default_compr;
__u8 padding1[2];
__le32 rp_uid;
__le32 rp_gid;
__le64 rp_size;
__le32 time_gran;
__u8 uuid[16];
__le32 ro_compat_version;
__u8 padding2[3968];
} __packed;

master node,存放在LEB1,LEB2:     //#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS)

/* Master node; per the notes above it is stored in LEB 1 and LEB 2. */
struct ubifs_mst_node {                     //#define UBIFS_MST_LEBS 2
struct ubifs_ch ch; 
__le64 highest_inum;
__le64 cmt_no;
__le32 flags;
__le32 log_lnum;
__le32 root_lnum;        // where the root node is located
__le32 root_offs;
__le32 root_len;
__le32 gc_lnum;
__le32 ihead_lnum;
__le32 ihead_offs;
__le64 index_size;
__le64 total_free;
__le64 total_dirty;
__le64 total_used;
__le64 total_dead;
__le64 total_dark;
__le32 lpt_lnum;
__le32 lpt_offs;
__le32 nhead_lnum;
__le32 nhead_offs;
__le32 ltab_lnum;
__le32 ltab_offs;
__le32 lsave_lnum;
__le32 lsave_offs;
__le32 lscan_lnum;
__le32 empty_lebs;
__le32 idx_lebs;
__le32 leb_cnt;
__u8 padding[344];
} __packed;

index对应的node:

/* Index node; @branches is a trailing, variable-length byte array. */
struct ubifs_idx_node {
struct ubifs_ch ch;
__le16 child_cnt;       // number of keys in this node; in the B+tree every key corresponds to one branch
__le16 level;
__u8 branches[];
} __packed;

其中branches指向:struct ubifs_branch,每个结构体描述一个key

/* One branch of an index node: where a child node lives, plus its key. */
struct ubifs_branch {
__le32 lnum;    // LEB number of the child node
__le32 offs;    // offset of the child node within that LEB
__le32 len;
__u8 key[];       // the key; author's note: the child's keys fall between this key and the neighbouring one
} __packed;

其中key指向:

/*
 * A key of UBIFS_SK_LEN bytes, viewable as arrays of 8-, 32- or 64-bit
 * integers or as __le32 words.
 */
union ubifs_key { 
      uint8_t u8[UBIFS_SK_LEN];
     uint32_t u32[UBIFS_SK_LEN/4];
      uint64_t u64[UBIFS_SK_LEN/8];
    __le32 j32[UBIFS_SK_LEN/4];
};

/** 
* key_inum - fetch inode number from key. 
* @c: UBIFS file-system description object 
* @k: key to fetch inode number from 
*/ 
static inline ino_t key_inum(const struct ubifs_info *c, const void *k) 
{ 
    const union ubifs_key *key = k; 

    /* The inode number is taken from the first 32-bit word of the key; @c is unused here. */
    return key->u32[0]; 
}

ubifs采用的是B+Tree,正如下图所示:

                       

一个节点包含15,56,77三个key,三个branch,中间节点就是index node,是不带数据的。

inode对应的node:

/* Inode node; @data is a trailing, variable-length byte array. */
struct ubifs_ino_node {
struct ubifs_ch ch; 
 __u8 key[UBIFS_MAX_KEY_LEN];   // key of this inode
 __le64 creat_sqnum;
 __le64 size;
 __le64 atime_sec;
 __le64 ctime_sec;
 __le64 mtime_sec;
 __le32 atime_nsec;
 __le32 ctime_nsec;
 __le32 mtime_nsec;
__le32 nlink;
__le32 uid;
__le32 gid;
__le32 mode;
__le32 flags;
__le32 data_len;
__le32 xattr_cnt;
__le32 xattr_size;
__u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */
__le32 xattr_names;
__le16 compr_type;
__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
__u8 data[];
} __packed;

data对应的node:

/* Data node: carries file data in the trailing @data array. */
struct ubifs_data_node {
struct ubifs_ch ch; 
__u8 key[UBIFS_MAX_KEY_LEN];       // key of this data node
__le32 size;
__le16 compr_type;
__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
__u8 data[];
} __packed;

ubifs_data_node是ubifs文件数据的载体,对数据的访问,需要首先生成待访问数据所对应节点的key,然后根据这个key到UBIFS wandering 树中找到这个ubifs_data_node。

directory entry node:

/* Directory entry node; the entry name is the trailing @name array. */
struct ubifs_dent_node {
struct ubifs_ch ch;
__u8 key[UBIFS_MAX_KEY_LEN];          // key of this directory entry
__le64 inum;      // inode number of the file this directory entry refers to
__u8 padding1;
__u8 type;
__le16 nlen;
__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
__u8 name[];
} __packed;

非叶子节点包括index node(包含多个key),叶子节点包括inode node,Dent node,data node(包含一个key)

tnc:   tree node cache,是在内存中维护的一棵索引树(即tree node的缓存),构建的依据是Flash上的node

ubifs_tnc_add()

ubifs_tnc_lookup(c, key, node); 根据key在tnc中查找对应的node

/*
 * Excerpt of ubifs_lookup(): build the directory-entry key, look the entry up
 * in the TNC, then fetch the inode that the entry names. The 'done'/'out'
 * labels and the tail of the function are not part of this excerpt.
 */
static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int err; 
union ubifs_key key; 
struct inode *inode = NULL;
struct ubifs_dent_node *dent;
struct ubifs_info *c = dir->i_sb->s_fs_info;

dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);

if (dentry->d_name.len > UBIFS_MAX_NLEN)
return ERR_PTR(-ENAMETOOLONG);

dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
if (!dent)
return ERR_PTR(-ENOMEM);

dent_key_init(c, &key, dir->i_ino, &dentry->d_name);  // build the lookup key from the parent directory's inode number and the entry name

err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);   // look the key up in the TNC; the matching directory entry node is returned in 'dent'
if (err) {
if (err == -ENOENT) {
dbg_gen("not found");
goto done;
} 
goto out; 
}

inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));     // get the inode of the file the directory entry refers to

}



/*
 * Excerpt of read_block(): build the data key for @block of @inode and look
 * the data node up in the TNC. The rest of the function is left out.
 */
static int read_block(struct inode *inode, void *addr, unsigned int block, 
struct ubifs_data_node *dn) 
{ 
struct ubifs_info *c = inode->i_sb->s_fs_info; 
int err, len, out_len; 
union ubifs_key key; 
unsigned int dlen; 

data_key_init(c, &key, inode->i_ino, block);     // build the data key from the file's inode number and the block number within the file
err = ubifs_tnc_lookup(c, &key, dn);    // fetch the data node for that key
if (err) { 
if (err == -ENOENT) 
/* Not found, so it must be a hole */ 
memset(addr, 0, UBIFS_BLOCK_SIZE); 
return err; 
}

}

padding node:

/* Padding node: the common header followed by a single length field. */
struct ubifs_pad_node { 
struct ubifs_ch ch; 
__le32 pad_len;   /* ubifs_pad() stores here the padding left after this node */
} __packed;

/*
* The flash media obliges us to write only in chunks of %c->min_io_size and
* when we have to write less data we add padding node to the write-buffer and
* pad it to the next minimal I/O unit's boundary. Padding nodes help when the
* media is being scanned. If the amount of wasted space is not enough to fit a
* padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
* pattern (%UBIFS_PADDING_BYTE).
*
* Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
* used.
*/
/*
 * Fill @pad bytes at @buf with padding. @pad must be non-negative and a
 * multiple of 8 (asserted). If a padding node fits, one is written and the
 * remainder is zero-filled; otherwise the space is filled with
 * UBIFS_PADDING_BYTE. Nothing is written when @pad is 0.
 */
void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;

ubifs_assert(pad >= 0 && !(pad & 7));

if (pad >= UBIFS_PAD_NODE_SZ) {   // is there room for at least one padding node?
struct ubifs_ch *ch = buf;
struct ubifs_pad_node *pad_node = buf;

ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
ch->node_type = UBIFS_PAD_NODE;
ch->group_type = UBIFS_NO_NODE_GROUP;
ch->padding[0] = ch->padding[1] = 0;
ch->sqnum = 0;
ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
pad -= UBIFS_PAD_NODE_SZ;
pad_node->pad_len = cpu_to_le32(pad);  // how many padding bytes follow the padding node
/* The CRC skips the first 8 bytes of the node (the magic and crc fields). */
crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
ch->crc = cpu_to_le32(crc);
memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);       // zero-fill the space after the padding node
} else if (pad > 0)
/* Too little space, padding node won't fit */
memset(buf, UBIFS_PADDING_BYTE, pad);      //#define UBIFS_PADDING_BYTE 0xCE
}

/*
 * Fill in the common header of @node (magic, length, group type, sequence
 * number, zeroed padding) and compute its CRC over bytes 8..@len. The
 * node_type field is not set here.
 *
 * @pad is a flag: when non-zero, @len is rounded up to 8 and the space up to
 * the next c->min_io_size boundary is filled by ubifs_pad().
 */
void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) 
{
uint32_t crc; 
struct ubifs_ch *ch = node;
unsigned long long sqnum = next_sqnum(c);   // a fresh sequence number is taken for every node

ubifs_assert(len >= UBIFS_CH_SZ);

ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
ch->len = cpu_to_le32(len);
ch->group_type = UBIFS_NO_NODE_GROUP;
ch->sqnum = cpu_to_le64(sqnum);
ch->padding[0] = ch->padding[1] = 0; 
crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
ch->crc = cpu_to_le32(crc);

if (pad) {
len = ALIGN(len, 8);
/* From here on 'pad' holds the number of bytes to fill, not the flag. */
pad = ALIGN(len, c->min_io_size) - len; 
ubifs_pad(c, node + len, pad);
} 
}

/*
 * Prepare the node in @buf and write it to LEB @lnum at offset @offs.
 *
 * The write length is @len rounded up to c->min_io_size. Returns -EROFS when
 * c->ro_error is set, otherwise the result of ubifs_leb_write(); on failure
 * the node is dumped.
 */
int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
int offs)
{
int err, buf_len = ALIGN(len, c->min_io_size); 

dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len, 
buf_len);
ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
ubifs_assert(!c->ro_media && !c->ro_mount);
ubifs_assert(!c->space_fixup);

if (c->ro_error)
return -EROFS;

ubifs_prepare_node(c, buf, len, 1);   // the trailing 1 is the 'pad' flag, not a byte count: the node is padded up to the next min_io_size boundary (not by a single 0xCE byte)
err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
if (err)
ubifs_dump_node(c, buf);

return err; 
}

如果是空的ubi卷,这在挂载的时候是不会报错的,会调用create_default_filesystem()创建一个空的ubifs,建议看看这个函数的实现,可以更好的理解一些字段的含义。

lpt:leb properties tree,记录leb的使用情况, 包括lpt区域自己使用情况的记录和main分区使用情况的记录

/*
 * Excerpt of lpt_init_wr(): c->ltab_cmt is allocated as an array of
 * c->lpt_lebs entries of struct ubifs_lpt_lprops. The rest is left out.
 */
static int lpt_init_wr(struct ubifs_info *c) 
{ 
    c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);

}

struct ubifs_lpt_lprops {     // usage record kept for each LEB
int free;             // space that can be used
int dirty;            // space already released but not yet erased, so it cannot be used yet
unsigned tgc:1;
unsigned cmt:1;
};

因为ubifs是工作在raw Flash之上的,在写之前必须要先擦除。dirty记录的是已经释放了但是还没有擦除的空间,因此也是不能使用的。GC负责垃圾回收,当发现lpt中记录的整个leb都是dirty时,调用ubifs_leb_unmap(),该函数负责leb到peb的解绑,同时将对应的peb擦除,这样这个leb就可以重新使用了。

mkfs.ubifs打包rootfs.ubifs的时候,leb中是可能存在空白页的,即leb中从某个min_io单元开始一直到leb末尾都是0xFF,而这部分空间在lpt中记录的是free。在ubinize之后生成rootfs.ubi,如果直接用nandwrite来写rootfs.ubi,则这些空白页也会被写入,同时也会更新OOB。因为这些页在lpt中记录的是free,所以ubifs可以直接分配使用这些页,但再次写入这些页显然是有问题的:OOB之前已经被写过了,而nandflash的位只能由1变为0,所以OOB写入肯定会出错。有两个方法来解决这个问题:

1.nandwrite的时候leb中后面的空白页直接跳过;

2.在mkfs.ubifs的时候加上-F选项,这需要比较新的内核支持。它的原理是:首次挂载时,对于在lpt中记录有free空间的leb,首先读出leb中的数据,然后擦除leb(ubifs_leb_unmap),再重新写入数据,free部分不用写。

参考文档:

http://www.linux-mtd.infradead.org/doc/ubifs.html

http://www.linux-mtd.infradead.org/doc/ubifs_whitepaper.pdf

http://linux-mtd.infradead.org/doc/ubidesign/ubidesign.pdf

http://www.sourceware.org/jffs2/jffs2.pdf

http://www.linux-mtd.infradead.org/doc/ubifs.pdf

https://github.com/nlitsme/ubidump

发布了85 篇原创文章 · 获赞 26 · 访问量 12万+

猜你喜欢

转载自blog.csdn.net/whuzm08/article/details/86303219