Analysis of the Principle of Block Device--Create a Drive Example of a Memory Simulation Disk

This article briefly introduces the basic structure of a disk, then uses memory (RAM) to emulate a block device (disk) with a simple driver (ramdisk) in order to explain how block device drivers work, and finally shows how to use the resulting block device.



One, the basic structure of the disk

To understand the principle of block devices, you first need to understand the basic structure of a disk. The figure below shows one disk surface. The smallest unit of the disk is the sector, which the kernel usually treats as 512 bytes. Each concentric ring on a disk surface is a track.
Insert picture description here

Multiple disk surfaces together form a disk. As follows, the same tracks on each disk surface form a cylinder, so the number of cylinders = the number of tracks on the current disk surface, and each disk surface corresponds to a magnetic head for reading and writing.
Insert picture description here
In this way, the total capacity of the disk can be calculated: the number of heads (disk surface) × the number of tracks (cylinders) × the number of sectors per track × the number of bytes per sector (512)

The difference between a block device driver and a character device driver is that data is not written to the device immediately. Instead, a request_queue caches the data and sorts the reads and writes; once a certain amount of read/write I/O has accumulated, the disk is read or written in one batch. The purpose is to improve disk read/write efficiency, instead of performing a disk operation every time a single read or write I/O arrives.

Let's understand the principle of a block device driver through a simple driver that uses RAM (memory) to emulate a block device.


Two, the basic architecture of the ramdisk source code

First, the source code, which is based on kernel 3.10.0-123. The following is the ramdisk source code with the simplest possible architecture; it allocates 4 MB of memory to serve as a RAM disk.

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/sizes.h>
#include <linux/kernel.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/blkpg.h>

/* Name used both for register_blkdev() and as the gendisk disk_name. */
#define DEVICE_NAME "ramdisk_test"
/* Size in bytes of the memory area that backs the disk (4 MB). */
#define ram_size  SZ_4M
/* Lock handed to blk_init_queue() for the request queue. */
static DEFINE_SPINLOCK(ramdisk_lock);
/* The gendisk allocated and added in ramdisk_init(). */
struct gendisk *ramdisk;
/* Request queue created by blk_init_queue(); served by ramdisk_request(). */
static struct request_queue *disk_queue;
/* Start of the kzalloc()'ed memory that stores the disk contents. */
void *ram_addr;


/*
 * ramdisk_getgeo - report a synthetic CHS geometry for the RAM disk.
 * @blk_dev: block device being queried
 * @geo:     geometry structure to fill in
 *
 * The geometry is made up: 2 heads x 32 cylinders, with the number of
 * sectors per track derived from ram_size so that
 * heads * cylinders * sectors * 512 == ram_size.
 *
 * Returns 0.
 */
static int ramdisk_getgeo(struct block_device *blk_dev, struct hd_geometry *geo)
{
	geo->cylinders = 32;                     /* tracks per surface (= cylinders) */
	geo->heads = 2;                          /* number of disk surfaces */
	geo->sectors = ram_size / 2 / 32 / 512;  /* sectors on a single track */

	/*
	 * get_capacity() returns a sector_t, whose width depends on the kernel
	 * configuration, so cast explicitly instead of printing it with %d.
	 * The capacity set by ramdisk_init() is ram_size / 512 = 8192 sectors.
	 */
	printk(KERN_INFO "capacity %llu\n",
	       (unsigned long long)get_capacity(blk_dev->bd_disk));
	return 0;
}
/*
 * Probe callback passed to blk_register_region() by ramdisk_init().
 * Logs the call, stores partition index 0 through @part and returns
 * whatever get_disk() yields for the global ramdisk.
 */
static struct kobject *ramdisk_probe(dev_t dev, int *part, void *data)
{
	printk(KERN_INFO "%s called\n", __func__);

	*part = 0;

	return get_disk(ramdisk);
}

/*
列队(disk_queue)中的request处理函数

*/

static void ramdisk_request(struct request_queue *q)
{
    
    
	struct request *req;
	printk(KERN_INFO "%s called\n", __FUNCTION__);

	req = blk_fetch_request(q);                        //获取列队(disk_queue)里的request
	while (req) {
    
    
		unsigned long offset = blk_rq_pos(req) *512;  //扇区起始地址(即偏移)
		unsigned long len  = blk_rq_cur_bytes(req);  //request长度
		int err = 0;

		printk(KERN_INFO "%s offset:%d len %d\n", __FUNCTION__, offset, len);	
		if((u32)(offset+len) < ram_size)
		{
    
    
				if (rq_data_dir(req) == READ)  //列队读请求
				{
    
    
					printk(KERN_INFO "%s read len %d\n", __FUNCTION__, len);
					memcpy(req->buffer, (char *)(ram_addr + offset), len);  //列队读(request中的buffer拷贝出来,写入ramdisk的扇区为offset开始的地方)
					
				}
				else
				{
    
    
					printk(KERN_INFO "%s write len %d\n", __FUNCTION__, len);
					memcpy((char *)(ram_addr + offset), req->buffer, len);  //列队写
					
				}
		}
		else
		{
    
    
			printk(KERN_INFO "fail %s offset:%d len %d\n", __FUNCTION__, offset, len);	
			
		}
	 
		
	done:
		if (!__blk_end_request_cur(req, err))  //end_request 结束
			req = blk_fetch_request(q);
	}
}

/*
 * Block device operations for the RAM disk.
 * Only the geometry query is provided; no open/release hooks are installed.
 */
static const struct block_device_operations disk_fops = {
	.getgeo = ramdisk_getgeo,
	.owner  = THIS_MODULE,
};

/* Major number obtained from register_blkdev() in ramdisk_init(). */
int major;

/*
 * ramdisk_init - module entry point: create and publish the RAM disk.
 *
 * Steps: register a block major, allocate the backing memory, create the
 * request queue, allocate and fill in the gendisk, set its capacity and add
 * it, then register the device-number region with ramdisk_probe().
 *
 * The backing memory is allocated BEFORE add_disk(): once the disk is added
 * the block layer may issue I/O, and ramdisk_request() copies to/from
 * ram_addr.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * up to that point is released again.
 */
static int __init ramdisk_init(void)
{
	int ret;

	printk(KERN_INFO "%s called\n", __FUNCTION__);

	/* Register the block device and keep the major number it returns. */
	ret = register_blkdev(0, DEVICE_NAME);
	if (ret < 0)
		return ret;
	major = ret;

	/*
	 * NOTE(review): a 4 MB kzalloc() is a large physically contiguous
	 * allocation and may fail on a fragmented system; vzalloc()/vfree()
	 * would be more robust, but has to be changed together with the
	 * kfree() in ramdisk_exit().
	 */
	ram_addr = kzalloc(ram_size, GFP_KERNEL);
	if (!ram_addr) {
		ret = -ENOMEM;
		goto out_blkdev;
	}

	/* Create the request queue; ramdisk_request() is its handler. */
	disk_queue = blk_init_queue(ramdisk_request, &ramdisk_lock);
	if (!disk_queue) {
		ret = -ENOMEM;
		goto out_mem;
	}

	/* One minor: the whole disk, no partitions. */
	ramdisk = alloc_disk(1);
	if (!ramdisk) {
		ret = -ENOMEM;
		goto out_queue;
	}

	ramdisk->queue = disk_queue;
	ramdisk->major = major;
	ramdisk->first_minor = 0;
	ramdisk->fops = &disk_fops;
	snprintf(ramdisk->disk_name, sizeof(ramdisk->disk_name), "%s", DEVICE_NAME);

	set_capacity(ramdisk, ram_size / 512);	/* capacity is given in sectors */
	add_disk(ramdisk);

	/* Register device numbers [0, 1) of this major with ramdisk_probe(). */
	blk_register_region(MKDEV(major, 0), 1, THIS_MODULE,
			    ramdisk_probe, NULL, NULL);

	printk(KERN_INFO "%s end\n", __FUNCTION__);
	return 0;

out_queue:
	blk_cleanup_queue(disk_queue);
	disk_queue = NULL;
out_mem:
	kfree(ram_addr);
	ram_addr = NULL;
out_blkdev:
	unregister_blkdev(major, DEVICE_NAME);
	return ret;
}


/*
 * ramdisk_exit - module exit point: tear down everything ramdisk_init()
 * set up.
 *
 * del_gendisk() must run before the queue and the backing memory are
 * released; otherwise the disk stays registered with the block layer while
 * the resources it uses are already gone.
 */
static void __exit ramdisk_exit(void)
{
	/* Drop the region registered with ramdisk_probe(). */
	blk_unregister_region(MKDEV(major, 0), 1);

	/* Remove the disk from the system, then release queue and disk. */
	del_gendisk(ramdisk);
	blk_cleanup_queue(disk_queue);
	put_disk(ramdisk);

	unregister_blkdev(major, DEVICE_NAME);

	/* No I/O can reach the backing memory any more; free it last. */
	kfree(ram_addr);
}


/* Register the module's load/unload entry points and declare its license. */
module_init(ramdisk_init);
module_exit(ramdisk_exit);
MODULE_LICENSE("GPL");

2.1 Source code analysis:

The main implementation process of a block device framework is as follows:

  1. Register the block device, get the device number register_blkdev
  2. Apply to set up the queue (mainly to bind the queue processing function) blk_init_queue
  3. Allocate gendisk structure and assign value (including binding queue, device number, fop, etc.) alloc_disk
  4. Set disk capacity (number of sectors) set_capacity
  5. Add create disk add_disk

If you want to create a 4MB memory block as the block device, you need:

  1. ram_addr = kzalloc(ram_size, GFP_KERNEL); //Apply for 4MB memory
  2. Then write the queue processing function ramdisk_request , which is the core of data reading and writing.

The implementation process of ramdisk_request processing function:

  1. The kernel obtains the requests in the request_queue queue according to the elevator scheduling algorithm.
  2. Get the starting sector address of the request (i.e. the offset address) and the length of the request.
  3. Determine whether the request is a read or a write, and do the corresponding memcpy operation on the disk (RAM), i.e. copy between req->buffer and the RAM.

As above, it can be seen that the request_queue queue is the key to read and write. In this processing function, the kernel obtains the IO of the request according to the elevator scheduling algorithm, and then performs batch read or batch write.

It should be noted that the function must check the range of each request (offset + len) against the size of the device (ram_size); otherwise an access may run past the end of the 4 MB buffer, causing an out-of-bounds memory access.
Insert picture description here
From the low-level debug output, it can be observed that reading and writing file data under the mount path of the block device does not immediately execute the queue processing function (unless sync is executed). This follows from how block devices work: small amounts of data are first held in a buffer, and only when the time is ripe does the elevator scheduling algorithm take the requests out and hand them to ramdisk_request.


Three, the use of ramdisk

  1. After loading the driver, you can see that the block device is created ( /dev/ramdisk_test )
[root@localhost mnt]# ls /dev/ramdisk_test  -l
brw-rw----. 1 root disk 252, 0 Feb  2 05:47 /dev/ramdisk_test
  1. When a file system is created on the disk (formatting), the ramdisk_getgeo geometry function is called to obtain the disk's geometry information, including: the number of disk surfaces (geo->heads), the number of tracks/cylinders (geo->cylinders), and the number of sectors per track (geo->sectors).

[root@localhost ramdisk]# mkfs.ext4 /dev/ramdisk_test  #格式化为ext4
mke2fs 1.42.9 (28-Dec-2013)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
Stride=0 blocks, Stripe width=0 blocks
1024 inodes, 4096 blocks
204 blocks (4.98%) reserved for the super user
First data block=1
Maximum filesystem blocks=4194304
1 block group
8192 blocks per group, 8192 fragments per group
1024 inodes per group

Allocating group tables: done
Writing inode tables: done
Creating journal (1024 blocks): done
Writing superblocks and filesystem accounting information: done

[root@localhost ramdisk]# mkfs.fat /dev/ramdisk_test  #或格式化为fat
mkfs.fat 3.0.20 (12 Jun 2013)

Note: If the ramdisk_getgeo function is not added, an error may be reported when formatting (the geometry information cannot be obtained), and the error is as follows:


[root@localhost ramdisk]# mkfs.fat /dev/ramdisk_test
mkfs.fat 3.0.20 (12 Jun 2013)
unable to get drive geometry, using default 255/63
  1. fdisk can be used to check the capacity of the ramdisk; it shows that the entire disk is treated as one partition with a capacity of 4 MB.
[root@localhost mnt]# fdisk -l /dev/ramdisk_test

Disk /dev/ramdisk_test: 4 MB, 4194304 bytes, 8192 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Disk label type: dos
Disk identifier: 0x00000000

            Device Boot      Start         End      Blocks   Id  System

  1. Mount and view the partition, and then you can access files under /mnt/.
[root@localhost /]# mount /dev/ramdisk_test /mnt/

[root@localhost /]# mount |grep ramdisk
/dev/ramdisk_test on /mnt type vfat (rw,relatime,fmask=0022,dmask=0022,codepage=437,iocharset=ascii,shortname=mixed,errors=remount-ro)
[root@localhost /]#

Guess you like

Origin blog.csdn.net/ludaoyi88/article/details/113486935