Address translation in Linux

1. Run the code and take a screenshot

paging_lowmem.c code is as follows:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/delay.h>


/* Values of the CR0 and CR3 control registers, filled in by get_pgtable_macro(). */
static unsigned long cr0,cr3;

/* Address handed to free_page() in v2p_exit(); initialised to 0. */
static unsigned long vaddr = 0;


/*
 * Print the key parameters of the paging mechanism: the CR0/CR3 control
 * registers (also stored in the file-level cr0/cr3 variables), the shift
 * of every page-table level, the number of entries per table, and
 * PAGE_MASK.
 */
static void get_pgtable_macro(void)
{
    /* Snapshot the control registers into the file-level variables. */
    cr0 = read_cr0();
    cr3 = read_cr3_pa();

    printk("cr0 = 0x%lx, cr3 = 0x%lx\n",
           cr0, cr3);

    /*
     * The *_SHIFT macros give the base-2 logarithm of the size of the
     * region that the corresponding field of a linear address can map.
     */
    printk("PGDIR_SHIFT = %d\n",
           PGDIR_SHIFT);
    printk("P4D_SHIFT = %d\n",
           P4D_SHIFT);
    printk("PUD_SHIFT = %d\n",
           PUD_SHIFT);
    printk("PMD_SHIFT = %d\n",
           PMD_SHIFT);
    /*
     * PAGE_SHIFT describes the page-offset field, which maps one page.
     * A page is 4 KiB, whose base-2 logarithm is 12; the other shift
     * macros follow the same idea.
     */
    printk("PAGE_SHIFT = %d\n",
           PAGE_SHIFT);

    /*
     * The PTRS_PER_* macros give the number of entries in the table of
     * each level; they exist to simplify the bit arithmetic of a walk.
     */
    printk("PTRS_PER_PGD = %d\n",
           PTRS_PER_PGD);
    printk("PTRS_PER_P4D = %d\n",
           PTRS_PER_P4D);
    printk("PTRS_PER_PUD = %d\n",
           PTRS_PER_PUD);
    printk("PTRS_PER_PMD = %d\n",
           PTRS_PER_PMD);
    printk("PTRS_PER_PTE = %d\n",
           PTRS_PER_PTE);

    /* PAGE_MASK is the mask that clears the page-offset field. */
    printk("PAGE_MASK = 0x%lx\n",
           PAGE_MASK);
}
 
/*
 * Translate a kernel linear address into a physical address by walking
 * the page tables level by level (PGD -> P4D -> PUD -> PMD -> PTE) and
 * printing the entry value and index found at every level.
 *
 * @vaddr: linear address to translate.
 *
 * Returns the value composed from the final page-table entry and the page
 * offset, or (unsigned long)-1 when some level has no mapping.
 *
 * NOTE(review): the walk always descends to the PTE level, i.e. it assumes
 * the address is mapped with 4 KiB pages. A large-page mapping at the PUD
 * or PMD level is not detected here - confirm this holds on the target
 * system before relying on the result.
 */
static unsigned long vaddr2paddr(unsigned long vaddr)
{
    /* One pointer per page-table level to hold the entry that was found. */
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;

    unsigned long paddr = 0;
    unsigned long page_addr = 0;
    unsigned long page_offset = 0;

    /*
     * The first argument is the mm_struct of the current process. The
     * address being translated belongs to the kernel, so the kernel page
     * table has to be searched; because every process shares the same
     * kernel page table, the current process's mm_struct can be used for
     * the lookup. pgd is the page global directory entry.
     */
    pgd = pgd_offset(current->mm, vaddr);
    printk("pgd_val = 0x%lx, pgd_index = %lu\n", pgd_val(*pgd), pgd_index(vaddr));
    if (pgd_none(*pgd)) {
        printk("not mapped in pgd\n");
        return -1;
    }

    /* The entry found at each level is the input of the next-level lookup. */
    p4d = p4d_offset(pgd, vaddr);
    printk("p4d_val = 0x%lx, p4d_index = %lu\n", p4d_val(*p4d), p4d_index(vaddr));
    if (p4d_none(*p4d)) {
        printk("not mapped in p4d\n");
        return -1;
    }

    pud = pud_offset(p4d, vaddr);
    printk("pud_val = 0x%lx, pud_index = %lu\n", pud_val(*pud), pud_index(vaddr));
    if (pud_none(*pud)) {
        printk("not mapped in pud\n");
        return -1;
    }

    pmd = pmd_offset(pud, vaddr);
    printk("pmd_val = 0x%lx, pmd_index = %lu\n", pmd_val(*pmd), pmd_index(vaddr));
    if (pmd_none(*pmd)) {
        printk("not mapped in pmd\n");
        return -1;
    }

    /*
     * Slightly different from the levels above: this variant looks the
     * entry up in the kernel page table (a lookup in a process page table
     * uses a different function). This yields the page-table entry.
     */
    pte = pte_offset_kernel(pmd, vaddr);
    /* The label used to read "ptd_index"; it prints pte_index(). */
    printk("pte_val = 0x%lx, pte_index = %lu\n", pte_val(*pte), pte_index(vaddr));
    if (pte_none(*pte)) {
        printk("not mapped in pte\n");
        return -1;
    }

    /*
     * Keep the upper 52 bits of the entry. PAGE_MASK only clears the low
     * 12 bits, so any flag bits stored in the high part of the entry are
     * still present in page_addr (and therefore in the returned value).
     */
    page_addr = pte_val(*pte) & PAGE_MASK;
    /* The page offset is the low 12 bits of the linear address. */
    page_offset = vaddr & ~PAGE_MASK;
    /* Combining the two parts gives the resulting address. */
    paddr = page_addr | page_offset;

    printk("page_addr = %lx, page_offset = %lx\n", page_addr, page_offset);
    printk("vaddr = %lx, paddr = %lx\n", vaddr, paddr);
    return paddr;
}

/*
 * Module load hook: print the paging parameters, allocate one page, write a
 * marker string into it and print its address translation.
 *
 * The page address is stored in the file-level `vaddr` (no local variable of
 * the same name is declared here) so that v2p_exit() frees the page that was
 * actually allocated instead of address 0.
 *
 * Returns 0 on success, -ENOMEM when the page cannot be allocated.
 */
static int __init v2p_init(void)
{
    printk("vaddr to paddr module is running..\n");
    get_pgtable_macro();
    printk("\n");

    /*
     * Request one page with GFP_KERNEL (per the original author: this
     * prefers page frames from the kernel's ZONE_NORMAL).
     */
    vaddr = __get_free_page(GFP_KERNEL);
    if (vaddr == 0) {
        printk("__get_free_page failed..\n");
        /* Report the failure so the module is not left loaded without a page. */
        return -ENOMEM;
    }

    /* Write a recognisable string into the page, bounded by the page size. */
    snprintf((char *)vaddr, PAGE_SIZE, "hello world from kernel");
    printk("get_page_vaddr=0x%lx\n", vaddr);
    vaddr2paddr(vaddr);

    /*
     * Block for 600 seconds before returning - presumably to leave time to
     * inspect the page from another terminal while the module is loading.
     */
    ssleep(600);
    return 0;
}
/*
 * Module unload hook: log the departure and hand the address held in the
 * file-level `vaddr` back to the page allocator.
 */
static void __exit v2p_exit(void)
{
    printk("vaddr to paddr module is leaving..\n");

    /* Release the linear address stored in the file-level variable. */
    free_page(vaddr);
}


/* Register v2p_init() as the function run when the module is loaded. */
module_init(v2p_init);
/* Register v2p_exit() as the function run when the module is unloaded. */
module_exit(v2p_exit);
/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL"); 

The Makefile code is as follows:

# Build paging_lowmem.c as an external (out-of-tree) kernel module.
obj-m := paging_lowmem.o

# Comments are kept on their own lines: text placed after a value on the same
# line would leave the whitespace in front of the '#' inside the variable.

# Directory that contains this module's sources.
CURRENT_PATH := $(shell pwd)
# Release string of the currently running kernel.
LINUX_KERNEL := $(shell uname -r)
# Kernel header tree matching the running kernel.
LINUX_KERNEL_PATH := /usr/src/linux-headers-$(LINUX_KERNEL)

.PHONY: all clean

# -C <dir>  : path of the kernel build tree
# M=<dir>   : directory of the module; build output is placed there
# modules   : tells the kernel build system to build kernel modules
all:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) modules

# Remove the files generated by the module build.
clean:
	$(MAKE) -C $(LINUX_KERNEL_PATH) M=$(CURRENT_PATH) clean

operation result:

Insert image description here

You can see that PGDIR_SHIFT and P4D_SHIFT are both 39, which means that the P4D field of the linear address is empty. You can also see that PTRS_PER_P4D is 1, i.e. the P4D directory contains only a single entry. This shows that although Linux adopts a five-level page table model, only four levels of page tables are actually used here.

PAGE_MASK is a 64-bit number in which the lower 12 bits are all zeros and the remaining bits are all 1.

You can see that the leading hexadecimal digit of paddr is 8. Converted to binary, this means that the highest bit (bit 63) is 1. On the x86 platform this is a protection bit which marks the physical page frame as not usable for executing code. The physical address of the page frame itself is contained in the lower bits that follow.

2. Use debugging tools to debug

Debugging tools:

① The main function of the dram kernel module is to map the data in the physical memory to our device file through mmap. By accessing this device file, we can achieve the function of accessing the physical memory.

②Fileview, which can read this binary file in a format we want

For the source code of these two tools, refer to http://t.csdn.cn/2lc7y

Supplement: The role of EXPORT_SYMBOL: The functions or symbols defined in the EXPORT_SYMBOL tag are open to all kernel codes and can be called directly in your kernel module without modifying the kernel code. How to use it:

​ 1. Use "EXPORT_SYMBOL (function name)" to declare after the module function definition.

2. Use extern to declare it in another module that calls the function.

3. First load the module that defines the function, and then load the module that calls the function. Please pay attention to this order.

Open another terminal window and execute the following commands in sequence:

​ sudo insmod dram.ko

​ sudo mknod /dev/dram c 85 0

​ ./fileview /dev/dram

operation result:

Insert image description here

Addressing process:

page directory binary Decimal*8B Hexadecimal*8B
PGD 1 0011 0111(39) 311*8B 9b8
PUD 0 1100 0101(30) 197*8B 628
PMD 0 1000 1011(21) 139*8B 458
PTE 0 0001 1000(12) 24*8B c0

The base address of the page global directory table cr3 = 0x118fa000

0x118fa000+0x9b8 = 0x118fa9b8 → 3a202067, which is the pgd_val value (the physical address of the lower-level page table)

0x3a202000+0x628 = 0x3a202628 → 3a203067 is the pud_val value (the physical address of the lower-level page table)

0x3a203000 + 0x458 =0x3a203458 → 12148063, which is the pmd_val value (the physical address of the lower-level page table)

0x12148000 + 0xc0 = 0x121480c0 → 8000000011618063 is the pte_val value (page physical address)

The page offset page_offset is 0

It can be deduced from this that the required physical address is 11618000

Insert image description here

3. Analyze based on the principles mentioned and have your own opinions

Segment mechanism: virtual address → linear address

​ In Intel's 80x86 processor, for the conversion of virtual address to linear address, please refer to "Linux Operating System Principles and Applications" P29:

Insert image description here

The Linux kernel does not use the full segmentation scheme provided by Intel; it uses the segmentation mechanism only to a limited extent. This not only simplifies the design of the Linux kernel, but also creates the conditions for porting Linux to other platforms. The designers of Linux set the base address of every segment to 0 and the segment limit to 4GB. With these settings, for any given offset the equation "0 + offset = linear address" holds, that is to say, "offset = linear address".

The origin of paging mechanism

It can be imagined that if paging were not used and the linear address space were mapped directly onto the physical address space, then modifying the data of any one segment would modify the data of the other segments at the same time. Without paging, the "base address : limit" method that the segment mechanism provides to divide the linear address space, so that segments are completely isolated from each other, simply does not work as a way of implementing segment protection, because the segments may overwrite each other.

Paging mechanism: linear address → physical address

​ For the conversion of the 32-bit linear address to the physical address of the two-level page table, please refer to "Linux Operating System Principles and Applications" P34:

Insert image description here

Currently, in order to be compatible with 32-bit and 64-bit CPUs, Linux needs a unified page address model. The most commonly used is the 4-level page table model:

Insert image description here

Since it is a 64-bit processor, the displayed address is 64 bits wide. However, due to the hardware limitations of the 64-bit processor, there are only 48 address lines, so the linear address and the physical address actually use only 48 bits. 64-bit Linux uses a 4-level page table structure, and its linear address division is shown in the figure below. In this case, the page size is 4KB, each page table entry is 8 bytes, and the entire page table can map 256TB of space. The MMU hardware of newer Intel chips provides 5-level page table management, so in the 4.15 kernel, Linux adds a new page directory between the page global directory and the page upper directory, called the P4D page directory (between PGD and PUD). The CR3 register is used to save the address of the page global directory of the current process. Paging starts from the page global directory.

Insert image description here

​ The conversion of the 64-bit linear address to the physical address of the five-level page table can refer to the conversion of the 32-bit linear address of the two-level page table to the physical address. Here is the paging_lowmem.c code in the first question:

​ vaddr is a linear address

	pgd = pgd_offset(current->mm, vaddr);

The first parameter is the mm_struct structure of the current process. The mm_struct structure is used to describe the virtual address space of the process. There is a field PGD in mm_struct that is used to save the physical address of the page global directory of the process. This line of code finds the physical address pgd of the PGD table entry. The physical address of the lower-level page table is stored under this physical address.

	p4d = p4d_offset(pgd, vaddr);

​ This line of code finds the physical address p4d of the P4D table entry. Since the page fourth-level directory is not enabled, the directory table entry is 1, that is, p4d=pgd.

​ And so on..., finally:

	pte = pte_offset_kernel(pmd, vaddr);

At this point, the physical address pte of the PTE entry is obtained. The physical address of the page is stored under this physical address , which corresponds to the third step of the 32-bit linear address to physical address conversion of the two-level page table.

	page_addr = pte_val(*pte) & PAGE_MASK;    /*取出其高52位*/
	/*取出页偏移地址,页偏移量也就是线性地址中的低12位*/
	page_offset = vaddr & ~PAGE_MASK;
	/*将两个地址拼接起来,就得到了想要的物理地址了*/
	paddr = page_addr | page_offset;

​ The last step is to take the high bit (the high 52 bits for 64 bits, the high 20 bits for 32 bits) and concatenate it with the low 12 bits of the linear address, that is, the offset, and the result is the physical address .

4. Ask 2 questions and give answers

Where is the address of the page global directory?

When the kernel creates a process, it allocates a page global directory for it. In the process descriptor, the task_struct structure, there is a pointer mm pointing to the mm_struct structure, and the mm_struct structure is used to describe the virtual address space of the process. In mm_struct there is a field pgd that is used to save the physical address of the page global directory of the process. (So when the process is switched, the operating system completes the page table switch by accessing the task_struct structure, then accessing the mm_struct structure, and finally finding the pgd field, obtaining the address of the page global directory of the new process, and filling it into the CR3 register.)

Why is the logical address equal to the linear address in Linux?

​ Because the linear addresses of all Linux segments (user code segment, user data segment, kernel code segment, kernel data segment) start from 0x00000000 and are 4G in length, in this way, linear address = logical address + 0x00000000, that is, the logical address is equal to linear address.

Guess you like

Origin blog.csdn.net/qq_58538265/article/details/133920479