"Operating System Truth Restore" Chapter 12 Further Improvement of the Kernel

The learning experience is better with videos!
Section 1: https://www.bilibili.com/video/BV1iu411j7Xj/?vd_source=701807c4f8684b13e922d0a8b116af31
Extra Section 1: https://www.bilibili.com/video/BV1d14y1X7xe/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 2: https://www.bilibili.com/video/BV1wW4y1Z7EQ/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 3: https://www.bilibili.com/video/BV12W4y1o7jH/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 4: https://www.bilibili.com/video/BV1sP411r7XQ/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 5: https://www.bilibili.com/video/BV1km4y1L7Yt/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 6: https://www.bilibili.com/video/BV1tV4y1t7ns/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 7: https://www.bilibili.com/video/BV1Eu4y1U7P5/?vd_source=701807c4f8684b13e922d0a8b116af31
Section 8: https://www.bilibili.com/video/BV1Uc411c75V/?vd_source=701807c4f8684b13e922d0a8b116af31

Code repository: https://github.com/xukanshan/the_truth_of_operationg_system

In order to facilitate application development on the operating system, the operating system must provide a series of interfaces for programs to call, which is what we call system calls. System calls allow user programs to request services from the operating system. Nowadays, mainstream operating systems use the interrupt mechanism to implement system calls. Therefore, before implementing system calls, we need to review the operation process of the interrupt mechanism on this operating system:

  1. When an event occurs in an external device, an interrupt signal with a number will be sent to the CPU through the interrupt agent;
  2. The CPU will find the interrupt gate descriptor corresponding to the interrupt number based on the incoming interrupt number and the IDT table pointed to in the IDTR register;
  3. Take out the CS selector and IP from the interrupt gate descriptor in the previous step, and then jump to the interrupt handler corresponding to this CS:IP for execution.
  4. Then, the jump in step 3 above is the interrupt handler written in assembly language, which is responsible for saving the scene (saving the user mode context to the kernel stack) and jumping to the interrupt handler written in C language.

In Linux on the x86 system, system calls are implemented by putting the system call number to be executed into the EAX register and then actively calling the INT 0x80 soft interrupt. Since the system call mechanism is based on the interrupt mechanism, the system call process is very similar to the interrupt process:

  1. The program uses int 0x80 to trigger a soft interrupt
  2. The CPU combines the IDT table pointed to by the IDTR register to find the interrupt gate descriptor corresponding to 0x80;
  3. Take out the CS selector and IP from the interrupt gate descriptor in the previous step, and then jump to the interrupt handler corresponding to this CS:IP for execution;
  4. The interrupt handler calls the corresponding system call function based on the value stored in EAX. The interrupt handler is also written in assembly. It will be responsible for saving the scene (saving the user mode context to the kernel stack) and jumping to the corresponding system call function written in C language for execution based on the system call number in the EAX register.

Therefore, we now implement the system call mechanism based on this system call process based on the interrupt mechanism:

First, let's prepare the system call entry of the user program, which is the program used to trigger int 0x80. We define 4 user system call program entries for different system call parameter number scenarios. Register parameter passing rules refer to Linux implementation method (book p525) ( myos/lib/user/syscall.c )

#include "syscall.h"

/*
 * System call with no arguments.
 * Loads NUMBER into EAX, executes `int $0x80`, and evaluates to the value
 * found in EAX afterwards. Every line of the definition, including the one
 * that opens the statement expression, must end in a continuation backslash.
 */
#define _syscall0(NUMBER) ({				       \
   int retval;					               \
   asm volatile (					       \
   "int $0x80"						       \
   : "=a" (retval)					       \
   : "a" (NUMBER)					       \
   : "memory"						       \
   );							       \
   retval;						       \
})

/*
 * System call with one argument.
 * NUMBER goes in EAX and ARG1 in EBX; the expression evaluates to the value
 * found in EAX after `int $0x80`. Every line of the definition must end in a
 * continuation backslash.
 */
#define _syscall1(NUMBER, ARG1) ({			       \
   int retval;					               \
   asm volatile (					       \
   "int $0x80"						       \
   : "=a" (retval)					       \
   : "a" (NUMBER), "b" (ARG1)				       \
   : "memory"						       \
   );							       \
   retval;						       \
})

/*
 * System call with two arguments.
 * NUMBER goes in EAX, ARG1 in EBX and ARG2 in ECX; the expression evaluates
 * to the value found in EAX after `int $0x80`. Every line of the definition
 * must end in a continuation backslash.
 */
#define _syscall2(NUMBER, ARG1, ARG2) ({		       \
   int retval;						       \
   asm volatile (					       \
   "int $0x80"						       \
   : "=a" (retval)					       \
   : "a" (NUMBER), "b" (ARG1), "c" (ARG2)		       \
   : "memory"						       \
   );							       \
   retval;						       \
})

/*
 * System call with three arguments.
 * NUMBER goes in EAX, ARG1 in EBX, ARG2 in ECX and ARG3 in EDX; the
 * expression evaluates to the value found in EAX after `int $0x80`.
 * Every line of the definition must end in a continuation backslash.
 */
#define _syscall3(NUMBER, ARG1, ARG2, ARG3) ({		       \
   int retval;						       \
   asm volatile (					       \
      "int $0x80"					       \
      : "=a" (retval)					       \
      : "a" (NUMBER), "b" (ARG1), "c" (ARG2), "d" (ARG3)       \
      : "memory"					       \
   );							       \
   retval;						       \
})

Then, we prepare the interrupt gate descriptor corresponding to the 0x80 soft interrupt

Modify ( myos/kernel/interrupt.c )

#define IDT_DESC_CNT 0x81      // Number of interrupt vectors supported so far: the highest supported vector, 0x80, plus one

extern uint32_t syscall_handler(void);    // Interrupt handler entry for system calls, written in assembly

/*
 * Fill in the interrupt descriptor table by calling make_idt_desc once per
 * slot. Each call receives the address of the descriptor, the attribute
 * field and the handler address. Every slot idt[i] first gets a DPL0 gate
 * for intr_entry_table[i]; the last slot (IDT_DESC_CNT - 1) is then
 * rewritten with the DPL3 attribute and syscall_handler.
 */
static void idt_desc_init(void) {
   const int syscall_vec = IDT_DESC_CNT - 1;
   int vec = 0;

   while (vec < IDT_DESC_CNT) {
      make_idt_desc(&idt[vec], IDT_DESC_ATTR_DPL0, intr_entry_table[vec]);
      vec++;
   }

   /* The system-call gate is handled separately: DPL 3, assembly handler syscall_handler. */
   make_idt_desc(&idt[syscall_vec], IDT_DESC_ATTR_DPL3, syscall_handler);
   put_str("   idt_desc_init done\n");
}

Next, we define the assembly version of the system call processing function ( myos/kernel/kernel.S )

;;;;;;;;;;;;;;;;   interrupt 0x80   ;;;;;;;;;;;;;;;;
[bits 32]
extern syscall_table            ; table of system-call function addresses defined in C (same idea as the C interrupt-handler table used by the ordinary interrupt stubs)
section .text
global syscall_handler
syscall_handler:
                                ;1 Save the context. The pushes mirror what the ordinary interrupt stubs push, so that intr_exit can be reused
   push 0			            ; push 0 to keep the stack layout uniform
   push ds
   push es
   push fs
   push gs
   pushad			            ; PUSHAD pushes the 32-bit registers in the order EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI
   push 0x80			        ; 0x80 is pushed here, again to keep the stack layout uniform

                                ;2 Pass the arguments to the system-call function. This handler is shared by every
                                ; user-side entry (up to three arguments), so three arguments are always pushed
   push edx			            ; 3rd system-call argument
   push ecx			            ; 2nd system-call argument
   push ebx			            ; 1st system-call argument

                                ;3 Call the handler function defined in C; eax is the index into syscall_table
   call [syscall_table + eax*4]	    ; the C function reads only as many stack arguments as its declaration names
   add esp, 12			        ; drop the three arguments pushed above

                                ;4 Store the return value (C leaves it in eax, per the C ABI) into the saved-eax slot on the kernel stack
   mov [esp + 8*4], eax	
   jmp intr_exit		        ; return through intr_exit, which restores the context

Then, we implement a mechanism similar to the one in the interrupt mechanism where the assembly code jumps into the C interrupt handler, so that we can use C to manage system calls ( myos/userprog/syscall-init.c )

#define syscall_nr 32                 // number of slots in the system-call table
typedef void* syscall;                // one table entry: the address of a system-call function
syscall syscall_table[syscall_nr];    // indexed by system-call number; declared extern in kernel.S

At this point, our system call mechanism has been constructed. In the future, we only need to put the address of the system call function written in C into this array.

Now, let's add the first system call to our system, sys_getpid, which obtains the process number of the process or thread. In fact, it returns the pid value stored in the process/thread PCB.

First, we add the pid member to the PCB of the process/thread; without a pid member, what would sys_getpid return? Modify ( myos/thread/thread.h )

typedef uint16_t pid_t;              // process/thread identifier

/* Process/thread control block (PCB). */
struct task_struct {
   uint32_t* self_kstack;	        // top of this thread's stack; the thread's run-time information sits at the stack top
   pid_t pid;                       // id assigned when the PCB is initialised
   enum task_status status;
   uint8_t priority;		        // thread priority
   char name[16];                   // name of the thread

   uint8_t ticks;	                 // ticks left before the thread must give up the CPU; counted down separately because priority itself does not change
   uint32_t elapsed_ticks;          // total CPU ticks this task has consumed since it first ran, i.e. how long it has executed
   struct list_elem general_tag;		// node for ordinary queues (e.g. the ready queue or a wait queue)
   struct list_elem all_list_tag;   // node for thread_all_list, the queue that tracks every thread
   uint32_t* pgdir;              // virtual address of the process's own page table
   struct virtual_addr userprog_vaddr;   // virtual address pool of the user process
   uint32_t stack_magic;	       // sentinel at the PCB boundary: an ever-growing stack would eventually overwrite PCB data, and this value lets that be detected
};

PCB has this pid member, so naturally when we create a process/thread, we need to assign and modify this member ( myos/thread/thread.c )

#include "sync.h"

struct lock pid_lock;		    // 分配pid锁

/*
 * Allocate a pid.
 * The counter is advanced and copied into a local while pid_lock is held,
 * and the local copy is returned. Reading the shared static after the lock
 * is released would let a concurrent caller's increment leak into this
 * caller's result, handing the same pid to two threads.
 */
static pid_t allocate_pid(void) {
   static pid_t next_pid = 0;
   pid_t new_pid;

   lock_acquire(&pid_lock);
   new_pid = ++next_pid;
   lock_release(&pid_lock);
   return new_pid;
}

/*
 * Initialise the basic information of a thread.
 * The PCB holds the thread's management data; this function fills the PCB
 * at `pthread` from the given name and priority.
 * NOTE(review): the name is copied with strcpy into a 16-byte field, so
 * callers presumably pass names shorter than 16 bytes - confirm.
 */
void init_thread(struct task_struct* pthread, char* name, int prio) {
   memset(pthread, 0, sizeof(*pthread));          /* start from an all-zero PCB */
   pthread->pid = allocate_pid();
   strcpy(pthread->name, name);                   /* record the thread name in the PCB */

   /* main is also wrapped as a thread and is already running; every other thread starts out ready */
   pthread->status = (pthread == main_thread) ? TASK_RUNNING : TASK_READY;

   pthread->priority = prio;
   pthread->ticks = prio;
   pthread->elapsed_ticks = 0;
   pthread->pgdir = NULL;                         /* a thread has no address space of its own; only a process's PCB uses this field */

   /* self_kstack is the thread's kernel-mode stack top: PCB address + PG_SIZE,
    * so one page holds both the management data and the run-time data */
   pthread->self_kstack = (uint32_t*)((uint32_t)pthread + PG_SIZE);
   pthread->stack_magic = 0x19870916;             /* arbitrary boundary value used to detect the stack growing over the PCB */
}

/* Initialise the threading environment: the two thread lists, the pid lock, and the main thread. */
void thread_init(void) {
   put_str("thread_init start\n");
   list_init(&thread_ready_list);
   list_init(&thread_all_list);
   lock_init(&pid_lock);           /* initialised before make_main_thread() runs */
/* Turn the currently running main function into a thread */
   make_main_thread();
   put_str("thread_init done\n");
}

Now, let's write the sys_getpid function and put its address into our system call table, syscall_table. Modify ( myos/userprog/syscall-init.c )

#include "syscall-init.h"
#include "syscall.h"
#include "stdint.h"
#include "print.h"
#include "thread.h"

/* Return the pid of the current task, read from the PCB returned by running_thread(). */
uint32_t sys_getpid(void) {
   return running_thread()->pid;
}

/* Initialise system calls: store each handler's address in its syscall_table slot. */
void syscall_init(void) {
   put_str("syscall_init start\n");
   syscall_table[SYS_GETPID] = sys_getpid;    /* slot SYS_GETPID -> sys_getpid */
   put_str("syscall_init done\n");
}

Function declaration ( myos/userprog/syscall-init.h )

#ifndef __USERPROG_SYSCALLINIT_H
#define __USERPROG_SYSCALLINIT_H
#include "stdint.h"
void syscall_init(void);
uint32_t sys_getpid(void);
#endif

Support code ( myos/lib/user/syscall.h )

#ifndef __LIB_USER_SYSCALL_H
#define __LIB_USER_SYSCALL_H
#include "stdint.h"
/* System-call numbers; each value is used as an index into syscall_table. */
enum SYSCALL_NR {
   SYS_GETPID
};

#endif

Then, call syscall_init from init_all to install the system call handling functions. Modify ( myos/kernel/init.c )

#include "syscall-init.h"

/*
 * Initialise all modules, in the order listed.
 * Declared with (void) so that the definition is a proper prototype.
 */
void init_all(void) {
   put_str("init_all\n");
   idt_init();	     // interrupts
   mem_init();	     // memory management
   thread_init();    // thread-related structures
   timer_init();     // PIT
   console_init();   // console; best done before interrupts are enabled
   keyboard_init();  // keyboard
   tss_init();       // TSS
   syscall_init();   // system calls
}

Finally, we write an entry point for the user program to make system calls ( myos/lib/user/syscall.c )

/*
 * Return the pid of the current task.
 * User-side entry: issues system call SYS_GETPID through _syscall0.
 * The parameter list is (void), matching the declaration in syscall.h.
 */
uint32_t getpid(void) {
   return _syscall0(SYS_GETPID);
}

Then the function declaration ( myos/lib/user/syscall.h )

uint32_t getpid(void);

It is necessary to distinguish between the actual system call handling function, sys_getpid, and the user program entry, getpid. The former runs in kernel mode; the latter is the entry through which a user-mode program executes int 0x80.

Test code ( myos/kernel/main.c )

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"

void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);
int prog_a_pid = 0, prog_b_pid = 0;   /* written by u_prog_a / u_prog_b, printed by k_thread_a / k_thread_b */

/*
 * Test entry: initialise everything, call process_execute for the two user
 * programs, enable interrupts, print main's own pid, start the two kernel
 * threads, then spin forever.
 */
int main(void) {
   put_str("I am kernel\n");
   init_all();

   process_execute(u_prog_a, "user_prog_a");
   process_execute(u_prog_b, "user_prog_b");

   intr_enable();
   console_put_str(" main_pid:0x");
   console_put_int(sys_getpid());      /* calls the handler function directly rather than going through int 0x80 */
   console_put_char('\n');
   thread_start("k_thread_a", 31, k_thread_a, "argA ");
   thread_start("k_thread_b", 31, k_thread_b, "argB ");
   while(1);
   return 0;
}

/*
 * Function run in kernel thread A: prints its own pid and the current value
 * of prog_a_pid, then spins forever.
 */
void k_thread_a(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" thread_a_pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   console_put_str(" prog_a_pid:0x");
   console_put_int(prog_a_pid);
   console_put_char('\n');
   while(1);
}

/*
 * Function run in kernel thread B: prints its own pid and the current value
 * of prog_b_pid, then spins forever.
 */
void k_thread_b(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" thread_b_pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   console_put_str(" prog_b_pid:0x");
   console_put_int(prog_b_pid);
   console_put_char('\n');
   while(1);
}

/* Test user process: stores the value returned by getpid() in prog_a_pid, then spins. */
void u_prog_a(void) {
   prog_a_pid = getpid();
   while(1);
}

/* Test user process: stores the value returned by getpid() in prog_b_pid, then spins. */
void u_prog_b(void) {
   prog_b_pid = getpid();
   while(1);
}

Previously, we used registers to pass parameters. In fact, we can also use the stack. The principle is that the system call mechanism is based on the interrupt mechanism: when a system call is made through the user program entry, the entry pushes the system call number and the parameters onto the user stack and then triggers the int 0x80 soft interrupt. At that moment the privilege level switches, and the CPU automatically pushes the location of the user stack onto the kernel stack. If we read the location of the user stack from the kernel stack, we can obtain the system call number and parameters, and then call the corresponding system call handling function.

Please note that this experiment is only to show you that you can use the stack to pass parameters, and does not serve as the basis for subsequent development!

Modify ( myos/lib/user/syscall.c )

/*
 * System call with no arguments (stack-passing variant).
 * Pushes NUMBER on the user stack, executes `int $0x80`, then pops the
 * 4 bytes again; evaluates to the value found in EAX afterwards.
 * NUMBER must be a compile-time constant ("i" constraint). Every line of the
 * definition must end in a continuation backslash.
 */
#define _syscall0(NUMBER) ({				       \
   int retval;					               \
   asm volatile (					       \
   "pushl %[number]; int $0x80; addl $4, %%esp"		       \
   : "=a" (retval)					       \
   : [number] "i" (NUMBER)		  		       \
   : "memory"						       \
   );							       \
   retval;						       \
})

/*
 * System call with one argument (stack-passing variant).
 * Pushes ARG0 and then NUMBER on the user stack, executes `int $0x80`, and
 * pops the 8 bytes again; evaluates to the value found in EAX afterwards.
 * The argument uses the "ri" constraint (register or immediate), never
 * memory: a memory operand addressed relative to ESP would be off after a
 * preceding pushl has moved ESP.
 */
#define _syscall1(NUMBER, ARG0) ({			       \
   int retval;					               \
   asm volatile (					       \
   "pushl %[arg0]; pushl %[number]; int $0x80; addl $8, %%esp" \
   : "=a" (retval)					       \
   : [number] "i" (NUMBER), [arg0] "ri" (ARG0)		       \
   : "memory"						       \
   );							       \
   retval;						       \
})

/*
 * System call with two arguments (stack-passing variant).
 * Pushes ARG1, ARG0 and NUMBER (in that order), executes `int $0x80`, and
 * pops the 12 bytes again; evaluates to the value found in EAX afterwards.
 * Arguments use the "ri" constraint (register or immediate), never memory:
 * a memory operand addressed relative to ESP would be off after a preceding
 * pushl has moved ESP.
 */
#define _syscall2(NUMBER, ARG0, ARG1) ({		       \
   int retval;						       \
   asm volatile (					       \
   "pushl %[arg1]; pushl %[arg0]; "			       \
   "pushl %[number]; int $0x80; addl $12, %%esp"	       \
      : "=a" (retval)					       \
      : [number] "i" (NUMBER),				       \
	[arg0] "ri" (ARG0),				       \
	[arg1] "ri" (ARG1)				       \
      : "memory"					       \
   );							       \
   retval;						       \
})

/*
 * System call with three arguments (stack-passing variant).
 * Pushes ARG2, ARG1, ARG0 and NUMBER (in that order), executes `int $0x80`,
 * and pops the 16 bytes again; evaluates to the value found in EAX afterwards.
 * Arguments use the "ri" constraint (register or immediate), never memory:
 * a memory operand addressed relative to ESP would be off after a preceding
 * pushl has moved ESP.
 */
#define _syscall3(NUMBER, ARG0, ARG1, ARG2) ({		       \
   int retval;						       \
   asm volatile (					       \
      "pushl %[arg2]; pushl %[arg1]; pushl %[arg0]; "	       \
      "pushl %[number]; int $0x80; addl $16, %%esp"	       \
      : "=a" (retval)					       \
      : [number] "i" (NUMBER),				       \
	[arg0] "ri" (ARG0),				       \
	[arg1] "ri" (ARG1),				       \
	[arg2] "ri" (ARG2)				       \
      : "memory"					       \
   );							       \
   retval;						       \
})

Modify ( myos/kernel/kernel.S )

;;;;;;;;;;;;;;;;   interrupt 0x80   ;;;;;;;;;;;;;;;;
[bits 32]
extern syscall_table            ; table of system-call function addresses defined in C (same idea as the C interrupt-handler table used by the ordinary interrupt stubs)
section .text
global syscall_handler
syscall_handler:
                                ;1 Save the context. The pushes mirror what the ordinary interrupt stubs push, so that intr_exit can be reused
    push 0			            ; push 0 to keep the stack layout uniform
    push ds
    push es
    push fs
    push gs
    pushad			            ; PUSHAD pushes the 32-bit registers in the order EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI
    push 0x80			        ; 0x80 is pushed here, again to keep the stack layout uniform

                                ;2 Fetch the user stack pointer (esp) that the CPU pushed on the kernel stack:
                                ; skip 0x80 (4), pushad + four segment registers (48), the pushed 0 (4), then eip/cs/eflags (12)
    mov ebx, [esp + 4 + 48 + 4 + 12]                             
                                
                                ; Pass the arguments to the system-call function. This handler is shared by every
                                ; user-side entry (up to three arguments), so three arguments are always pushed; ebx is the user stack pointer here
    push dword [ebx + 12]		; 3rd system-call argument
    push dword [ebx + 8]		; 2nd system-call argument
    push dword [ebx + 4]		; 1st system-call argument
    mov edx, [ebx]              ; system-call number, read from the top of the user stack

                                ;3 Call the handler function defined in C; edx is the index into syscall_table
    call [syscall_table + edx*4]	; the C function reads only as many stack arguments as its declaration names
    add esp, 12			        ; drop the three arguments pushed above

                                ;4 Store the return value (C leaves it in eax, per the C ABI) into the saved-eax slot on the kernel stack
    mov [esp + 8*4], eax	
    jmp intr_exit		        ; return through intr_exit, which restores the context

The core of modifying kernel.S is to obtain the top position of the user stack

mov ebx, [esp + 4 + 48 + 4 + 12]

Starting from the top of the kernel stack: + 4 skips the pushed 0x80; + 48 skips the registers saved by pushad (32 bytes) and gs, fs, es, ds (16 bytes); + 4 skips the pushed 0. The final + 12 is needed because the user program's int 0x80 triggers a soft interrupt with a privilege level switch, so the CPU automatically pushes the user program's ss, esp, eflags, cs and eip onto the kernel stack, in that order. eip, cs and eflags were pushed last and therefore sit at the lowest addresses; each occupies a 4-byte stack slot (cs and ss are 16-bit values but are pushed as 32-bit slots). Skipping these 12 bytes (eip = 4, cs = 4, eflags = 4) lands on the saved esp, which holds the top of the user stack at the time of the call.

Until now we have printed with put_str, put_ch and put_int, which can only be used in kernel mode. Now we need a user-mode printing function, like printf in C. C's printf is provided by the libc library and relies on system calls to do its work, one of which is the write system call. If we write a simple C program that prints hello, compile it, and trace the resulting executable with the strace command (for example, strace ./hello.bin), we can clearly see that the write system call is invoked. Here, write writes hello to the standard output file (in Linux the console is abstracted as a file, called standard output; in our case, the console shell).
Insert image description here
Our printf also imitates this, and ultimately calls the write system call to implement it. However, since we do not implement a file system now, our write is a simple version. Now let's add this write system call.

First, let's increase the write system call number.

Modify ( myos/lib/user/syscall.h )

/* System-call numbers; each value is used as an index into syscall_table. */
enum SYSCALL_NR {
   	SYS_GETPID,
   	SYS_WRITE
};

Then, we implement the user program entry for the write system call.

Modify ( myos/lib/user/syscall.c )

/* Print the string str. User-side entry: issues system call SYS_WRITE with str as its single argument and returns the call's result. */
uint32_t write(char* str) {
   return _syscall1(SYS_WRITE, str);
}

After that, we declare the user program entry for the write system call.

Modify ( myos/lib/user/syscall.h )

uint32_t write(char* str);

Now that we have implemented the user program entry for the write system call, let's implement the real system call handling function and add it to the system call table.

Modify ( myos/userprog/syscall-init.c )

#include "console.h"
#include "string.h"

/* Print the string str (version used before a file system exists): outputs str via console_put_str and returns strlen(str). */
uint32_t sys_write(char* str) {
   console_put_str(str);
   return strlen(str);
}

/* Initialise system calls: store each handler's address in its syscall_table slot. */
void syscall_init(void) {
   put_str("syscall_init start\n");
   syscall_table[SYS_GETPID] = sys_getpid;    /* slot SYS_GETPID -> sys_getpid */
   syscall_table[SYS_WRITE] = sys_write;      /* slot SYS_WRITE  -> sys_write */
   put_str("syscall_init done\n");
}

Then declare this system call function

Modify ( myos/userprog/syscall-init.h )

uint32_t sys_write(char* str);

Since we now have the page table mechanism enabled, any address will be treated as a virtual address. When we wrote print.S before, because it was for the kernel, some of the addresses used to deal with the video memory segment were addressed with the help of the 0th entry in the kernel page directory table. Now we share print with the user process. The user process cannot access entry 0 of the kernel page directory table. However, because item No. 768 of the process page directory table and item No. 0 of the kernel page directory table point to the same kernel page table (because item No. 768 of the process page directory table is the copied item No. 768 of the kernel page directory table). Therefore, we can access the address originally accessed through entry 0 of the kernel page directory table through entry 768 of the process page directory table. Therefore, we need to modify some address accesses in print.S and increase them by 3G, so that the addresses originally accessed through entry 0 of the kernel page directory table can now be accessed through entry 768 of the process page directory table.

Modify ( myos/lib/kernel/print.S )

.roll_screen:				                                ; past the end of the screen: scroll
    cld                                                     
    mov ecx, 960				                            ; 2000-80=1920 characters to move, 1920*2=3840 bytes; 4 bytes per step gives 3840/4=960 iterations
    ;mov esi, 0xb80a0			                            ; previous source address, before 0xc0000000 was added
	mov esi, 0xc00b80a0										; start of row 1
    ;mov edi, 0xb8000			                            ; previous destination address, before 0xc0000000 was added
	mov edi, 0xc00b8000										; start of row 0
    rep movsd				                                ; copy ecx dwords from [esi] to [edi]

Now we have fully implemented the system call write for outputting text. You can try to use write in the user process for output.

Our write only supports: string output (because the underlying implementation uses put_str) and a string address parameter; while printf in the C language can not only process strings, but also numbers, characters, addresses, etc. And the number of parameters can be infinite, as follows:

printf("a = %x, b = %c, c = %p, d = %s", a, b, c, d);

So printf is by no means a simple call to write; it adds two things. 1. It converts multiple formats into a single string for write to print. In the example above, the first specifier %x is replaced with the value of a, the second specifier %c with the value of b, and so on, and then the whole string is printed. 2. It supports variable parameters, that is, the number of parameters is not restricted. (Generally, when we write a function and its declaration, we must specify the number and types of the parameters so that the compiler knows how much stack space to set up for the call. For the principle of variable parameters, see p536 of the book.)

Variable parameters rely on the characteristics of the compiler. The core of the principle is that the caller pushes the parameters onto the stack from right to left according to the C calling convention, and the callee can find the passed-in parameters from the data on the stack. Let's use the printf above as an example. When a function calls printf, the callee's stack is laid out as follows:

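203 d    address of the string                                   (high address)
199 c    address value
195 b    ASCII code of the character (promoted, occupies a 4-byte slot)
191 a    32-bit value
187 address of the string "a = %x, b = %c, c = %p, d = %s"
183 eip  return address                                          (low address)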

As long as we know the position of the first parameter (the address of the format string) and the type of each parameter (only the type tells us how much stack space the parameter occupies, and therefore how far to move the pointer to reach the next one), we can find all the following parameters. Neither of these is difficult.

Now let’s write the printf function, but since this function has many functions, we only add support for hexadecimal output now.

myos/lib/stdio.c

#include "stdio.h"
#include "stdint.h"
#include "string.h"
#include "global.h"
#include "syscall.h"

/*
 * Minimal variable-argument macros for a stack where every argument
 * occupies a 4-byte slot. Arguments and expansions are fully parenthesised
 * so the macros behave as single expressions whatever is passed in.
 */
#define va_start(ap, v) ((ap) = (va_list)&(v))        	// point ap at the first fixed parameter v
#define va_arg(ap, t)   (*((t*)((ap) += 4)))         	// advance ap to the next argument and yield its value
#define va_end(ap)      ((ap) = NULL)		               	// clear ap

/*
 * Convert an integer to characters (integer to ASCII).
 * Writes the digits of `value` in radix `base`, most significant first, at
 * *buf_ptr_addr and advances *buf_ptr_addr past them. Remainders 0-9 become
 * '0'-'9'; larger remainders become 'A', 'B', ... No terminator is written.
 * NOTE(review): callers in this file pass base 10 or 16; base is assumed >= 2.
 */
static void itoa(uint32_t value, char** buf_ptr_addr, uint8_t base) {
   char digits[32];          /* least significant digit first; 32 covers a 32-bit value in base 2 */
   int count = 0;

   /* The modulo yields the lowest digit first, so collect the digits backwards... */
   do {
      uint32_t rem = value % base;
      digits[count++] = (rem < 10) ? (char)(rem + '0') : (char)(rem - 10 + 'A');
      value /= base;
   } while (value != 0);

   /* ...then emit them from most significant to least significant. */
   while (count > 0) {
      *((*buf_ptr_addr)++) = digits[--count];
   }
}

/*
 * Format the arguments in ap according to `format` into `str` and return
 * the length of the resulting string.
 * Only %x (hexadecimal) is supported. A character following '%' that is not
 * a supported specifier is left in place and handled by the next loop
 * iteration. The output is always NUL-terminated here, so the result does
 * not depend on the caller having zeroed `str` beforehand.
 * NOTE(review): no bound is applied to the output; `str` must be large enough.
 */
uint32_t vsprintf(char* str, const char* format, va_list ap) {
	char* buf_ptr = str;
	const char* index_ptr = format;
	char index_char = *index_ptr;
	int32_t arg_int;

	while (index_char) {
		if (index_char != '%') {
			*(buf_ptr++) = index_char;
			index_char = *(++index_ptr);
			continue;
		}
		index_char = *(++index_ptr);	 			// the character after '%'
		switch (index_char) {
		case 'x':
			arg_int = va_arg(ap, int);
			itoa(arg_int, &buf_ptr, 16);
			index_char = *(++index_ptr); 			// skip the format character and refresh index_char
			break;
		default:
			/* unsupported specifier: index_char is left as is for the next iteration */
			break;
		}
	}
	*buf_ptr = '\0';								// terminate before measuring
	return strlen(str);
}

/*
 * Formatted output of the string `format`: builds the text in a local
 * 1024-byte buffer with vsprintf and passes it to write(); returns the
 * value returned by write().
 * NOTE(review): nothing here limits the formatted text to the buffer size.
 */
uint32_t printf(const char* format, ...) {
   va_list args;
   va_start(args, format);	       					// make args point at format
   char buf[1024] = {0};	       					// holds the assembled string; zero-filled
   vsprintf(buf, format, args);
   va_end(args);
   return write(buf); 
}

Support code ( myos/lib/stdio.h )

#ifndef __LIB_STDIO_H
#define __LIB_STDIO_H
#include "stdint.h"
typedef char* va_list;
uint32_t printf(const char* str, ...);
uint32_t vsprintf(char* str, const char* format, va_list ap);
#endif

Test code ( myos/kernel/main.c )

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"
#include "stdio.h"

void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);

/*
 * Test entry: initialise everything, call process_execute for the two user
 * programs, enable interrupts, print main's own pid, start the two kernel
 * threads, then spin forever.
 */
int main(void) {
   put_str("I am kernel\n");
   init_all();

   process_execute(u_prog_a, "user_prog_a");
   process_execute(u_prog_b, "user_prog_b");

   intr_enable();
   console_put_str(" main_pid:0x");
   console_put_int(sys_getpid());      /* calls the handler function directly rather than going through int 0x80 */
   console_put_char('\n');
   thread_start("k_thread_a", 31, k_thread_a, "argA ");
   thread_start("k_thread_b", 31, k_thread_b, "argB ");
   while(1);
   return 0;
}

/* Function run in kernel thread A: prints its own pid, then spins forever. */
void k_thread_a(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" thread_a_pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   while(1);
}

/* Function run in kernel thread B: prints its own pid, then spins forever. */
void k_thread_b(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" thread_b_pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   while(1);
}

/* Test user process: prints the value returned by getpid() in hexadecimal through printf, then spins. */
void u_prog_a(void) {
   printf(" prog_a_pid:0x%x\n", getpid());
   while(1);
}

/* Test user process: prints the value returned by getpid() in hexadecimal through printf, then spins. */
void u_prog_b(void) {
   printf(" prog_b_pid:0x%x\n", getpid());
   while(1);
}

Modify the Makefile to add compilation rules for the newly added files. After compilation, the following error will appear:

ld: build/stdio.o: in function `printf':
stdio.c:(.text+0x1ab): undefined reference to `__stack_chk_fail'
make: *** [makefile:105: build/kernel.bin] Error 1

This error occurs because the compiler is using its stack protection feature, but the linker cannot find __stack_chk_fail, a function that feature requires. Stack protection is a security feature that detects stack overflows. Calls to __stack_chk_fail are inserted into the code automatically by the compiler, to abort the program when a stack overflow is detected. The function is normally provided by the C library; since our program is not linked against the C library, we see this error.

One solution to this problem is to disable the stack protection feature. To do this, add the -fno-stack-protector option when compiling your code.

Modify ( myos/Makefile )

CFLAGS= -Wall $(LIB) -c -fno-builtin -W -Wstrict-prototypes -Wmissing-prototypes -m32 -fno-stack-protector

Next, we improve printf and add the function of processing %s, %c, %d

Modify (myos/lib/stdio.c)

char* arg_str;

/*
 * Format the arguments in ap according to `format` into `str` and return
 * the length of the resulting string.
 * Supported specifiers: %s (string), %c (character), %d (signed decimal),
 * %x (hexadecimal). A character following '%' that is not a supported
 * specifier is left in place and handled by the next loop iteration.
 * The output is always NUL-terminated here, so the result does not depend
 * on the caller having zeroed `str` beforehand.
 * NOTE(review): no bound is applied to the output; `str` must be large enough.
 */
uint32_t vsprintf(char* str, const char* format, va_list ap) {
	char* buf_ptr = str;
	const char* index_ptr = format;
	char index_char = *index_ptr;
	int32_t arg_int;
	char* arg_str;

	while (index_char) {
		if (index_char != '%') {
			*(buf_ptr++) = index_char;
			index_char = *(++index_ptr);
			continue;
		}
		index_char = *(++index_ptr);	 			// the character after '%'
		switch (index_char) {
		case 's':
			arg_str = va_arg(ap, char*);
			strcpy(buf_ptr, arg_str);
			buf_ptr += strlen(arg_str);
			index_char = *(++index_ptr);
			break;
		case 'c':
			*(buf_ptr++) = va_arg(ap, char);
			index_char = *(++index_ptr);
			break;
		case 'd':
			arg_int = va_arg(ap, int);
			if (arg_int < 0) {
				/* Negative: emit '-' and convert the magnitude. The negation is done
				 * in unsigned arithmetic so the most negative value does not overflow. */
				*buf_ptr++ = '-';
				itoa(0u - (uint32_t)arg_int, &buf_ptr, 10);
			} else {
				itoa((uint32_t)arg_int, &buf_ptr, 10);
			}
			index_char = *(++index_ptr);
			break;
		case 'x':
			arg_int = va_arg(ap, int);
			itoa(arg_int, &buf_ptr, 16);
			index_char = *(++index_ptr); 			// skip the format character and refresh index_char
			break;
		default:
			/* unsupported specifier: index_char is left as is for the next iteration */
			break;
		}
	}
	*buf_ptr = '\0';								// terminate before measuring
	return strlen(str);
}

Test code ( myos/kernel/main.c )

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"
#include "stdio.h"

void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);

/*
 * Test entry: initialise everything, call process_execute for the two user
 * programs, print main's own pid, enable interrupts, start the two kernel
 * threads, then spin forever.
 */
int main(void) {
   put_str("I am kernel\n");
   init_all();

   process_execute(u_prog_a, "u_prog_a");
   process_execute(u_prog_b, "u_prog_b");

   console_put_str(" I am main, my pid:0x");
   console_put_int(sys_getpid());      /* calls the handler function directly rather than going through int 0x80 */
   console_put_char('\n');
   intr_enable();
   thread_start("k_thread_a", 31, k_thread_a, "I am thread_a");
   thread_start("k_thread_b", 31, k_thread_b, "I am thread_b ");
   while(1);
   return 0;
}

/* Function run in kernel thread A: prints its own pid, then spins forever. */
void k_thread_a(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" I am thread_a, my pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   while(1);
}

/* Function run in kernel thread B: prints its own pid, then spins forever. */
void k_thread_b(void* arg) {
   (void)arg;                          /* the argument is not used */
   console_put_str(" I am thread_b, my pid:0x");
   console_put_int(sys_getpid());
   console_put_char('\n');
   while(1);
}

/* Test user process: exercises %s, %d and %c by printing its name, the value returned by getpid(), and a newline; then spins. */
void u_prog_a(void) {
   char* name = "prog_a";
   printf(" I am %s, my pid:%d%c", name, getpid(),'\n');
   while(1);
}

/* Test user process: exercises %s, %d and %c by printing its name, the value returned by getpid(), and a newline; then spins. */
void u_prog_b(void) {
   char* name = "prog_b";
   printf(" I am %s, my pid:%d%c", name, getpid(), '\n');
   while(1);
}

Before proceeding with the next development, let’s fill in the holes first. Do you remember that when we implemented page allocation before, we only implemented the allocation of kernel pages, and the code for allocating user pages was empty!

Modify ( myos/kernel/memory.c )

/* Reserve pg_cnt contiguous virtual pages in the virtual address pool selected
 * by pf (the kernel pool for PF_KERNEL, otherwise the running task's own
 * user pool).
 * Returns the start address of the reserved range, or NULL when the pool's
 * bitmap has no run of pg_cnt free pages. */
static void* vaddr_get(enum pool_flags pf, uint32_t pg_cnt) {
    /* Addresses are kept unsigned: kernel addresses lie above 0x80000000 and
     * do not fit in a signed int, and a signed bit_idx * PG_SIZE product can
     * overflow for pages high in the user address space. */
    uint32_t vaddr_start = 0;
    int bit_idx_start = -1;     /* bitmap_scan reports "not found" as -1 */
    uint32_t cnt = 0;

    if (pf == PF_KERNEL) {
        bit_idx_start = bitmap_scan(&kernel_vaddr.vaddr_bitmap, pg_cnt);
        if (bit_idx_start == -1) {
            return NULL;
        }
        /* Mark every page of the run as in use. */
        while (cnt < pg_cnt) {
            bitmap_set(&kernel_vaddr.vaddr_bitmap, bit_idx_start + cnt++, 1);
        }
        vaddr_start = kernel_vaddr.vaddr_start + (uint32_t)bit_idx_start * PG_SIZE;
    } else {
        /* User pool: each task carries its own virtual-address bitmap. */
        struct task_struct* cur = running_thread();
        bit_idx_start = bitmap_scan(&cur->userprog_vaddr.vaddr_bitmap, pg_cnt);
        if (bit_idx_start == -1) {
            return NULL;
        }
        while (cnt < pg_cnt) {
            bitmap_set(&cur->userprog_vaddr.vaddr_bitmap, bit_idx_start + cnt++, 1);
        }
        vaddr_start = cur->userprog_vaddr.vaddr_start + (uint32_t)bit_idx_start * PG_SIZE;

        /* (0xc0000000 - PG_SIZE) is the user ring-3 stack page, already
         * allocated in start_process, so it must never be handed out here. */
        ASSERT(vaddr_start < (0xc0000000 - PG_SIZE));
    }
    return (void*)vaddr_start;
}

Previously, our memory management had three limitations: (1) memory could only be allocated, never released; (2) the allocation unit was a whole page; (3) it could only be used in kernel mode. We now improve it in three ways: (1) implement a release mechanism; (2) manage memory in finer-grained units; (3) make it usable from user mode as well.

The implementation of the release mechanism is very simple and is the reverse operation of the allocation mechanism;

Finer-grained memory management requires understanding and implementing the arena model. In this model, we first request a complete 4KB page and then divide it into small blocks of one fixed size, for example 256 blocks of 16B or 8 blocks of 512B (slightly fewer in practice, because the arena header at the start of the page takes some space). These independent small blocks then become the basic unit of allocation and release.

User-mode programs reach this allocator through the system call mechanism.

First, let's establish the underlying data structure:

Modify ( myos/kernel/memory.h )

#include "list.h"

/* A free memory block. It contains only the list node that links the block
 * into its descriptor's free_list. */
struct mem_block {
   struct list_elem free_elem;
};

/* Memory block descriptor: describes one size class of small block. */
struct mem_block_desc {
   uint32_t block_size;		 // size in bytes of the blocks in this class
   uint32_t blocks_per_arena;	 // number of blocks of this size that one arena can hold
   struct list free_list;	 // list of the mem_blocks currently available for allocation
};

#define DESC_CNT 7	   // number of memory block descriptors (block_desc_init starts at 16 bytes and doubles: 16 .. 1024)

Modify ( myos/kernel/memory.c )

/* Arena metadata, stored at the start of the page(s) that make up the arena. */
struct arena {
   struct mem_block_desc* desc;	 // the mem_block_desc this arena belongs to (set to NULL for large allocations)
   uint32_t cnt;
   bool large;		   /* when large is true, cnt is the number of page frames; otherwise cnt is the number of free mem_blocks */
};

The relationship between struct mem_block_desc, struct mem_block and struct arena:

struct mem_block_desc describes one size class of small block. In the example above, a 4KB page can be divided into 256 blocks of 16B or 8 blocks of 512B; the 512B blocks correspond to one mem_block_desc and the 16B blocks to another. block_size records which block size this mem_block_desc describes, such as 512 or 16. blocks_per_arena records how many small blocks one page is split into, such as 8 or 256. free_list manages the small blocks that can be allocated, that is, it links the currently available small blocks into a list.

struct mem_block is meant to describe one of the fixed-size small blocks that the 4KB page is divided into. However, in order to keep the management logic general, the author puts only one member in this structure: the linked-list node used to manage the block while it is free.

struct arena describes the arena itself, and desc points to the mem_block_desc structure that manages this arena. The meaning of cnt depends on the value of large: if large = true, cnt is the number of page frames occupied by this arena; otherwise, cnt is the number of free small memory blocks still available in this arena. Note that one mem_block_desc can correspond to more than one arena. This is easy to understand: when all the small blocks of an arena have been allocated, we have to allocate a new page to serve as another arena and divide it into small blocks of the same fixed size.

The relationship between these three structures is as shown in the figure:
Insert image description here
Next, initialize the kernel's mem_block_desc array, which holds one descriptor for each size class of arena.

Modify ( myos/kernel/memory.c )

struct mem_block_desc k_block_descs[DESC_CNT];	// kernel memory block descriptor array (one entry per block size class)

/* Initialise a DESC_CNT-element descriptor array: entry i describes blocks of
 * 16 << i bytes, records how many such blocks fit in one page after the arena
 * header, and starts with an empty free list. */
void block_desc_init(struct mem_block_desc* desc_array) {
    uint16_t block_size = 16;   /* smallest size class */
    uint16_t desc_idx = 0;

    while (desc_idx < DESC_CNT) {
        struct mem_block_desc* d = &desc_array[desc_idx];

        d->block_size = block_size;
        /* The arena header occupies the start of the page. */
        d->blocks_per_arena = (PG_SIZE - sizeof(struct arena)) / block_size;
        list_init(&d->free_list);

        block_size *= 2;        /* next size class */
        desc_idx++;
    }
}

/* Entry point of memory-management initialisation: set up the memory pools
 * and the kernel's block descriptor array. */
void mem_init() {
    put_str("mem_init start\n");

    /* The total memory size in bytes is read from address 0xb00
     * (NOTE(review): presumably stored there by the loader; confirm). */
    uint32_t* total_bytes_slot = (uint32_t*)(0xb00);
    mem_pool_init(*total_bytes_slot);

    block_desc_init(k_block_descs);
    put_str("mem_init done\n");
}

Add function declaration and modify ( myos/kernel/memory.h )

/* Initialise the DESC_CNT block descriptors in desc_array (sizes, per-arena counts, empty free lists). */
void block_desc_init(struct mem_block_desc* desc_array);

The kernel has a mem_block_desc array that manages its different size classes of arenas. A process is the unit to which resources are allocated independently, and each process has its own independent virtual address space, so a process should also have its own mem_block_desc array to manage its own arenas. That way, when a process allocates memory, it only needs to find the matching mem_block_desc in its own array and then take a free small block from that descriptor's free_list.

Modify ( myos/thread/thread.h ): add a u_block_desc member to task_struct so that each task_struct has its own mem_block_desc array

/* Task control block (PCB), used for both kernel threads and user processes. */
struct task_struct {
   uint32_t* self_kstack;	        // saved top of the task's kernel stack; the run-time context the task needs is kept at the stack top
   pid_t pid;
   enum task_status status;
   uint8_t priority;		        // task priority
   char name[16];                   // task name

   uint8_t ticks;	                 // ticks left in the current time slice; priority itself must not change, so a separate countdown value is kept
   uint32_t elapsed_ticks;          // total CPU ticks consumed since the task first ran, i.e. how long it has executed
   struct list_elem general_tag;		// node used when the task sits in a general queue (e.g. the ready queue or a wait queue)
   struct list_elem all_list_tag;   // node used in thread_all_list, the queue that tracks every task
   uint32_t* pgdir;              // virtual address of the process's own page table (sys_malloc treats NULL as "kernel thread")
   struct virtual_addr userprog_vaddr;   // the user process's virtual address pool
   struct mem_block_desc u_block_desc[DESC_CNT];   // the user process's memory block descriptors
   uint32_t stack_magic;	       // boundary marker: if the kernel stack grows too far it overwrites the PCB, and this value is used to detect that
};

However, this array is only initialized for processes (in process_execute); kernel threads allocate through the kernel's k_block_descs array instead.

Modify ( myos/userprog/process.c )

//用于创建进程,参数是进程要执行的函数与他的名字
void process_execute(void* filename, char* name) {
    
     
    /* pcb内核的数据结构,由内核来维护进程信息,因此要在内核内存池中申请 */
    struct task_struct* thread = get_kernel_pages(1);
    init_thread(thread, name, default_prio); 
    create_user_vaddr_bitmap(thread);
    thread_create(thread, start_process, filename);
    thread->pgdir = create_page_dir();
    block_desc_init(thread->u_block_desc);
    
    enum intr_status old_status = intr_disable();
    ASSERT(!elem_find(&thread_ready_list, &thread->general_tag));
    list_append(&thread_ready_list, &thread->general_tag);

    ASSERT(!elem_find(&thread_all_list, &thread->all_list_tag));
    list_append(&thread_all_list, &thread->all_list_tag);
    intr_set_status(old_status);
}

Now let's write sys_malloc, which works with the arena model to actually allocate memory.

Modify ( myos/kernel/memory.c )

#include "interrupt.h"

/* Return the address of the idx-th memory block inside arena a. Blocks start
 * right after the arena header and are a->desc->block_size bytes each. */
static struct mem_block* arena2block(struct arena* a, uint32_t idx) {
    uint32_t block_offset = idx * a->desc->block_size;
    return (struct mem_block*)((uint32_t)a + sizeof(struct arena) + block_offset);
}

/* 返回内存块b所在的arena地址 */
static struct arena* block2arena(struct mem_block* b) {
    
    
   	return (struct arena*)((uint32_t)b & 0xfffff000);
}

/* 在堆中申请size字节内存 */
void* sys_malloc(uint32_t size) {
    
    
	enum pool_flags PF;
	struct pool* mem_pool;
	uint32_t pool_size;
	struct mem_block_desc* descs;	//用于存储mem_block_desc数组地址
	struct task_struct* cur_thread = running_thread();

	/* 判断用哪个内存池*/
	if (cur_thread->pgdir == NULL) {
    
         // 若为内核线程
		PF = PF_KERNEL; 
		pool_size = kernel_pool.pool_size;
		mem_pool = &kernel_pool;
		descs = k_block_descs;
	} 
	else {
    
    				      // 用户进程pcb中的pgdir会在为其分配页表时创建
		PF = PF_USER;
		pool_size = user_pool.pool_size;
		mem_pool = &user_pool;
		descs = cur_thread->u_block_desc;
	}

	/* 若申请的内存不在内存池容量范围内则直接返回NULL */
	if (!(size > 0 && size < pool_size)) {
    
    
		return NULL;
	}
	struct arena* a;
	struct mem_block* b;	
	lock_acquire(&mem_pool->lock);

	/* 超过最大内存块1024, 就分配页框 */
	if (size > 1024) {
    
    
		uint32_t page_cnt = DIV_ROUND_UP(size + sizeof(struct arena), PG_SIZE);    // 向上取整需要的页框数
		a = malloc_page(PF, page_cnt);
		if (a != NULL) {
    
    
			memset(a, 0, page_cnt * PG_SIZE);	 // 将分配的内存清0  

			/* 对于分配的大块页框,将desc置为NULL, cnt置为页框数,large置为true */
			a->desc = NULL;
			a->cnt = page_cnt;
			a->large = true;
			lock_release(&mem_pool->lock);
			return (void*)(a + 1);		 // 跨过arena大小,把剩下的内存返回
		} 
		else {
    
     
			lock_release(&mem_pool->lock);
			return NULL; 
		}
	} 
	else {
    
        // 若申请的内存小于等于1024,可在各种规格的mem_block_desc中去适配
		uint8_t desc_idx;
		
		/* 从内存块描述符中匹配合适的内存块规格 */
		for (desc_idx = 0; desc_idx < DESC_CNT; desc_idx++) {
    
    
			if (size <= descs[desc_idx].block_size) {
    
      // 从小往大后,找到后退出
				break;
			}
		}

	/* 若mem_block_desc的free_list中已经没有可用的mem_block,
		* 就创建新的arena提供mem_block */
		if (list_empty(&descs[desc_idx].free_list)) {
    
    
			a = malloc_page(PF, 1);       // 分配1页框做为arena
			if (a == NULL) {
    
    
				lock_release(&mem_pool->lock);
				return NULL;
			}
			memset(a, 0, PG_SIZE);

			/* 对于分配的小块内存,将desc置为相应内存块描述符, 
			* cnt置为此arena可用的内存块数,large置为false */
			a->desc = &descs[desc_idx];
			a->large = false;
			a->cnt = descs[desc_idx].blocks_per_arena;
			uint32_t block_idx;

			enum intr_status old_status = intr_disable();

			/* 开始将arena拆分成内存块,并添加到内存块描述符的free_list中 */
			for (block_idx = 0; block_idx < descs[desc_idx].blocks_per_arena; block_idx++) {
    
    
				b = arena2block(a, block_idx);
				ASSERT(!elem_find(&a->desc->free_list, &b->free_elem));
				list_append(&a->desc->free_list, &b->free_elem);	
			}
			intr_set_status(old_status);
		}    

	/* 开始分配内存块 */
		b = elem2entry(struct mem_block, free_elem, list_pop(&(descs[desc_idx].free_list)));
		memset(b, 0, descs[desc_idx].block_size);

		a = block2arena(b);  // 获取内存块b所在的arena
		a->cnt--;		   // 将此arena中的空闲内存块数减1
		lock_release(&mem_pool->lock);
		return (void*)b;
	}
}

Declare function, modify ( myos/kernel/memory.h )

/* Allocate size bytes from the heap; returns NULL on failure. */
void* sys_malloc(uint32_t size);

Test code, modify ( myos/kernel/main.c )

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"
#include "stdio.h"
#include "memory.h"

/* Forward declarations of the kernel-thread and user-process entry points defined below. */
void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);

/* Kernel entry point for the sys_malloc test: initialise the kernel, enable
 * interrupts, start the two kernel threads and idle forever. */
int main(void) {
    put_str("I am kernel\n");
    init_all();
    intr_enable();

    thread_start("k_thread_a", 31, k_thread_a, "I am thread_a");
    thread_start("k_thread_b", 31, k_thread_b, "I am thread_b ");

    /* main never returns; the return below is unreachable. */
    for (;;) {
    }
    return 0;
}

/* Kernel-thread body: allocate 33 bytes with sys_malloc (served from the
 * 64-byte size class), print the returned address, then spin forever.
 * arg: start argument passed through thread_start(); not used by this test. */
void k_thread_a(void* arg) {
    /* The original copied arg into a local that was never read; the dead
     * local is removed and the parameter is explicitly marked unused. */
    (void)arg;
    void* addr = sys_malloc(33);
    console_put_str(" I am thread_a, sys_malloc(33), addr is 0x");
    console_put_int((int)addr);
    console_put_char('\n');
    while (1);
}

/* Kernel-thread body: allocate 63 bytes with sys_malloc (served from the
 * 64-byte size class), print the returned address, then spin forever.
 * arg: start argument passed through thread_start(); not used by this test. */
void k_thread_b(void* arg) {
    /* The original copied arg into a local that was never read; the dead
     * local is removed and the parameter is explicitly marked unused. */
    (void)arg;
    void* addr = sys_malloc(63);
    console_put_str(" I am thread_b, sys_malloc(63), addr is 0x");
    console_put_int((int)addr);
    console_put_char('\n');
    while (1);
}

/* User-process test body: print the process name and the pid obtained
 * through the getpid() system call, then spin forever. */
void u_prog_a(void) {
    printf(" I am %s, my pid:%d%c", "prog_a", getpid(), '\n');
    for (;;) {
    }
}

/* User-process test body: print the process name and the pid obtained
 * through the getpid() system call, then spin forever. */
void u_prog_b(void) {
    printf(" I am %s, my pid:%d%c", "prog_b", getpid(), '\n');
    for (;;) {
    }
}

Next, we implement page-level memory recycling. Page recycling is the reverse of page allocation: 1. clear the corresponding bit in the physical memory pool's bitmap; 2. clear the page table entry that maps the virtual address; 3. clear the corresponding bits in the virtual memory pool's bitmap.

Modify ( myos/kernel/memory.c )

/* Return the physical page at pg_phy_addr to its physical memory pool, which
 * amounts to clearing the page's bit in that pool's bitmap. Addresses at or
 * above user_pool.phy_addr_start belong to the user pool, all others to the
 * kernel pool. */
void pfree(uint32_t pg_phy_addr) {
    struct pool* owner;

    if (pg_phy_addr >= user_pool.phy_addr_start) {
        owner = &user_pool;
    } else {
        owner = &kernel_pool;
    }

    uint32_t bit_idx = (pg_phy_addr - owner->phy_addr_start) / PG_SIZE;
    bitmap_set(&owner->pool_bitmap, bit_idx, 0);
}

/* Remove the mapping of virtual address vaddr from the page table. Only the
 * PTE for vaddr is touched: its present bit is cleared and the stale TLB
 * entry is flushed. */
static void page_table_pte_remove(uint32_t vaddr) {
    uint32_t* pte = pte_ptr(vaddr);
    *pte &= ~PG_P_1;    /* clear the P (present) bit of the PTE */

    /* invlpg flushes the TLB entry of the page containing its memory operand.
     * The operand must therefore be the address held IN vaddr, passed in a
     * register and dereferenced with (%0). The previous "m"(vaddr) form made
     * the operand the local variable itself, so the entry flushed was the one
     * for the page holding that variable instead of the unmapped page. */
    asm volatile ("invlpg (%0)" : : "r" (vaddr) : "memory");
}

/* Release pg_cnt consecutive virtual pages starting at _vaddr in the virtual
 * address pool selected by pf, i.e. clear their bits in that pool's bitmap. */
static void vaddr_remove(enum pool_flags pf, void* _vaddr, uint32_t pg_cnt) {
    uint32_t vaddr = (uint32_t)_vaddr;
    uint32_t first_bit;
    uint32_t i;

    if (pf == PF_KERNEL) {
        /* Kernel virtual address pool. */
        first_bit = (vaddr - kernel_vaddr.vaddr_start) / PG_SIZE;
        for (i = 0; i < pg_cnt; i++) {
            bitmap_set(&kernel_vaddr.vaddr_bitmap, first_bit + i, 0);
        }
    } else {
        /* User virtual address pool of the running task. */
        struct task_struct* cur_thread = running_thread();
        first_bit = (vaddr - cur_thread->userprog_vaddr.vaddr_start) / PG_SIZE;
        for (i = 0; i < pg_cnt; i++) {
            bitmap_set(&cur_thread->userprog_vaddr.vaddr_bitmap, first_bit + i, 0);
        }
    }
}

/* Free pg_cnt page frames starting at virtual address _vaddr.
 * pf selects which virtual address pool's bitmap is cleared at the end.
 * For every page: return the physical frame to its pool, then remove the
 * PTE. The pool (user or kernel) is decided once from the first page, and
 * every following page is asserted to belong to the same pool. */
void mfree_page(enum pool_flags pf, void* _vaddr, uint32_t pg_cnt) {
    /* Addresses are unsigned throughout (the original cast the pointer
     * through int32_t, unlike vaddr_remove). */
    uint32_t vaddr = (uint32_t)_vaddr;
    uint32_t page_cnt;
    uint32_t pg_phy_addr;
    bool in_user_pool;

    ASSERT(pg_cnt >= 1 && vaddr % PG_SIZE == 0);
    pg_phy_addr = addr_v2p(vaddr);  /* physical address behind the first page */

    /* The memory being freed must lie outside the low 1MB plus the page
     * directory and page table that follow it. */
    ASSERT((pg_phy_addr % PG_SIZE) == 0 && pg_phy_addr >= 0x102000);

    in_user_pool = (pg_phy_addr >= user_pool.phy_addr_start);

    for (page_cnt = 0; page_cnt < pg_cnt; page_cnt++, vaddr += PG_SIZE) {
        pg_phy_addr = addr_v2p(vaddr);

        if (in_user_pool) {
            /* The frame must belong to the user physical pool. */
            ASSERT((pg_phy_addr % PG_SIZE) == 0 && pg_phy_addr >= user_pool.phy_addr_start);
        } else {
            /* The frame must belong to the kernel physical pool only. */
            ASSERT((pg_phy_addr % PG_SIZE) == 0 &&
                   pg_phy_addr >= kernel_pool.phy_addr_start &&
                   pg_phy_addr < user_pool.phy_addr_start);
        }

        /* First hand the physical frame back to its pool ... */
        pfree(pg_phy_addr);

        /* ... then drop the PTE that mapped this virtual page. */
        page_table_pte_remove(vaddr);
    }

    /* Finally clear the pages' bits in the virtual address bitmap. */
    vaddr_remove(pf, _vaddr, pg_cnt);
}

Function declaration: modification ( myos/kernel/memory.h )

/* Free pg_cnt page frames starting at virtual address _vaddr (pf selects the virtual address pool). */
void mfree_page(enum pool_flags pf, void* _vaddr, uint32_t pg_cnt);
/* Return the physical page at pg_phy_addr to its physical memory pool. */
void pfree(uint32_t pg_phy_addr);

Now we implement the recycling mechanism that corresponds to the arena-model allocation mechanism, wrapping the page recycling above, to directly implement a unified memory-release system call: sys_free.

Modify ( myos/kernel/memory.c )

/* 回收内存ptr */
void sys_free(void* ptr) {
    
    
	ASSERT(ptr != NULL);
	if (ptr != NULL) {
    
    
		enum pool_flags PF;
		struct pool* mem_pool;

	/* 判断是线程还是进程 */
		if (running_thread()->pgdir == NULL) {
    
    
			ASSERT((uint32_t)ptr >= K_HEAP_START);
			PF = PF_KERNEL; 
			mem_pool = &kernel_pool;
		} 
		else {
    
    
			PF = PF_USER;
			mem_pool = &user_pool;
		}

		lock_acquire(&mem_pool->lock);   
		struct mem_block* b = ptr;
		struct arena* a = block2arena(b);	     // 把mem_block转换成arena,获取元信息
		ASSERT(a->large == 0 || a->large == 1);
		if (a->desc == NULL && a->large == true) {
    
     // 大于1024的内存
			mfree_page(PF, a, a->cnt); 
		} 
		else {
    
    				 // 小于等于1024的内存块先将内存块回收到free_list
			list_append(&a->desc->free_list, &b->free_elem);

			/* 再判断此arena中的内存块是否都是空闲,如果是就释放arena */
			if (++a->cnt == a->desc->blocks_per_arena) {
    
    
				uint32_t block_idx;
				for (block_idx = 0; block_idx < a->desc->blocks_per_arena; block_idx++) {
    
    
					struct mem_block*  b = arena2block(a, block_idx);
					ASSERT(elem_find(&a->desc->free_list, &b->free_elem));
					list_remove(&b->free_elem);
				}
				mfree_page(PF, a, 1); 
			} 
		}   
		lock_release(&mem_pool->lock); 
	}
}

Function declaration, modification ( myos/kernel/memory.h )

/* Release memory previously returned by sys_malloc. */
void sys_free(void* ptr);

Test code ( myos/kernel/main.c )

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"
#include "stdio.h"
#include "memory.h"

/* Forward declarations of the kernel-thread and user-process entry points defined below. */
void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);

/* Kernel entry point for the sys_malloc/sys_free stress test: initialise the
 * kernel, enable interrupts, start the two kernel threads and idle forever. */
int main(void) {
    put_str("I am kernel\n");
    init_all();
    intr_enable();

    thread_start("k_thread_a", 31, k_thread_a, "I am thread_a");
    thread_start("k_thread_b", 31, k_thread_b, "I am thread_b ");

    /* main never returns; the return below is unreachable. */
    for (;;) {
    }
    return 0;
}

/* Kernel-thread stress test: 1000 rounds of interleaved sys_malloc/sys_free
 * calls mixing small blocks (128..512 bytes) with multi-page allocations
 * (64KB and 128KB), then spin forever.
 * arg: start argument passed through thread_start(); not used by this test. */
void k_thread_a(void* arg) {
   /* The original copied arg into a local that was never read; the dead
    * local is removed and the parameter is explicitly marked unused. */
   (void)arg;
   void* addr1;
   void* addr2;
   void* addr3;
   void* addr4;
   void* addr5;
   void* addr6;
   void* addr7;
   console_put_str(" thread_a start\n");
   int max = 1000;
   while (max-- > 0) {
      int size = 128;
      addr1 = sys_malloc(size);
      size *= 2;                       /* 256 */
      addr2 = sys_malloc(size);
      size *= 2;                       /* 512 */
      addr3 = sys_malloc(size);
      sys_free(addr1);
      addr4 = sys_malloc(size);
      size *= 2; size *= 2; size *= 2; size *= 2;
      size *= 2; size *= 2; size *= 2; /* 512 * 2^7 = 65536: page-frame path */
      addr5 = sys_malloc(size);
      addr6 = sys_malloc(size);
      sys_free(addr5);
      size *= 2;                       /* 131072 */
      addr7 = sys_malloc(size);
      sys_free(addr6);
      sys_free(addr7);
      sys_free(addr2);
      sys_free(addr3);
      sys_free(addr4);
   }
   console_put_str(" thread_a end\n");
   while (1);
}

/* Kernel-thread stress test: 1000 rounds of interleaved sys_malloc/sys_free
 * calls on small blocks (9..72 bytes), followed by nine 576-byte blocks that
 * are allocated together and then all freed; then spin forever.
 * arg: start argument passed through thread_start(); not used by this test. */
void k_thread_b(void* arg) {
   /* The original copied arg into a local that was never read; the dead
    * local is removed and the parameter is explicitly marked unused. */
   (void)arg;
   void* addr1;
   void* addr2;
   void* addr3;
   void* addr4;
   void* addr5;
   void* addr6;
   void* addr7;
   void* addr8;
   void* addr9;
   int max = 1000;
   console_put_str(" thread_b start\n");
   while (max-- > 0) {
      int size = 9;
      addr1 = sys_malloc(size);
      size *= 2;                       /* 18 */
      addr2 = sys_malloc(size);
      size *= 2;                       /* 36 */
      sys_free(addr2);
      addr3 = sys_malloc(size);
      sys_free(addr1);
      addr4 = sys_malloc(size);
      addr5 = sys_malloc(size);
      addr6 = sys_malloc(size);
      sys_free(addr5);
      size *= 2;                       /* 72 */
      addr7 = sys_malloc(size);
      sys_free(addr6);
      sys_free(addr7);
      sys_free(addr3);
      sys_free(addr4);

      size *= 2; size *= 2; size *= 2; /* 72 * 8 = 576: 1024-byte size class */
      addr1 = sys_malloc(size);
      addr2 = sys_malloc(size);
      addr3 = sys_malloc(size);
      addr4 = sys_malloc(size);
      addr5 = sys_malloc(size);
      addr6 = sys_malloc(size);
      addr7 = sys_malloc(size);
      addr8 = sys_malloc(size);
      addr9 = sys_malloc(size);
      sys_free(addr1);
      sys_free(addr2);
      sys_free(addr3);
      sys_free(addr4);
      sys_free(addr5);
      sys_free(addr6);
      sys_free(addr7);
      sys_free(addr8);
      sys_free(addr9);
   }
   console_put_str(" thread_b end\n");
   while (1);
}

/* User-process test body: print the process name and the pid obtained
 * through the getpid() system call, then spin forever. */
void u_prog_a(void) {
    printf(" I am %s, my pid:%d%c", "prog_a", getpid(), '\n');
    for (;;) {
    }
}

/* User-process test body: print the process name and the pid obtained
 * through the getpid() system call, then spin forever. */
void u_prog_b(void) {
    printf(" I am %s, my pid:%d%c", "prog_b", getpid(), '\n');
    for (;;) {
    }
}

Finally, we wrap sys_malloc and sys_free in calling interfaces that user programs can use.

Increase the system call number and modify ( myos/lib/user/syscall.h )

/* System call numbers; each value is used as the index into syscall_table. */
enum SYSCALL_NR {
   SYS_GETPID,
   SYS_WRITE,
   SYS_MALLOC,
   SYS_FREE
};

Encapsulate system call user entrance, modify ( myos/lib/user/syscall.c )

/* User-side entry: request size bytes of memory through the SYS_MALLOC
 * system call and return the result as a pointer. */
void* malloc(uint32_t size) {
   return (void*)_syscall1(SYS_MALLOC, size);
}

/* User-side entry: release the memory ptr points to through the SYS_FREE
 * system call. */
void free(void* ptr) {
   _syscall1(SYS_FREE, ptr);
}

Declare function, modify ( myos/lib/user/syscall.h )

/* User-mode heap interface, implemented with the SYS_MALLOC / SYS_FREE system calls. */
void* malloc(uint32_t size);
void free(void* ptr);

Register system call function

Modify ( myos/userprog/syscall-init.c )

#include "memory.h" 

/* Initialise the system call table: install the kernel-side handler for each
 * system call number. */
void syscall_init(void) {
    put_str("syscall_init start\n");

    /* Process identity and console output. */
    syscall_table[SYS_GETPID] = sys_getpid;
    syscall_table[SYS_WRITE] = sys_write;

    /* Heap management. */
    syscall_table[SYS_MALLOC] = sys_malloc;
    syscall_table[SYS_FREE] = sys_free;

    put_str("syscall_init done\n");
}

Test function ( myos/kernel/main.c ). Compared to the author's code, the delay-loop counter (cpu_delay) in k_thread_a and k_thread_b is increased from 100000 to 9999999. Otherwise the results shown in the book will not appear, because a thread would release its addresses before the next task switch.

#include "print.h"
#include "init.h"
#include "thread.h"
#include "interrupt.h"
#include "console.h"
#include "process.h"
#include "syscall-init.h"
#include "syscall.h"
#include "stdio.h"
#include "memory.h"

/* Forward declarations of the kernel-thread and user-process entry points defined below. */
void k_thread_a(void*);
void k_thread_b(void*);
void u_prog_a(void);
void u_prog_b(void);

/* Kernel entry point for the malloc/free system call test: initialise the
 * kernel, enable interrupts, create the two user processes and the two kernel
 * threads, then idle forever. */
int main(void) {
    put_str("I am kernel\n");
    init_all();
    intr_enable();

    /* User processes exercise malloc()/free() through system calls. */
    process_execute(u_prog_a, "u_prog_a");
    process_execute(u_prog_b, "u_prog_b");

    /* Kernel threads call sys_malloc()/sys_free() directly. */
    thread_start("k_thread_a", 31, k_thread_a, "I am thread_a");
    thread_start("k_thread_b", 31, k_thread_b, "I am thread_b");

    /* main never returns; the return below is unreachable. */
    for (;;) {
    }
    return 0;
}

/* Kernel-thread body: allocate three blocks (256, 255 and 254 bytes, all in
 * the 256-byte size class), print their addresses, busy-wait for a while,
 * free them and spin forever. arg is not used. */
void k_thread_a(void* arg) {
    void* addr1 = sys_malloc(256);
    void* addr2 = sys_malloc(255);
    void* addr3 = sys_malloc(254);

    console_put_str(" thread_a malloc addr:0x");
    console_put_int((int)addr1);
    console_put_char(',');
    console_put_int((int)addr2);
    console_put_char(',');
    console_put_int((int)addr3);
    console_put_char('\n');

    /* Burn CPU time so the blocks stay allocated across task switches. */
    for (int cpu_delay = 9999999; cpu_delay-- > 0;) {
    }

    sys_free(addr1);
    sys_free(addr2);
    sys_free(addr3);
    for (;;) {
    }
}

/* Kernel-thread body: allocate three blocks (256, 255 and 254 bytes, all in
 * the 256-byte size class), print their addresses, busy-wait for a while,
 * free them and spin forever. arg is not used. */
void k_thread_b(void* arg) {
   void* addr1 = sys_malloc(256);
   void* addr2 = sys_malloc(255);
   void* addr3 = sys_malloc(254);
   console_put_str(" thread_b malloc addr:0x");
   console_put_int((int)addr1);
   console_put_char(',');
   console_put_int((int)addr2);
   console_put_char(',');
   console_put_int((int)addr3);
   console_put_char('\n');

   /* Same delay as k_thread_a (9999999); the listing had 999999 here, one
    * digit short of the value the accompanying text gives for both threads. */
   int cpu_delay = 9999999;
   while(cpu_delay-- > 0);
   sys_free(addr1);
   sys_free(addr2);
   sys_free(addr3);
   while(1);
}

/* User-process test body: allocate three blocks through the malloc() system
 * call, print their addresses, busy-wait briefly, free them and spin forever. */
void u_prog_a(void) {
    void* addr1 = malloc(256);
    void* addr2 = malloc(255);
    void* addr3 = malloc(254);

    printf(" prog_a malloc addr:0x%x,0x%x,0x%x\n", (int)addr1, (int)addr2, (int)addr3);

    /* Short delay before the blocks are released. */
    for (int cpu_delay = 100000; cpu_delay-- > 0;) {
    }

    free(addr1);
    free(addr2);
    free(addr3);
    for (;;) {
    }
}

/* User-process test body: allocate three blocks through the malloc() system
 * call, print their addresses, busy-wait briefly, free them and spin forever. */
void u_prog_b(void) {
    void* addr1 = malloc(256);
    void* addr2 = malloc(255);
    void* addr3 = malloc(254);

    printf(" prog_b malloc addr:0x%x,0x%x,0x%x\n", (int)addr1, (int)addr2, (int)addr3);

    /* Short delay before the blocks are released. */
    for (int cpu_delay = 100000; cpu_delay-- > 0;) {
    }

    free(addr1);
    free(addr2);
    free(addr3);
    for (;;) {
    }
}

Guess you like

Origin blog.csdn.net/kanshanxd/article/details/131697078