Linux arm64 system call process learning record (transferred)

Introduction

The process uses standard library routines, which in turn call kernel functions. Ultimately, the kernel is responsible for sharing resources and services fairly and smoothly among requesting processes.

User mode

#include <stdio.h>
/*
 * Writes the line "test" to test.txt in the current directory.
 *
 * Returns 0 on success and 1 if the file cannot be opened, written or closed;
 * the failing call is reported on stderr via perror().
 */
int main(void)
{
   /* "w" opens a text file for writing; a new file is created if it does not exist. */
   FILE *fp = fopen("test.txt", "w");
   if (fp == NULL) {
      /* Passing a NULL stream to fprintf()/fclose() is undefined behaviour. */
      perror("fopen");
      return 1;
   }

   if (fprintf(fp, "test\n") < 0) {
      perror("fprintf");
      fclose(fp);
      return 1;
   }

   /* fclose() flushes buffered data, so a write error can surface only here. */
   if (fclose(fp) != 0) {
      perror("fclose");
      return 1;
   }
   return 0;
}

Compile and trace

uname -a
Linux  5.11.0-27-generic #29~20.04.1-Ubuntu SMP Wed Aug 11 15:58:17 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
gcc write_test.c -o write_test
ldd write_test # ldd prints the list of shared libraries that a program or library depends on
# TODO: output to be added

svc

To execute a system call, user space must enter kernel mode, which it does through an exception: the library function places the number of the requested system call in register x8 and then executes the svc instruction to raise a synchronous exception. Reference [1]

Supervisor Call causes an exception to be taken to EL1. On executing an SVC instruction, the PE records the exception as a Supervisor Call exception in ESR_ELx, using the EC value 0x15, and the value of the immediate argument.

Enter kernel mode

The source code below is taken from linux-5.10.

// arch/arm64/kernel/entry.S 
// Exception vector table: one kernel_ventry line per vector, each naming an EL number and a handler label.
SYM_CODE_START(vectors)
	kernel_ventry	1, sync_invalid			// Synchronous EL1t
	kernel_ventry	1, irq_invalid			// IRQ EL1t
	kernel_ventry	1, fiq_invalid			// FIQ EL1t
	kernel_ventry	1, error_invalid		// Error EL1t
	kernel_ventry	1, sync				// Synchronous EL1h
	kernel_ventry	1, irq				// IRQ EL1h
	kernel_ventry	1, fiq_invalid			// FIQ EL1h
	kernel_ventry	1, error			// Error EL1h
	kernel_ventry	0, sync				// Synchronous 64-bit EL0: entry for synchronous exceptions, which include system calls
	kernel_ventry	0, irq				// IRQ 64-bit EL0
	kernel_ventry	0, fiq_invalid			// FIQ 64-bit EL0
	kernel_ventry	0, error			// Error 64-bit EL0
// kernel_ventry macro: subtracts S_FRAME_SIZE from sp, then branches to the label el<el>_<label>.
	.macro kernel_ventry, el, label, regsize = 64
	.align 7
	sub	sp, sp, #S_FRAME_SIZE
	b	el\()\el\()_\label  // e.g. with el=0, label=sync this expands to "b el0_sync", i.e. a branch to el0_sync
	.endm
// el0_sync: target of the "kernel_ventry 0, sync" vector; hands the exception to the C handler.
SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
	kernel_entry 0 // save the user-mode register state
	mov	x0, sp // x0 = sp: the argument for el0_sync_handler(struct pt_regs *regs)
	bl	el0_sync_handler // call the C handler for EL0 synchronous exceptions
	b	ret_to_user
SYM_CODE_END(el0_sync)

Find system call functions

// arch/arm64/kernel/entry-common.c
// Reads ESR_EL1 and dispatches on its exception class (EC) field; only the SVC64 case is shown here.
asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) {
    
    
	unsigned long esr = read_sysreg(esr_el1);
	switch (ESR_ELx_EC(esr)) {
    
    
	// arch/arm64/include/asm/esr.h:
	// #define ESR_ELx_EC_SVC64 (0x15)
	case ESR_ELx_EC_SVC64:
		el0_svc(regs);
		break;
	... // other exception classes
	}
}
// Handles the SVC64 exception class by forwarding the register frame to do_el0_svc().
static void noinstr el0_svc(struct pt_regs *regs) {
    
    
	... // (code elided in this excerpt)
	do_el0_svc(regs);
}
// arch/arm64/kernel/syscall.c
/*
 * Entry point for a system call taken from EL0.
 *
 * Takes the system call number from the saved x8 (regs->regs[8]) and passes
 * it to el0_svc_common() together with:
 *   __NR_syscalls   - the total number of system calls
 *   sys_call_table  - the system call table, an array of syscall_fn_t
 *                     function pointers
 */
void do_el0_svc(struct pt_regs *regs)
{
	int scno;

	sve_user_discard();
	scno = regs->regs[8];
	el0_svc_common(regs, scno, __NR_syscalls, sys_call_table);
}
// Common system call path: records the original x0 and the syscall number in pt_regs, then invokes the handler.
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
			   const syscall_fn_t syscall_table[]) {
    
    
	unsigned long flags = current_thread_info()->flags;
	regs->orig_x0 = regs->regs[0]; // keep a copy of x0: invoke_syscall() overwrites regs->regs[0] with the return value
	regs->syscallno = scno;
	.... // (code elided in this excerpt)
	invoke_syscall(regs, scno, sc_nr, syscall_table);
	.... // (code elided in this excerpt)
}
/*
 * Looks up and runs the handler for system call number scno.
 *
 * regs          - saved register frame; regs->regs[0] receives the result
 * scno          - requested system call number
 * sc_nr         - number of entries in syscall_table
 * syscall_table - table of syscall_fn_t handlers indexed by call number
 *
 * Numbers outside the table are passed to do_ni_syscall() (per the original
 * note: an undefined system call, yielding -ENOSYS).  For compat tasks the
 * result is reduced to its lower 32 bits before being stored.
 */
static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
			   unsigned int sc_nr,
			   const syscall_fn_t syscall_table[])
{
	long result;

	if (scno >= sc_nr) {
		result = do_ni_syscall(regs, scno);
	} else {
		/* Clamp the index with array_index_nospec() before the table lookup. */
		unsigned int idx = array_index_nospec(scno, sc_nr);

		result = __invoke_syscall(regs, syscall_table[idx]);
	}

	if (is_compat_task())
		result = lower_32_bits(result);

	regs->regs[0] = result;
}
/* Calls the selected handler with the saved register frame and returns its result. */
static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn)
{
	long ret = syscall_fn(regs);

	return ret;
}

Related data structures

// arch/arm64/include/asm/ptrace.h
// Register frame passed by pointer to the exception and system call handlers shown in this walkthrough.
struct pt_regs {
    
    
	union {
    
    
		struct user_pt_regs user_regs;
		struct {
    
    
			u64 regs[31]; // regs[8] supplies the syscall number (do_el0_svc); regs[0] receives the return value (invoke_syscall)
			u64 sp;
			u64 pc;
			u64 pstate;
		};
	};
	u64 orig_x0; // copy of regs[0] taken by el0_svc_common() before the handler runs
#ifdef __AARCH64EB__
	// Big-endian build: the two 32-bit fields are declared in the opposite order.
	u32 unused2;
	s32 syscallno;
#else
	s32 syscallno; // syscall number stored by el0_svc_common()
	u32 unused2;
#endif
	u64 orig_addr_limit;
	/* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
	u64 pmr_save;
	u64 stackframe[2];
	/* Only valid for some EL1 exceptions. */
	u64 lockdep_hardirqs;
	u64 exit_rcu;
};
// arch/arm64/include/asm/syscall.h
// Type of a system call handler: takes a pointer to the const register frame and returns long.
// sys_call_table is an array of these function pointers.
typedef long (*syscall_fn_t)(const struct pt_regs *regs);

system call table

// arch/arm64/kernel/sys.c
// First pass: with this __SYSCALL, every __SYSCALL(nr, sym) line pulled in by the include becomes a prototype for __arm64_<sym>.
#undef __SYSCALL
#define __SYSCALL(nr, sym)	asmlinkage long __arm64_##sym(const struct pt_regs *);
#include <asm/unistd.h>
// On ARM64, the header "asm/unistd.h" is "arch/arm64/include/asm/unistd.h".
// Second pass: with this __SYSCALL, the same lines become "[nr] = __arm64_<sym>," designated initializers.
#undef __SYSCALL
#define __SYSCALL(nr, sym)	[nr] = __arm64_##sym,
const syscall_fn_t sys_call_table[__NR_syscalls] = {
    
    
	// Default every slot to __arm64_sys_ni_syscall; the initializers included below then set individual slots.
	[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,
#include <asm/unistd.h>
};
// arch/arm64/include/asm/unistd.h 
// arm64 wrapper header: defines NR_syscalls and a set of __ARCH_WANT_* switches, then includes the generic list.
#include <uapi/asm/unistd.h>
#define NR_syscalls (__NR_syscalls)
#define __ARCH_WANT_RENAMEAT
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
#define __ARCH_WANT_SYS_CLONE3
#include <asm-generic/unistd.h>
// include/uapi/asm-generic/unistd.h
// Each system call is a number (#define __NR_<name>) followed by a __SYSCALL()/__SC_COMP() line,
// which expands according to whatever __SYSCALL is defined as at the point of inclusion.
#define __NR_io_setup 0
// NOTE(review): __SC_COMP's definition is not shown here; presumably it picks the native or the compat symbol and expands to __SYSCALL.
__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup)
#define __NR_io_destroy 1
__SYSCALL(__NR_io_destroy, sys_io_destroy)
.... // (remaining system calls elided in this excerpt)
#define __NR_syscalls 441

Expanding the files above yields the following system call table:

 const syscall_fn_t sys_call_table[__NR_syscalls] = {
    
    
 	[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,__arm64_compat_sys_io_setup,__arm64_sys_io_destroy,......
 };

System call macro

// arch/arm64/include/asm/syscall_wrapper.h
/*
 * __SYSCALL_DEFINEx(x, name, ...) - emit the wrapper chain for a system call
 * with x arguments.
 *
 *   __arm64_sys##name(regs)  the function placed in the system call table;
 *                            builds the argument list from pt_regs with
 *                            SC_ARM64_REGS_TO_ARGS and calls __se_sys##name.
 *   __se_sys##name(...)      declared with __SC_LONG-mapped parameters; calls
 *                            __do_sys##name with __SC_CAST-mapped arguments,
 *                            then applies __SC_TEST and __PROTECT.
 *   __do_sys##name(...)      the macro ends with this function's header, so
 *                            the { ... } block written after the macro
 *                            invocation becomes its body.
 *
 * Every line of the definition except the last must end in a backslash; a
 * line without one ends the macro.
 */
#define __SYSCALL_DEFINEx(x, name, ...)						\
	asmlinkage long __arm64_sys##name(const struct pt_regs *regs);		\
	ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO);			\
	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
	asmlinkage long __arm64_sys##name(const struct pt_regs *regs)		\
	{									\
		return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__));	\
	}									\
	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
	{									\
		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
		__MAP(x,__SC_TEST,__VA_ARGS__);					\
		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
		return ret;							\
	}									\
	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
// SYSCALL_DEFINE0(sname): arm64 variant for system calls that take no arguments.
// Declares __arm64_sys_<sname>() and ends with its header, so the block that follows the macro is the function body.
// The pt_regs parameter is named __unused.
#define SYSCALL_DEFINE0(sname)							\
	SYSCALL_METADATA(_##sname, 0);						\
	asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused);	\
	ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO);			\
	asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
// include/linux/syscalls.h
// Generic fallback: only takes effect when SYSCALL_DEFINE0 has not already been defined.
#ifndef SYSCALL_DEFINE0
#define SYSCALL_DEFINE0(sname)					\
	SYSCALL_METADATA(_##sname, 0);				\
	asmlinkage long sys_##sname(void);			\
	ALLOW_ERROR_INJECTION(sys_##sname, ERRNO);		\
	asmlinkage long sys_##sname(void)
#endif /* SYSCALL_DEFINE0 */
// SYSCALL_DEFINE1..6 prepend "_" to the name and forward to SYSCALL_DEFINEx with the argument count.
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE_MAXARGS	6
// SYSCALL_DEFINEx expands SYSCALL_METADATA first and then __SYSCALL_DEFINEx with the same arguments.
#define SYSCALL_DEFINEx(x, sname, ...)				\
	SYSCALL_METADATA(sname, x, __VA_ARGS__)			\
	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

Macro expansion for the write system call

#define __NR_write 64 __SYSCALL(__NR_write, sys_write)
asmlinkage long __arm64_sys_write(const struct pt_regs *);
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) {
    
    
	return ksys_write(fd, buf, count);
}

reference

Guess you like

Origin blog.csdn.net/a13821684483/article/details/127516755