Linux system: signal redirection, grab thread stack

Requirements: When a process on Linux hits a segmentation fault or terminates abnormally, the kernel delivers the corresponding signal to it. The default action for these signals is usually to terminate the process and generate a core file that system engineers can use for fault analysis. However, where disk space is limited, such as on some embedded devices, a core file cannot be generated when the program is large. In that case we need to redirect these signals, that is, handle them with a custom handler. The usual approach is to capture the call stack of the thread that was executing when the signal arrived.

The code is shown below:

//gcc  -o test_process test_process.c -rdynamic -ldl -lpthread
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <dlfcn.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <ucontext.h>
#include <unistd.h>
/*
 * Kernel thread id of the calling thread, fetched with the raw gettid
 * system call. The expression has syscall()'s return type (long), so cast
 * it to the type matching the printf conversion it is printed with.
 */
#define gettid() syscall(__NR_gettid)

/*
 * printf-style diagnostic output to stderr with a newline appended.
 * x must be a string literal: the "\n" is attached by literal concatenation.
 * ##__VA_ARGS__ is the GNU extension that drops the trailing comma when no
 * extra arguments are given.
 * NOTE(review): fprintf() is not on the POSIX async-signal-safe list; this
 * macro is used for debugging output only.
 */
# define sigsegv_outp(x, ...) 	fprintf(stderr, x"\n", ##__VA_ARGS__)

/*
 * Dump every general-purpose register stored in a saved machine context.
 *
 * uc: context whose uc_mcontext.gregs[] array is printed; must not be NULL.
 *
 * One line is written per register through sigsegv_outp(); the printed
 * index is the register's position in gregs[].
 */
static void print_reg(const ucontext_t *uc)
{
	int i;

	for (i = 0; i < NGREG; i++) {
		/*
		 * greg_t is not 'long' on every ABI (it is 'long long' on
		 * x86-64 glibc), so convert explicitly to the type that the
		 * %llx conversion expects.
		 */
		sigsegv_outp("reg[%02d]: 0x%016llx", i,
			(unsigned long long)uc->uc_mcontext.gregs[i]);
	}
}

/*
 * Print a call chain starting from a saved x86-64 machine context.
 *
 * uc: context supplying the starting instruction pointer (REG_RIP) and
 *     frame pointer (REG_RBP); must not be NULL.
 *
 * Each frame is assumed to be laid out as [0] = caller's saved frame
 * pointer, [1] = return address. The walk trusts those values as-is, so it
 * only yields a sensible chain for code that maintains rbp as a frame
 * pointer. The walk stops when the return address is NULL, when dladdr()
 * cannot map the address to a loaded object, when "main" is reached, or
 * when the frame pointer is NULL.
 */
static void print_call_link(const ucontext_t *uc)
{
	int i = 0;
	Dl_info	dl_info;

	const void **frame_pointer = (const void **)uc->uc_mcontext.gregs[REG_RBP];
	const void *return_address = (const void *)uc->uc_mcontext.gregs[REG_RIP];

	sigsegv_outp("Stack trace:");
	while (return_address) {
		/* Print the full address; %x would take an unsigned int and truncate it. */
		sigsegv_outp("return_address  [%lx]\n", (unsigned long)return_address);
		memset(&dl_info, 0, sizeof(Dl_info));
		if (!dladdr((void *)return_address, &dl_info))
			break;

		/*
		 * dladdr() leaves dli_sname/dli_saddr NULL when the object is
		 * known but no symbol covers the address. Never hand NULL to
		 * %s, and in that case report the offset from the object's
		 * load base instead of an offset from address 0.
		 */
		const char *sname = dl_info.dli_sname ? dl_info.dli_sname : "(null)";
		const void *base = dl_info.dli_saddr ? dl_info.dli_saddr
						     : dl_info.dli_fbase;

		/* No: return address <sym-name + offset> (filename) */
		sigsegv_outp("%02d: %p <%s + %lu> (%s)", ++i, return_address, sname,
			(unsigned long)return_address - (unsigned long)base,
			dl_info.dli_fname);
		if (dl_info.dli_sname && !strcmp(dl_info.dli_sname, "main")) {
			sigsegv_outp("to main \n");
			break;
		}

		if (!frame_pointer) {
			sigsegv_outp("frame_pointer == 0\n");
			break;
		}
		/* Step to the caller: fetch its return address, then its frame. */
		return_address = frame_pointer[1];
		frame_pointer = (const void **)frame_pointer[0];
	}
	sigsegv_outp("Stack trace end.");
}

/* Map a SIGSEGV si_code value to a printable name. */
static const char *sigsegv_code_name(int si_code)
{
	switch (si_code) {
	case SEGV_MAPERR:
		return "SEGV_MAPERR";
	case SEGV_ACCERR:
		return "SEGV_ACCERR";
	case SI_USER:
		return "SI_USER";
	case SI_QUEUE:
		return "SI_QUEUE";
#ifdef SI_TKILL
	case SI_TKILL:
		return "SI_TKILL";
#endif
	default:
		return "unknown";
	}
}

/*
 * SA_SIGINFO-style handler: prints the thread id, the siginfo fields, the
 * saved registers and a call chain of the interrupted thread.
 *
 * signo:   signal number being delivered.
 * info:    signal details; skipped when NULL.
 * context: ucontext_t of the interrupted thread; skipped when NULL.
 *
 * For a signal sent from user space (si_code <= 0, e.g. kill) the handler
 * simply returns and the thread resumes. For a kernel-generated fault
 * (si_code > 0) the default action is restored before returning: the
 * faulting instruction is re-executed on return, so keeping this handler
 * installed would repeat the dump forever.
 */
static void sigsegv_handler(int signo, siginfo_t *info, void *context)
{
	/* The output calls may change errno; the interrupted code must not see that. */
	int saved_errno = errno;

	sigsegv_outp("\ntask tid =%d",(int)gettid());
	sigsegv_outp("Segmentation Fault!");
	sigsegv_outp("info.si_signo = %d", signo);
	if (info) {
		sigsegv_outp("info.si_errno = %d", info->si_errno);
		sigsegv_outp("info.si_code  = %d (%s)", info->si_code,
			sigsegv_code_name(info->si_code));
		/* si_addr is the faulting address only for kernel-generated faults. */
		sigsegv_outp("info.si_addr  = %p\n", info->si_addr);
	}

	if (context) {
		const ucontext_t *uc = (const ucontext_t *)context;

		print_reg(uc);
		print_call_link(uc);
	}

	if (info && info->si_code > 0) {
		/* Real fault: let the re-raised fault take the default action. */
		signal(signo, SIG_DFL);
	}

	errno = saved_errno;
}

/*
 * Install `func` as the sa_sigaction handler for `signo`.
 *
 * sa:    a struct sigaction lvalue used as scratch space; it is cleared
 *        first so no field is passed to sigaction() uninitialized.
 *        It is evaluated several times - pass a plain variable.
 * signo: signal number to hook.
 * func:  handler with the (int, siginfo_t *, void *) signature.
 * flags: value stored in sa_flags.
 *
 * The signal mask applied while the handler runs is set to empty. A
 * sigaction() failure is reported with perror() instead of being ignored.
 */
#define SETSIG(sa, signo, func, flags)                      \
        do {                                                \
            memset(&(sa), 0, sizeof(sa));                   \
            (sa).sa_sigaction = (func);                     \
            (sa).sa_flags = (flags);                        \
            sigemptyset(&(sa).sa_mask);                     \
            if (sigaction((signo), &(sa), NULL) != 0) {     \
                perror("sigaction");                        \
            }                                               \
        } while(0)
		
//static void __attribute((constructor)) setup_sigsegv(void) 
/* Hook SIGSEGV so that sigsegv_handler receives it with siginfo/context. */
static void setup_sigsegv(void)
{
	struct sigaction action;

	SETSIG(action, SIGSEGV, sigsegv_handler, SA_SIGINFO);
}

void* task_entry(void* arg); 
void func1(void);

/*
 * Demo entry point: prints the main thread's tid, installs the SIGSEGV
 * handler, starts one worker thread running task_entry(), then enters
 * func1(), which spins forever so signals can be sent to either thread.
 */
int main(int argc, const char *argv[])
{
	pthread_t worker;
	int rc;

	(void)argc;
	(void)argv;

	/* gettid() yields a long; convert it to match the %d conversion. */
	sigsegv_outp("\nparent process tid: %d", (int)gettid());
	setup_sigsegv();

	/* pthread_create() returns an error number directly, not via errno. */
	rc = pthread_create(&worker, NULL, task_entry, (void *)1);
	if (rc != 0) {
		sigsegv_outp("pthread_create failed: %s", strerror(rc));
	}

	func1();
	exit(EXIT_SUCCESS);
}
/*
 * Innermost demo frame: spins forever so the thread can be interrupted
 * while executing inside func3. Never returns.
 */
void func3(void)
{
	/*
	 * Unsigned so the endless increment wraps around instead of running
	 * into signed-overflow undefined behavior; volatile so the counter
	 * update is kept by the compiler.
	 */
	volatile unsigned int i = 0;

	while (1) {
		/* Alternative body for the sleeping variant: sleep(1); */
		i++;
	}
}

/* Middle demo frame: does nothing but call func3(). */
void func2(void)
{
	func3();
}

/* Outer demo frame on the main thread's path: does nothing but call func2(). */
void func1(void)
{
	func2();
}

/*
 * Outer demo frame on the worker thread's path: does nothing but call
 * func2(). Declared with (void) so it is a real prototype, like the other
 * demo functions.
 */
void test_func(void)
{
	func2();
}

/*
 * Thread start routine: prints this thread's tid and enters test_func().
 *
 * arg: unused.
 * Returns NULL if test_func() ever returns.
 */
void* task_entry(void* arg)
{
	(void)arg;

	/* gettid() yields a long; convert it to match the %d conversion. */
	sigsegv_outp("child thread tid: %d", (int)gettid());
	test_func();
	return NULL;
}

In the test, signal 11 (SIGSEGV) is sent directly to each running thread with kill, and the output is as follows:

[root@localhost signal_registration_stack_dump]# ./test_process &
[11] 24995
[root@localhost signal_registration_stack_dump]#
parent process tid: 24995
child thread tid: 24996

[root@localhost signal_registration_stack_dump]# kill -11 24995
[root@localhost signal_registration_stack_dump]#
task tid =24995
Segmentation Fault!
info.si_signo = 11
info.si_errno = 0
info.si_code  = 0 (SEGV_ACCERR)
info.si_addr  = 0x39be

reg[00]: 0x00007f10cf1b7700
reg[01]: 0x00007f10cf1b7700
reg[02]: 0x00007f10cf1b79d0
reg[03]: 0x0000000000000206
reg[04]: 0x0000000000400a00
reg[05]: 0x00007fffe06166b0
reg[06]: 0x0000000000000000
reg[07]: 0x0000000000000000
reg[08]: 0x00000000003d0f00
reg[09]: 0x00007f10cf1b6ff0
reg[10]: 0x00007fffe0616580
reg[11]: 0x0000000000000000
reg[12]: 0x00007f10cf1b79d0
reg[13]: 0x0000000000000000
reg[14]: 0x00000031ce8e8901
reg[15]: 0x00007fffe0616580
reg[16]: 0x0000000000400f22
reg[17]: 0x0000000000000202
reg[18]: 0x0000000000000033
reg[19]: 0x0000000000000000
reg[20]: 0x0000000000000000
reg[21]: 0x0000000000000000
reg[22]: 0x0000000000000000
Stack trace:
return_address  [400f22]

01: 0x400f22 <func3 + 11> (./test_process)
return_address = 0x400f31
return_address  [400f31]

02: 0x400f31 <func2 + 9> (./test_process)
return_address = 0x400f3c
return_address  [400f3c]

03: 0x400f3c <func1 + 9> (./test_process)
return_address = 0x400f0d
return_address  [400f0d]

04: 0x400f0d <main + 103> (./test_process)
to main

Stack trace end.

[root@localhost signal_registration_stack_dump]# kill -11 24996
[root@localhost signal_registration_stack_dump]#
task tid =24996
Segmentation Fault!
info.si_signo = 11
info.si_errno = 0
info.si_code  = 0 (SEGV_ACCERR)
info.si_addr  = 0x39be

reg[00]: 0x00007f10cf1b7700
reg[01]: 0x0000000000000000
reg[02]: 0x0000000000000000
reg[03]: 0x0000000000000000
reg[04]: 0x00000031cf61c360
reg[05]: 0x00007f10cf1b79c0
reg[06]: 0x0000000000000000
reg[07]: 0x0000000000000003
reg[08]: 0x00007f10cf1b66a0
reg[09]: 0x0000000000000000
reg[10]: 0x00007f10cf1b6e70
reg[11]: 0x0000000000000000
reg[12]: 0x00000031ceb8fe20
reg[13]: 0x0000000000000000
reg[14]: 0x00000031ce8db57d
reg[15]: 0x00007f10cf1b6e70
reg[16]: 0x0000000000400f26
reg[17]: 0x0000000000000202
reg[18]: 0x0000000000000033
reg[19]: 0x0000000000000000
reg[20]: 0x0000000000000000
reg[21]: 0x0000000000000000
reg[22]: 0x0000000000000000
Stack trace:
return_address  [400f26]

01: 0x400f26 <func3 + 15> (./test_process)
return_address = 0x400f31
return_address  [400f31]

02: 0x400f31 <func2 + 9> (./test_process)
return_address = 0x400f47
return_address  [400f47]

03: 0x400f47 <test_func + 9> (./test_process)
return_address = 0x400f8d
return_address  [400f8d]

04: 0x400f8d <task_entry + 68> (./test_process)
return_address = 0xcf407aa1
return_address  [cf407aa1]

05: 0x31cf407aa1 <(null) + 213930506913> (/lib64/libpthread.so.0)
frame_pointer == 0

Stack trace end.

[root@localhost signal_registration_stack_dump]#

The principle of stack traceback: walk the chain of stack frames (each frame stores the caller's saved rbp and the return address), and look up each return address, starting from rip, in the symbol table. Registers such as rbp and rip are obtained from the ucontext_t structure that the kernel passes to the signal handler; it describes the thread that was executing when the signal arrived.

One open question remains. When I changed func3 to sleep instead of busy-looping, the stack traceback failed. Debugging with gdb showed that the stack frame chain really was broken at that point, yet gdb itself could still produce a normal backtrace.

Guess you like

Origin blog.csdn.net/wangquan1992/article/details/108469168
Recommended