最近遇到一个程序因为收到 SIGBUS 信号而崩溃的问题。刚开始不清楚是哪个模块导致的,用 AddressSanitizer 也查不出来。
后来想了一下,可以用 gdb 来运行程序:当程序收到 SIGBUS 信号时,gdb 会自动在出问题的代码行停下来。
下面是一个例子。
// C program to demonstrate Bus Error
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>
/* Receives the SIGBUS disposition that was in effect before main() installed
 * sigbus_handler (it is passed as the "old action" argument of sigaction()).
 * Nothing in this file reads it back afterwards. */
struct sigaction g_prev_sigaction;
/*
 * Write a NUL-terminated string to standard output using only write(2).
 *
 * Intended for use inside a signal handler: stdio functions such as printf()
 * are not async-signal-safe, and their buffered output may not be flushed
 * before abort() terminates the process. The length is computed with a
 * byte-wise loop so that no additional header is required.
 */
static void sigbus_write_str(const char *s)
{
    size_t len = 0;

    while (s[len] != '\0') {
        len++;
    }

    /* Best effort only: the process is about to abort, so a failed or short
     * write cannot be handled in any useful way. */
    ssize_t rc = write(STDOUT_FILENO, s, len);
    (void)rc;
}

/*
 * SIGBUS handler installed with SA_SIGINFO.
 *
 * Prints a fixed banner followed by a one-line description selected by
 * siginfo->si_code, then terminates the process with abort().
 *
 * sig      signal number (unused; the handler is only registered for SIGBUS)
 * siginfo  kernel-supplied details; only si_code is inspected
 * ptr      ucontext pointer supplied by the kernel (unused)
 *
 * This function never returns.
 */
static void sigbus_handler(int sig, siginfo_t *siginfo, void *ptr)
{
    const char *reason;

    (void)sig;
    (void)ptr;

    sigbus_write_str("enter SIGBUS handler\n");

    switch (siginfo->si_code) {
    case BUS_ADRERR:
        reason = "nonexistent physical address\n";
        break;
    case BUS_ADRALN:
        reason = "invalid address alignment\n";
        break;
    case BUS_OBJERR:
        reason = "object-specific hardware error\n";
        break;
#ifdef BUS_MCEERR_AR
    case BUS_MCEERR_AR:
        reason = "Memory Fault - BUS_MCEERR_AR\n";
        break;
#endif
#ifdef BUS_MCEERR_AO
    case BUS_MCEERR_AO:
        reason = "Memory Fault - BUS_MCEERR_AO.\n";
        break;
#endif
    default:
        reason = "Memory Fault - Unknown.\n";
        break;
    }

    sigbus_write_str(reason);

    /* abort() is async-signal-safe and leaves a core dump for post-mortem
     * debugging when core dumps are enabled. */
    abort();
}
/*
 * Thread entry point that deliberately performs a misaligned int store.
 *
 * The thread names itself "child_thread" (the name shows up in gdb's thread
 * list), allocates a small heap buffer, advances the pointer by one byte so
 * that it is no longer int-aligned, and writes an int through it. When
 * alignment checking is active this store raises SIGBUS. If the store does
 * not fault, the thread idles forever, sleeping one second per iteration.
 *
 * arg  unused
 *
 * Returns NULL only if the allocation fails; otherwise it does not return.
 */
void *func(void *arg)
{
    (void)arg;

    prctl(PR_SET_NAME, "child_thread", 0, 0, 0);

    char *cptr = malloc(sizeof(int) + 1);
    if (cptr == NULL) {
        perror("malloc");
        return NULL;
    }

    /* Intentional: bump the pointer by one byte to misalign it, then store an
     * int through it. The extra byte in the allocation keeps the store inside
     * the buffer. */
    int *iptr = (int *) ++cptr;
    *iptr = 42;

    /* Keep the thread alive without burning CPU. (The original had a stray
     * ';' after for(;;), which turned this into an empty busy loop and made
     * the sleep unreachable.) */
    for (;;) {
        sleep(1);
    }

    return NULL; /* not reached */
}
/*
 * Program entry point for the SIGBUS demonstration.
 *
 * Steps:
 *   1. On GCC-compatible compilers targeting x86 / x86_64, set bit 0x40000
 *      (the Alignment Check flag) in the flags register so that misaligned
 *      accesses are reported instead of being silently tolerated.
 *   2. Install sigbus_handler for SIGBUS, saving the previous disposition in
 *      g_prev_sigaction.
 *   3. Start a detached thread running func(), which performs the misaligned
 *      store.
 *   4. Sleep forever in the main thread.
 *
 * argc, argv  unused
 *
 * Returns EXIT_FAILURE if the handler cannot be installed or the thread
 * cannot be created; otherwise it does not return.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

#if defined(__GNUC__)
# if defined(__i386__)
    /* Enable Alignment Checking on x86 */
    __asm__("pushf\norl $0x40000,(%esp)\npopf");
# elif defined(__x86_64__)
    /* Enable Alignment Checking on x86_64 */
    __asm__("pushf\norl $0x40000,(%rsp)\npopf");
# endif
#endif

    /*
     * The misaligned access itself lives in func(): malloc() returns suitably
     * aligned memory, so the pointer is advanced by one byte before an int is
     * stored through it. Any similar access at an odd address (for example a
     * short written through ((char *)&some_int) + 1) would fault the same way.
     */

    /* Zero the whole structure first so that members which are not set
     * explicitly below do not hold indeterminate values. */
    struct sigaction sa = {0};
    sa.sa_sigaction = sigbus_handler;
    sa.sa_flags = SA_RESTART | SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGBUS, &sa, &g_prev_sigaction) != 0) {
        perror("sigaction");
        return EXIT_FAILURE;
    }

    pthread_t tid;
    /* pthread_create() reports failure through its return value, not errno. */
    int rc = pthread_create(&tid, NULL, func, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: error %d\n", rc);
        return EXIT_FAILURE;
    }
    pthread_detach(tid);

    /* Keep the process alive while the worker thread runs. */
    for (;;) {
        sleep(1);
    }

    return 0; /* not reached */
}
(gdb) r
Starting program: /home/charles/tmp/test_sigbus
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/i386-linux-gnu/libthread_db.so.1".
[New Thread 0xb7dd8b40 (LWP 22722)]
Thread 2 "child_thread" received signal SIGBUS, Bus error.
[Switching to Thread 0xb7dd8b40 (LWP 22722)]
0x0804873f in func (arg=0x0) at test_sigbus.c:63
63 *iptr = 42;
(gdb) thread apply all bt full
Thread 2 (Thread 0xb7dd8b40 (LWP 22722)):
#0 0x0804873f in func (arg=0x0) at test_sigbus.c:63
cptr = 0xb7400471 ""
iptr = 0xb7400471
#1 0xb7f97295 in start_thread (arg=0xb7dd8b40) at pthread_create.c:333
__res = <optimized out>
__ignore1 = <optimized out>
__ignore2 = <optimized out>
pd = 0xb7dd8b40
now = <optimized out>
unwind_buf = {cancel_jmp_buf = {
{jmp_buf = {-1208307712, 0, 4001536, -1210219480, -1614920075, -698609547},
mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = 0
pagesize_m1 = <optimized out>
sp = <optimized out>
freesize = <optimized out>
__PRETTY_FUNCTION__ = "start_thread"
#2 0xb7ec11ce in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:114
No locals.
Thread 1 (Thread 0xb7dd9700 (LWP 22718)):
#0 0xb7fdabd1 in __kernel_vsyscall ()
No symbol table info available.
#1 0xb7e8a4aa in nanosleep () at ../sysdeps/unix/syscall-template.S:84
No locals.
#2 0xb7e8a3dd in __sleep (seconds=0) at ../sysdeps/posix/sleep.c:55
save_errno = 0
ts = {tv_sec = 0, tv_nsec = 999847700}
#3 0x08048803 in main (argc=1, argv=0xbffff054) at test_sigbus.c:126
sa = {__sigaction_handler = {sa_handler = 0x80486cb <sigbus_handler>, sa_sigaction = 0x80486cb <sigbus_handler>}, sa_mask = {
__val = {0 <repeats 32 times>}}, sa_flags = 268435460, sa_restorer = 0xbffff05c}
tid = 3084749632
(gdb)
可以看到,程序在出问题的代码行停了下来。
SIGBUS 一般是由未对齐的内存访问导致的;另外,用 mmap 映射文件时,如果访问到了超出文件末尾的映射区域,也有可能导致 SIGBUS。