Rather than explaining the concept of vDSO once more, this article goes straight to what it means in practice:
- vDSO is like a public bulletin board: users can read what they need from it without going through any formalities.
- vDSO is equivalent to a C library directly exposed by the kernel as a supplement to GLIBC.
- …
If calls like gettimeofday had to trap into the kernel every time just to fetch a timestamp, that would be rather expensive. It is better for the kernel to put the timestamp in a public place that is exposed to every user, so that users can read it themselves. This is the typical use case for vDSO.
To simplify the description, we turn off ASLR:
[root@localhost ~]# sysctl -w kernel.randomize_va_space=0
Start any program (ping, for example) and look up the address range of its vdso mapping in /proc/<pid>/smaps:
7ffff7ffa000-7ffff7ffc000 r-xp 00000000 00:00 0 [vdso]
Size: 8 kB
...
We dd it out:
[root@localhost ~]# dd if=/proc/3688/mem of=./vdso.dd obs=1 bs=1 skip=140737354113024 count=8192
Then we see what it is:
[root@localhost ~]# file ./vdso.dd
./vdso.dd: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, BuildID[sha1]=09be88363f7ca8b05e2cb54a82d16bec2e840186, stripped
Then you can run objdump on it, just as you would on an ordinary shared library:
[root@localhost ~]# objdump -T vdso.dd
vdso.dd: file format elf64-x86-64
DYNAMIC SYMBOL TABLE:
ffffffffff700354 l d .eh_frame_hdr 0000000000000000 .eh_frame_hdr
ffffffffff700700 w DF .text 000000000000059d LINUX_2.6 clock_gettime
0000000000000000 g DO *ABS* 0000000000000000 LINUX_2.6 LINUX_2.6
ffffffffff700ca0 g DF .text 00000000000002d5 LINUX_2.6 __vdso_gettimeofday
ffffffffff700fa0 g DF .text 000000000000003d LINUX_2.6 __vdso_getcpu
ffffffffff700ca0 w DF .text 00000000000002d5 LINUX_2.6 gettimeofday
ffffffffff700f80 w DF .text 0000000000000016 LINUX_2.6 time
ffffffffff700fa0 w DF .text 000000000000003d LINUX_2.6 getcpu
ffffffffff700700 g DF .text 000000000000059d LINUX_2.6 __vdso_clock_gettime
ffffffffff700f80 g DF .text 0000000000000016 LINUX_2.6 __vdso_time
Take a look at what is actually in there: a handful of time-reporting functions. In other words, if you want to get the time, you simply call one of these functions. Let’s see how the simplest one, time, obtains the time. The following is the output of objdump -D on the vdso.dd file:
ffffffffff700f80 <__vdso_time@@LINUX_2.6>:
ffffffffff700f80: 55 push %rbp
ffffffffff700f81: 48 85 ff test %rdi,%rdi
ffffffffff700f84: 48 8b 04 25 a8 f0 5f mov 0xffffffffff5ff0a8,%rax
ffffffffff700f8b: ff
ffffffffff700f8c: 48 89 e5 mov %rsp,%rbp
ffffffffff700f8f: 74 03 je ffffffffff700f94 <__vdso_time@@LINUX_2.6+0x14>
ffffffffff700f91: 48 89 07 mov %rax,(%rdi)
ffffffffff700f94: 5d pop %rbp
ffffffffff700f95: c3 retq
Clearly, no system call is made; the time is read directly from the address 0xffffffffff5ff0a8. That address must therefore be where the kernel's time bulletin board is mapped into user mode.
Remember the address 0xffffffffff5ff0a8. The user-mode analysis ends here; next we go into the kernel to take a look.
First check the location of vdso from /proc/kallsyms:
ffffffff81941000 D vdso_start
ffffffff819424b0 D vdso_end
Next, we find the location of the kernel time bulletin board vsyscall_gtod_data:
ffffffff81a75080 D vsyscall_gtod_data
Let's look at the value of the bulletin board:
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912854
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912856
crash> struct vsyscall_gtod_data.wall_time_sec ffffffff81a75080
wall_time_sec = 1600912857
Clearly, the wall_time_sec field of the bulletin board is the value returned by time. Next we find its address:
crash> struct vsyscall_gtod_data ffffffff81a75080 -o
struct vsyscall_gtod_data {
[ffffffff81a75080] seqcount_t seq;
struct {
int vclock_mode;
cycle_t cycle_last;
cycle_t mask;
u32 mult;
u32 shift;
[ffffffff81a75088] } clock;
[ffffffff81a750a8] time_t wall_time_sec;
[ffffffff81a750b0] u64 wall_time_snsec;
[ffffffff81a750b8] u64 monotonic_time_snsec;
[ffffffff81a750c0] time_t monotonic_time_sec;
[ffffffff81a750c8] struct timezone sys_tz;
[ffffffff81a750d0] struct timespec wall_time_coarse;
[ffffffff81a750e0] struct timespec monotonic_time_coarse;
}
So its kernel address is 0xffffffff81a750a8, and it is exposed to user mode at the address 0xffffffffff5ff0a8.
We next confirm this:
- Modify the address that the time function reads from, so that the time call returns 0.
Let’s look at the bulletin board again:
crash> struct vsyscall_gtod_data ffffffff81a75080
...
sys_tz = {
tz_minuteswest = 0,
tz_dsttime = 0
},
What if we make the time function read sys_tz instead? Its value is always 0 here, so we would expect time to return 0.
For this, we first get the offset between sys_tz and wall_time_sec:
crash> eval ffffffff81a750c8-ffffffff81a750a8
hexadecimal: 20
decimal: 32
octal: 40
Therefore, we only need to change the time function code of vdso:
ffffffffff700f84: 48 8b 04 25 a8 f0 5f mov 0xffffffffff5ff0a8,%rax
To:
ffffffffff700f84: 48 8b 04 25 c8 f0 5f mov 0xffffffffff5ff0c8,%rax
That is, the byte at offset 8 of the time function (counting from 0) has to change from 0xa8 to 0xc8.
Through pattern matching, you can get the offset of the time function on the vdso page:
f80: 55 push rbp
f81: 48 85 ff test rdi,rdi
f84: 48 8b 04 25 a8 f0 5f mov rax,QWORD PTR ds:0xffffffffff5ff0a8
f8b: ff
f8c: 48 89 e5 mov rbp,rsp
f8f: 74 03 je 0xf94
f91: 48 89 07 mov QWORD PTR [rdi],rax
f94: 5d pop rbp
f95: c3 ret
That is 0xf80.
Adding this offset to vdso_start (0xffffffff81941000) gives 0xffffffff81941f80, the kernel address of the time function:
// Kernel address of the time function: vdso_start (0xffffffff81941000) + offset 0xf80.
unsigned char *addr = (unsigned char *)0xffffffff81941f80;
// Offset 8 holds the low byte of the address the mov loads from; 0xc8 makes it read sys_tz instead of wall_time_sec.
addr[8] = 0xc8;
Before modifying anything, let's first verify our understanding with a small program:
/*
 * Verification program: obtains the current time in two ways and prints both
 * values so they can be compared.
 *   1. By calling the vDSO time function at its user-space address.
 *   2. By reading directly from the address that function loads its value from.
 * Both addresses are hard-coded for this particular machine, with ASLR turned off.
 */
#include <time.h>
#include <stdio.h>
/* Function pointer type matching the signature of time(). */
typedef time_t (*time_func)(time_t *);
int main(int argc, char *argv[])
{
time_t tloc;
// Get the value directly from the address
unsigned long *p = (unsigned long *)0xffffffffff5ff0a8;
// Get the value through the function
// 0x7ffff7ffaf80 = vdso mapping base 0x7ffff7ffa000 + offset 0xf80 of the time function
time_func func = (time_func)0x7ffff7ffaf80;
func(&tloc);
printf("%ld\n", tloc);
printf("%lu\n", *p);
}
As expected, both methods yield the same value:
[root@localhost ~]# ./a.out
1600923922
1600923922
[root@localhost ~]# ./a.out
1600923923
1600923923
[root@localhost ~]#
Now modify the instruction in the kernel's vdso page, using the following SystemTap script:
[root@localhost ~]# cat modtime.stp
#!/usr/local/bin/stap -g
# Overwrites one byte of the vDSO time function in the kernel's vdso image.
# Usage: ./modtime.stp <byte>   (0xc8 to apply the patch, 0xa8 to restore the original)
# Guru mode (-g) is needed because the script contains embedded C.

# modtime(val): store the low 8 bits of val at offset 8 of the time function.
function modtime(val:long)
%{
/* 0xffffffff81941f80 = vdso_start (0xffffffff81941000) + offset 0xf80 of the time function. */
unsigned char *addr = (unsigned char *)0xffffffff81941f80;
unsigned char c = (unsigned char)STAP_ARG_val;
/* Offset 8 holds the low byte of the address that the time function loads its result from. */
addr[8] = c;
%}
# Apply the patch with the first command-line argument as soon as the script starts, then exit.
probe begin
{
modtime($1)
exit()
}
Execute it:
[root@localhost ~]# ./modtime.stp 0xc8
[root@localhost ~]# ./a.out
0
1600924228
[root@localhost ~]# ./a.out
0
1600924229
[root@localhost ~]# ./modtime.stp 0xa8
[root@localhost ~]# ./a.out
1600924238
1600924238
[root@localhost ~]#
Once the instructions in the vdso page are modified, every process that calls time misbehaves, and the effect is very visible:
top - 08:00:00 up 42 min, 3 users, load average: 0.00, 0.00, 0.00
Tasks: 114 total, 1 running, 113 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
KiB Mem : 0 total, 0 free, 0 used, 0 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1 root 20 0 51696 3808 2492 S 0.0 inf 0:01.29 systemd
2 root 20 0 0 0 0 S 0.0 -nan 0:00.00 kthreadd
3 root 20 0 0 0 0 S 0.0 -nan 0:00.00 ksoftirqd/0
7 root rt 0 0 0 0 S 0.0 -nan 0:00.01 migration/0
8 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcu_bh
9 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcuob/0
10 root 20 0 0 0 0 S 0.0 -nan 0:00.00 rcuob/1
It is worth mentioning that the vsyscall mechanism, which predates vDSO, works similarly, except that it only provides a plain mapping without the dynamic-linking abstraction, so it cannot benefit from the security protection that ASLR provides.
The leather shoes in Wenzhou, Zhejiang are wet, so they won’t get fat in the rain.