Overview of BCC source code content (2)

Continuing from the previous article: Overview of BCC source code content (1)

This article is based on the introduction in the Contents section of the official website.

The BCC source code root directory contains several kinds of entries: some are single files containing both C and Python, others are paired .c and .py files, and some are directories.

Tracing

Files in the examples directory:

  • examples/tracing/nodejs_http_server.py

Trace Node.js HTTP server requests using USDT probes.

The contents of the bcc/examples/tracing/nodejs_http_server_example.txt file are as follows:

# ./nodejs_http_server.py 24728
TIME(s)            COMM             PID    ARGS
24653324.561322998 node             24728  path:/index.html
24653335.343401998 node             24728  path:/images/welcome.png
24653340.510164998 node             24728  path:/images/favicon.png

  • examples/tracing/stacksnoop

Trace kernel functions and print all kernel stack traces.

The contents of the bcc/examples/tracing/stacksnoop_example.txt file are as follows:

Demonstrations of stacksnoop, the Linux eBPF/bcc version.


This program traces the given kernel function and prints the kernel stack trace
for every call. This tool is useful for studying low frequency kernel functions,
to see how they were invoked. For example, tracing the submit_bio() call:

# ./stacksnoop submit_bio
TIME(s)            SYSCALL
3592.838736000     submit_bio
        submit_bio
        submit_bh
        jbd2_journal_commit_transaction
        kjournald2
        kthread
        ret_from_fork

This shows that submit_bio() was called by submit_bh(), which was called
by jbd2_journal_commit_transaction(), and so on. 

For high frequency functions, see stackcount, which summarizes in-kernel for
efficiency. If you don't know if your function is low or high frequency, try
funccount.


The -v option includes more fields, including the on-CPU process (COMM and PID):

# ./stacksnoop -v submit_bio
TIME(s)            COMM         PID    CPU SYSCALL
3734.855027000     jbd2/dm-0-8  313    0   submit_bio
        submit_bio
        submit_bh
        jbd2_journal_commit_transaction
        kjournald2
        kthread
        ret_from_fork

This identifies the application issuing the sync syscall: the jbd2 process
(COMM column).


Here's another example, showing the path to second_overflow() and on-CPU
process:

# ./stacksnoop -v second_overflow
TIME(s)            COMM         PID    CPU SYSCALL
3837.526433000     <idle>       0      1   second_overflow
        second_overflow
        tick_do_update_jiffies64
        tick_irq_enter
        irq_enter
        smp_apic_timer_interrupt
        apic_timer_interrupt
        default_idle
        arch_cpu_idle
        default_idle_call
        cpu_startup_entry
        start_secondary

3838.526953000     <idle>       0      1   second_overflow
        second_overflow
        tick_do_update_jiffies64
        tick_irq_enter
        irq_enter
        smp_apic_timer_interrupt
        apic_timer_interrupt
        default_idle
        arch_cpu_idle
        default_idle_call
        cpu_startup_entry
        start_secondary

This fires every second (see TIME(s)), and is from tick_do_update_jiffies64().


USAGE message:

# ./stacksnoop -h
usage: stacksnoop [-h] [-p PID] [-s] [-v] function

Trace and print kernel stack traces for a kernel function

positional arguments:
  function           kernel function name

optional arguments:
  -h, --help         show this help message and exit
  -p PID, --pid PID  trace this PID only
  -s, --offset       show address offsets
  -v, --verbose      print more fields

examples:
    ./stacksnoop ext4_sync_fs    # print kernel stack traces for ext4_sync_fs
    ./stacksnoop -s ext4_sync_fs    # ... also show symbol offsets
    ./stacksnoop -v ext4_sync_fs    # ... show extra columns
    ./stacksnoop -p 185 ext4_sync_fs    # ... only when PID 185 is on-CPU

  • tools/statsnoop

Trace the stat() system call.

The contents of the bcc/tools/statsnoop.py file are as follows:

#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# statsnoop Trace stat() syscalls.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: statsnoop [-h] [-t] [-x] [-p PID]
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 08-Feb-2016   Brendan Gregg   Created this.
# 17-Feb-2016   Allan McAleavy  updated for BPF_PERF_OUTPUT
# 29-Nov-2022   Rocky Xing      Added stat() variants.

from __future__ import print_function
from bcc import BPF
import argparse

# arguments
# Usage examples shown verbatim at the end of --help output.
examples = """examples:
    ./statsnoop           # trace all stat() syscalls
    ./statsnoop -t        # include timestamps
    ./statsnoop -x        # only show failed stats
    ./statsnoop -p 181    # only trace PID 181
"""

# RawDescriptionHelpFormatter keeps the epilog's line breaks and spacing.
parser = argparse.ArgumentParser(
    description="Trace stat() syscalls",
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# output switches
parser.add_argument(
    "-t", "--timestamp",
    action="store_true",
    help="include timestamp on output",
)
parser.add_argument(
    "-x", "--failed",
    action="store_true",
    help="only show failed stats",
)

# PID filter; kept as the raw string the user typed
parser.add_argument(
    "-p", "--pid",
    help="trace this PID only",
)

# hidden option: print the generated BPF program and exit
parser.add_argument(
    "--ebpf",
    action="store_true",
    help=argparse.SUPPRESS,
)

args = parser.parse_args()

# set to a true value to print the BPF program text before loading it
debug = 0

# define BPF program
#
# The C source below is kept in a Python string and handed to BPF(text=...).
# Layout of the embedded program:
#   * struct val_t   - per-thread scratch value holding the user-space
#                      pointer to the filename argument.
#   * struct data_t  - the record submitted to user space for each call
#                      (pid, timestamp in ns, return value, comm, filename).
#   * infotmp        - hash keyed by thread id; written by trace_entry() and
#                      consumed (looked up, then deleted) by trace_return().
#   * events         - perf output channel read by the Python side.
#   * trace_entry()  - shared entry logic; the two syscall__*_entry wrappers
#                      differ only in where the filename argument sits
#                      (first argument vs. second, after a dfd argument).
#   * trace_return() - copies the filename from user memory, fills a data_t
#                      and submits it; returns early if no entry was recorded.
#
# The bare word FILTER inside trace_entry() is a placeholder: it is replaced
# further down with either a PID comparison or an empty string before the
# program text is used.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>

struct val_t {
    const char *fname;
};

struct data_t {
    u32 pid;
    u64 ts_ns;
    int ret;
    char comm[TASK_COMM_LEN];
    char fname[NAME_MAX];
};

BPF_HASH(infotmp, u32, struct val_t);
BPF_PERF_OUTPUT(events);

static int trace_entry(struct pt_regs *ctx, const char __user *filename)
{
    struct val_t val = {};
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    FILTER
    val.fname = filename;
    infotmp.update(&tid, &val);

    return 0;
};

int syscall__stat_entry(struct pt_regs *ctx, const char __user *filename)
{
    return trace_entry(ctx, filename);
}

int syscall__statx_entry(struct pt_regs *ctx, int dfd, const char __user *filename)
{
    return trace_entry(ctx, filename);
}

int trace_return(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tid = (u32)pid_tgid;
    struct val_t *valp;

    valp = infotmp.lookup(&tid);
    if (valp == 0) {
        // missed entry
        return 0;
    }

    struct data_t data = {.pid = pid_tgid >> 32};
    bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)valp->fname);
    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.ts_ns = bpf_ktime_get_ns();
    data.ret = PT_REGS_RC(ctx);

    events.perf_submit(ctx, &data, sizeof(data));
    infotmp.delete(&tid);

    return 0;
}
"""
# Resolve the FILTER placeholder: a PID comparison when -p was given,
# otherwise nothing at all.
pid_filter = ('if (pid != %s) { return 0; }' % args.pid) if args.pid else ''
bpf_text = bpf_text.replace('FILTER', pid_filter)

# Optionally dump the final program text; --ebpf stops right after the dump.
if debug or args.ebpf:
    print(bpf_text)
if args.ebpf:
    exit()

# initialize BPF
b = BPF(text=bpf_text)

# for POSIX compliance, all architectures implement these
# system calls but the name of the actual entry point may
# be different for which we must check if the entry points
# actually exist before attaching the probes
def try_attach_syscall_probes(syscall):
    """Attach entry and return probes for *syscall* if its symbol exists.

    The syscall name is resolved with b.get_syscall_fnname(); when
    BPF.ksymname() reports -1 for the resolved name, nothing is attached.
    """
    syscall_fnname = b.get_syscall_fnname(syscall)
    if BPF.ksymname(syscall_fnname) == -1:
        return

    # These variants use the entry handler that takes (dfd, filename);
    # all others use the handler whose first argument is the filename.
    has_dfd_arg = syscall in ["statx", "fstatat64", "newfstatat"]
    entry_handler = "syscall__statx_entry" if has_dfd_arg else "syscall__stat_entry"

    b.attach_kprobe(event=syscall_fnname, fn_name=entry_handler)
    b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

# Probe every stat() variant in turn; try_attach_syscall_probes() checks
# that the entry point exists before attaching anything.
for syscall_name in ("stat", "statx", "statfs", "newstat", "newlstat",
                     "fstatat64", "newfstatat"):
    try_attach_syscall_probes(syscall_name)

# module-level timestamp state declared global by print_event
start_ts = prev_ts = delta = 0

# header
# The TIME(s) column is emitted without a newline so that the remaining
# column titles continue on the same line.
column_fmt = "%-7s %-16s %4s %3s %s"
if args.timestamp:
    print("%-14s" % "TIME(s)", end="")
print(column_fmt % ("PID", "COMM", "FD", "ERR", "PATH"))

# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: decode one event and print it as a table row.

    cpu and size are accepted because this function is registered as the
    open_perf_buffer() callback, but they are not used. data is the raw
    sample, decoded with b["events"].event().

    Successful calls (ret >= 0) are skipped when -x/--failed was given.
    The first event seen sets start_ts, so printed timestamps are relative
    to that event.
    """
    # start_ts is the only module-level name assigned in this function
    global start_ts

    event = b["events"].event(data)

    # split return value into FD and errno columns
    if event.ret < 0:
        fd_s = -1
        err = -event.ret
    elif args.failed:
        # only failures were requested
        return
    else:
        fd_s = event.ret
        err = 0

    if start_ts == 0:
        start_ts = event.ts_ns

    if args.timestamp:
        # nanoseconds since the first event, shown in seconds
        elapsed_s = float(event.ts_ns - start_ts) / 1000000000
        print("%-14.9f" % elapsed_s, end="")

    # 'replace' keeps undecodable bytes from raising
    comm = event.comm.decode('utf-8', 'replace')
    path = event.fname.decode('utf-8', 'replace')
    print("%-7d %-16s %4d %3d %s" % (event.pid, comm, fd_s, err, path))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)

# Poll until the user interrupts with Ctrl-C, then exit.
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()

Guess you like

Origin blog.csdn.net/phmatthaus/article/details/133133187