嵌入式linux CPU占用率高的调试方法

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/litao31415/article/details/80905891

1,主要是使用top命令,可看出哪个进程CPU占有率过高,但嵌入式linux的top是由busybox编译出来的,属于轻量级。PC上有的功能,嵌入式平台上可能没有,譬如“top -H”等。

2,另外ps命令可以看到各个进程的pid,包括内核线程,内核线程看起来更像用户态的进程

3,知道哪个进程占用过高后,可试着在用户态上动态修改进程的优先级,如:renice -19 383,其中-19是新的nice值,该值范围为[-20:19],383是进程pid号

4,如果是内核的驱动,可使用set_user_nice()改变nice值

5,如果是用户态,可改变进程或线程的调度策略,如SCHED_FIFO或者SCHED_RR,并设置相应的优先级

6,这样就把重要性较弱的进程调度优先级降下来,CPU占用率会有一定的降幅。在开发软件时,最好是根据linux thread api封装一个好用的接口,可以方便设置优先级,调度策略等,并用一个*.h头文件把各个模块的优先级使用宏来定义,方便修改和阅读各种任务的优先级以及实时性。

7,此时需要进一步定位该进程(假设进程ID为<pid>)的哪些线程占用率过高。为了调试,最好是使用上述说的“自己封装并且好用的接口”来为新创建的线程起名字,内部或许使用了pthread_setname_np()等系统api来实现。此时使用pstree -p <pid>可以查看该进程创建的所有线程。

8,使用proc文件系统进行统计线程的实际cpu占用。主要用到/proc/stat以及/proc/<pid>/task/<tid>/stat,下面是一个很好的例子,代码不是我写的,我改动了部分,仅做参考

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/termios.h>
#include <sys/time.h>

#include <dirent.h>
#include <fcntl.h>
#include <getopt.h>
#include <pthread.h>
#include <unistd.h>

#include <linux/unistd.h>


/* Marker stored in procState.valid once an entry has been filled in. */
#define VALID_PROCSTATE 0xa84b62fc

/*
 * Per-thread sampling state: one entry per monitored thread.
 * Entries are filled in by main() and refreshed by getdata().
 */
struct procState
{
    char procname[32];      /* path of the thread's stat file, "/proc/<pid>/task/<tid>/stat" */
    FILE * fp;              /* stream used by getdata() while reading; not usable after getdata() returns */
    int pid;                /* thread id (taken from the task directory name, re-read from the stat file) */
    unsigned int utimeold;  /* utime value from the previous sample */
    unsigned int utimenew;  /* utime value from the latest sample */
    unsigned int stimeold;  /* stime value from the previous sample */
    unsigned int stimenew;  /* stime value from the latest sample */
    unsigned int allold;    /* utime + stime from the previous sample */
    unsigned int allnew;    /* utime + stime from the latest sample */
    int valid;              /* VALID_PROCSTATE when the entry is in use */
};

/* Capacity of the per-thread table below. */
#define MAX_THREADS_NUM 100
/* Table of monitored threads; the first procTotal entries are in use. */
struct procState lanProc[MAX_THREADS_NUM];
int procTotal=0;
/* Copy of the -p option argument and its numeric value (both set in main()). */
char targetProcName[64]="";
int targetProcPid=0;

/* Path of the target's task directory, "/proc/<pid>/task" (built in main()). */
char taskName[64]="";
/* taskfp is not referenced by the code visible in this listing. */
FILE            *taskfp;
/* Stream getdata() uses to read the file named by procdevice. */
FILE            *statfp;

/* Version string printed by the -v option. */
char *version = "1.1.0";
/* File from which the system-wide CPU counters are read. */
char            *procdevice = "/proc/stat";

/* Directory handle and current entry used by main() while listing the task directory. */
struct dirent *taskdirent;
DIR * taskdir;

/*
 * System-wide CPU counters as read by getdata() from the first line of
 * the file named by procdevice.
 */
struct stats
{
    unsigned int    user;    /* first numeric field of the line */
    unsigned int    nice;    /* second numeric field */
    unsigned int    system;  /* third numeric field */
    unsigned int    idle;    /* fourth numeric field */
    unsigned int    total;   /* user + nice + system + idle, computed by getdata() */
};

/* NOTE(review): declared here, but no definition or call is visible in this listing. */
void readprocessstat( void );

int getdata(struct stats *st)
{
    unsigned char   buf[80];
    int i;

    for(i=0;i<procTotal;i++)
    {
        if ((lanProc[i].fp = fopen(lanProc[i].procname,"r")) == NULL) 
		{
              fprintf(stderr, "ERROR: failed, errno=%d\n", errno);
              exit(-1);
        }

        fscanf(lanProc[i].fp, "%d %s %s %s %s %s %s %s %s %s %s %s %s %lu %lu", &lanProc[i].pid,
               &buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],&buf[0],
               &lanProc[i].utimenew,&lanProc[i].stimenew);
	 	fclose(lanProc[i].fp);
    }

    if ((statfp = fopen(procdevice, "r")) == NULL) 
	{
        fprintf(stderr, "ERROR: failed to open %s, errno=%d\n",
                procdevice, errno);
        exit(0);
    }

    fscanf(statfp, "%s %d %d %d %d", &buf[0], &st->user, &st->nice,&st->system, &st->idle);
	fclose(statfp);
			
    st->total = st->user + st->nice + st->system + st->idle;
    for(i=0;i<procTotal;i++)
    {
        lanProc[i].allnew = lanProc[i].utimenew + lanProc[i].stimenew;
    }

    return(0);
}

/*
 * Write the command-line help text to fp, then terminate the program
 * with exit status rc.  This function does not return.
 */
void usage(FILE *fp, int rc)
{
    static const char help_text[] =
        "Usage: ./thread_top [-h?v] [-p <pid>] [-c <count>] [-s seconds]\n\n"
        "        -h?            this help\n"
        "        -v             print version info\n"
        "        -p <pid>       pid of target proccess\n"
        "        -c count       repeat count times\n"
        "        -s seconds     seconds between output\n";

    fputs(help_text, fp);
    exit(rc);
}

int main(int argc, char *argv[])
{
	struct stats    st, stold;
	unsigned int    curtotal;
	int             c = 0;
	int 			cnt = 1;
	int             loop = 1;
	int				delay = 1;
	int             busy = 0;
	int 			max_busy=0;
	int 			i = 0;

	if(argc < 2)
		usage(stdout, 0);
	
	while ((c = getopt(argc, argv, "h?vc:s:p:")) > 0) 
	{
		switch (c) {
			case 'v':
				printf("%s: version %s\n", argv[0], version);
				exit(0);
			case 'c':
				loop = 0;
				cnt = atoi(optarg) + 1;
				break;
			case 's':
				delay = atoi(optarg);
				break;
			case 'h':
			case '?':
				usage(stdout, 0);
				break;
			case 'p':

//struct dirent {
//   ino_t          d_ino;       /* inode number */
//   off_t          d_off;       /* offset to the next dirent */
//   unsigned short d_reclen;    /* length of this record */
//   unsigned char  d_type;      /* type of file */
//   char           d_name[256]; /* filename */
//};

				sprintf(targetProcName, "%s", optarg);

				sprintf(taskName, "/proc/%s/task", optarg);

				if((taskdir = opendir(taskName))==NULL)
				{
					fprintf(stderr, "ERROR: failed to open %s, errno=%d\n", taskName,errno);
					exit(-1);
				}

				targetProcPid=atol(targetProcName);

				for(i=0;i<MAX_THREADS_NUM;i++) 
				{
					if ((taskdirent=readdir(taskdir)) == NULL) 
					{
						break;
					}

					lanProc[procTotal].pid=atoi(taskdirent->d_name);// /proc/%s/task 下的文件名即为线程tid号
					if( lanProc[procTotal].pid < targetProcPid ) //一般不会小于父进程id
					{ 
						continue;
					}

					sprintf(lanProc[procTotal].procname, "/proc/%s/task/%s/stat", optarg,taskdirent->d_name);
					lanProc[procTotal].valid = VALID_PROCSTATE;
					procTotal++;
				}

				break;
			default:
				fprintf(stderr, "ERROR: unkown option '%c'\n", c);
				usage(stderr, 1);
				break;
		}
	}

	getdata(&st);

	for (c = 0; (loop || (c < cnt)); c++) 
	{
		sleep(delay);

		stold = st;
		getdata(&st);

		curtotal = st.total - stold.total;
		busy = ((st.system + st.user + st.nice)-(stold.system + stold.user + stold.nice)) * 100 / curtotal;
		if (max_busy < busy)
			max_busy = busy;

		if(c) //第一次不打印
		printf("busy %3d%%:max=%3d%%(system %3d%%, user %3d%%, nice %3d%%, idle %3d%%)\n",
												busy,max_busy,
												(st.system - stold.system) * 100 / curtotal,
												(st.user - stold.user) * 100 / curtotal,
												(st.nice - stold.nice) * 100 / curtotal,
												(st.idle - stold.idle) * 100 / curtotal);

		for(i=0;i<procTotal;i++) 
		{
			if( lanProc[i].valid != VALID_PROCSTATE )
				continue;

			if(c) //第一次不打印	
			printf("pthread:%3d alltime:%6lu %3d%% usrtime:%6lu %3d%% systime:%6lu %3d%% \n",
										lanProc[i].pid,
										lanProc[i].allnew - lanProc[i].allold,
										(lanProc[i].allnew - lanProc[i].allold) * 100/curtotal,
										lanProc[i].utimenew - lanProc[i].utimeold,
										(lanProc[i].utimenew - lanProc[i].utimeold) * 100/curtotal,
										lanProc[i].stimenew - lanProc[i].stimeold,
										(lanProc[i].stimenew - lanProc[i].stimeold) * 100/curtotal);

			lanProc[i].allold = lanProc[i].allnew;
			lanProc[i].utimeold = lanProc[i].utimenew;
			lanProc[i].stimeold = lanProc[i].stimenew;
		}
		
		printf("\n");
	}

	exit(0);
}

9,因proc下面的stat文件内容可读性比较差,上面的源码把stat文件解析出来,并算出总CPU时间,以及各线程的stime和utime,并在一段时间后(如1s后)再统计上述参数,最后将上述参数均求差(delta),类似于求斜率,即(utime2-utime1)/(total_time2-total_time1)等。编译成嵌入式板上的可执行文件,如thread_top,运行./thread_top -p <pid> 可计算出ID为<pid>的进程内所有线程的内核态cpu占用率和用户态cpu占用率。pstree -p <pid>能看到所有线程tid和线程名称。

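10,通过运行上述命令,进程中cpu占用较高的线程被一一统计出来,根据tid号和pstree出来的线程名称对比,即可定位源代码了。当然使用pthread_setname_np()为线程起名字是为了给人看,但限制是名字最长不超过16个字节(包含结尾的'\0',即最多15个可见字符)。如果是一般的调试,也可以不依赖线程名:pthread_create()返回的pthread_t并不是tid,但可以在线程函数内部通过gettid系统调用获取本线程的tid号并打印出来(下面的224是32位ARM/x86上gettid的系统调用号,其他架构请使用<sys/syscall.h>中的SYS_gettid),如: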

printf("proc_name=%s,tid=%ld\n",__func__,syscall(224));

,这样也能对比出来,定位源代码!

猜你喜欢

转载自blog.csdn.net/litao31415/article/details/80905891