Linux基础第五章进程控制

5.2 fork

fork函数实现进程复制，类似于动物界的单性繁殖，fork函数直接创建一个子进程。这是Linux创建进程最常用的方法。在这一小节中，子进程概念指fork产生的进程，父进程指主动调用fork的进程。

fork后，子进程继承了父进程很多属性，包括：

文件描述符：相当于对每个描述符执行了dup，包括标准输入、标准输出、标准错误三个文件
账户/组ID：
进程组ID
会话ID
控制终端
set-user-ID和set-group-ID标记
当前工作目录
根目录
umask
信号掩码
文件描述符的close-on-exec标记
环境变量
共享内存
内存映射
资源限制

但是也有一些不同，包括：

fork返回值
进程ID
父进程
进程运行时间记录，在子进程中被清0
文件锁没有继承
闹钟

未决信号集合：子进程的未决信号集合被清空

#include <stdio.h>
#include <unistd.h>
 #include <sys/types.h>
       #include <sys/stat.h>
       #include <fcntl.h>

/*
 * Demonstrates that a file descriptor opened before fork() can be used by
 * both the parent and the child: each process reads one byte from "a.txt"
 * through the same descriptor and prints it.
 *
 * Variations worth trying: open with O_RDWR|O_CREAT, write() from both
 * processes instead of reading, or use a FILE* stream instead of a raw fd.
 *
 * Returns 0 on success, 1 if "a.txt" cannot be opened.
 */
int main()
{
    printf("before fork\n");

    // Open the file in the parent so that the child inherits the descriptor.
    int fd = open("a.txt", O_RDWR);
    if(fd < 0)
    {
        perror("open a.txt");
        return 1;
    }

    pid_t pid = fork(); // create a new process
    if(pid == 0)
    {
        // The child can use the descriptor that the parent opened.
        char ch;
        if(read(fd, &ch, 1) == 1)   // print only when a byte was really read
            printf("ch is %c\n", ch);

        printf("this is in child, ppid=%d\n", (int)getppid());
    }
    else if(pid > 0)
    {
        char ch;
        if(read(fd, &ch, 1) == 1)
            printf("ch is %c\n", ch);

        // A return value greater than 0 means this is the parent;
        // the value itself is the pid of the child.
        printf("this is in parent, pid=%d\n", (int)getpid());
    }
    else
    {
        printf("error fork\n");
    }

    // Runs in every process that reaches this point.
    close(fd);
    printf("hello fork\n");
    return 0;
}

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Changes made by the fork()ed child are not seen by the parent, because the
// child works on its own copy of the memory.
int global_var = 0;

/*
 * Shows that a global, a stack variable and a heap object are all copied by
 * fork(): the child sets each of them to 100, and the parent, after giving
 * the child a second to run, still prints 0 for all three.
 *
 * Returns 0 on success, 1 if the heap allocation fails.
 */
int main()
{
    int var = 0;
    int *p = malloc(sizeof *p);
    if(p == NULL)
    {
        perror("malloc");
        return 1;
    }
    *p = 0;

    pid_t pid = fork();
    if(pid == 0)
    {
        // Child: modify its own copies only.
        global_var = 100;
        *p = 100;
        var = 100;
        printf("set var\n");
    }
    else if(pid > 0)
    {
        sleep(1); // let the child finish its writes first
        // The result is deterministic: every value is still 0.
        printf("%d\n", global_var);
        printf("var is %d\n", var); // 0
        printf("*p = %d\n", *p);
    }
    else
    {
        perror("fork");
    }

    // Each process releases its own copy of the heap block.
    free(p);

    printf("hello world\n");
    return 0;
}

#include <stdio.h>
#include <unistd.h>

/*
 * Calls fork() up to n times from the original process.
 * A child (fork() returned 0) leaves immediately, so only the original
 * process keeps forking and the children never fork themselves.
 * Does nothing when n <= 0.
 */
void forkn(int n)
{
    int created = 0;

    while(created < n)
    {
        if(fork() == 0)
            return; // child: stop here instead of forking again
        ++created;
    }
}

/*
 * Forks ten children with forkn(); every process that returns from forkn()
 * (the parent and each child) then prints the greeting once.
 */
int main()
{
    forkn(10);
    printf("hello world\n");
    return 0;
}

5.3 进程终止

进程有许多终止方法：

方法
main函数return	正常退出
调用exit或者_Exit或者_exit	正常退出
在多线程程序中，最后一个线程例程结束	正常退出
在多线程程序中，最后一个线程调用pthread_exit	正常退出
调用abort	异常退出
收到信号退出	异常退出
多线程程序中，最后一个线程响应pthread_cancel	异常退出

当进程退出时，内核会为进程清除它申请的内存，这里的内存是指物理内存，比如栈空间、堆、代码段、数据段等，并且关闭所有文件描述符。

一般来说，进程退出时，需要告诉父亲进程退出的结果，如果是正常退出，那么这个结果保存在内核的PCB中。如果是异常退出，那么PCB中保存退出结果的字段，是一个不确定的值。因此程序员应该避免程序的异常退出。

进程退出时，除了它的PCB所占内存，其他资源都会清除。

5.4 wait和waitpid

一个进程终止后，其实这个进程的痕迹还没有完全被清除，因为还有一个PCB在内核中，如果不回收，那么会导致内存泄漏。父进程可以调用wait函数来回收子进程PCB，并得到子进程的结果。

wait是一个阻塞调用，它的条件是一个子进程退出或者一个子进程有状态变化。
wait得到的status，包含了子进程的状态变化原因和退出码信息等等。

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
       #include <sys/wait.h>


/*
 * Forks a child that exits with status 18, then reaps it in the parent with
 * wait() and decodes the returned status word with the W* macros.
 *
 * Returns 0 in the parent on success, 1 if fork() or wait() fails;
 * the child returns 18.
 */
int main()
{
    pid_t pid = fork();

    if(pid == 0)
    {
        sleep(1);
        printf("child process\n");

        // Returning from main makes 18 the child's exit status.
        return 18;
    }
    else if(pid > 0)
    {
        printf("parent process\n");

        // Block until the child ends and reclaim its PCB.
        int status;
        if(wait(&status) == -1)
        {
            perror("wait");
            return 1;
        }

        // Decode how the child changed state.
        if(WIFEXITED(status))
        {
            printf("normal child process exit\n"); // normal exit

            int code = WEXITSTATUS(status);
            printf("code is %d\n", code);
        }
        else if(WIFSIGNALED(status))
        {
            printf("signal\n");
        }
        // The two branches below are shown for completeness: plain wait()
        // only reports terminated children; stop/continue events are
        // reported by waitpid() with WUNTRACED / WCONTINUED.
        else if(WIFSTOPPED(status))
        {
            printf("child stopped\n");
        }
        else if(WIFCONTINUED(status))
        {
            printf("child continue...\n");
        }

        printf("after wait\n");
    }
    else
    {
        perror("fork");
        return 1;
    }

    return 0;
}

wait和waitpid可能会阻塞父进程，所以一般使用SIGCHLD信号来监控子进程

5.5 僵尸进程和孤儿进程

5.5.1 僵尸进程

僵尸进程是指已经退出、但父进程还没有调用wait回收的子进程。僵尸进程没有任何作用，唯一的副作用就是内存泄漏。如果父进程退出，那么它的所有僵尸儿子会得到清理，因此僵尸进程一般指那些永不停歇的后台服务进程的僵尸儿子。

程序员应该避免僵尸进程的产生。

#include <stdio.h>
#include <unistd.h>

/*
 * Zombie demo: the child does nothing and exits at once, while the parent
 * blocks in getchar() without ever calling wait(), so the exited child stays
 * a zombie for as long as the parent keeps running.
 */
int main()
{
    pid_t pid = fork();

    // Only the parent waits for a key press; the child (and a failed fork)
    // falls straight through to the return.
    if(pid > 0)
        getchar(); // keep the parent alive

    return 0;
}

5.5.2 孤儿进程

父进程退出了，而子进程没有退出，那么子进程就成了没有父亲的孤儿进程。孤儿进程不会在系统中出现很长时间，因为系统一旦发现孤儿进程，就会将其父进程设置为init进程。那么将来该进程的回收，由init来负责。

5.6 exec

exec函数执行一个进程，当一个进程调用exec后，调用该函数的进程的虚拟地址空间的代码段、数据段、堆、栈被释放，替换成新进程的代码段、数据段、堆、栈，而PCB依旧使用之前进程的PCB。这个函数用中文来说就是鸠占鹊巢。

exec后使用的是同一个PCB，所以exec之后和之前，有很多进程属性是相同的，包括：

进程ID和父进程ID
账户相关
进程组相关
定时器
当前目录和根目录
umask
文件锁
信号mask
未决的信号
资源限制
进程优先级
进程时间
没有close-on-exec属性的文件描述符

使用fork和exec来执行一个新程序

#include <unistd.h>
#include <stdio.h>

// Naming note: in execle the trailing "e" stands for the environment
// (environ). This example uses execv, where "v" means the arguments are
// passed as a NULL-terminated vector.
//
// Replaces the current process image with "/bin/ls -a -l".
// Returns 1 only if execv() fails; on success it never returns.
int main(int argc, char* argv[])
{
    (void)argc; // the command line of this program is not used
    (void)argv;

    char* args[] = {
        "/bin/ls",
        "-a",
        "-l",
        NULL
    };
    execv("/bin/ls", args);

    // Reached only when the exec failed.
    perror("execv /bin/ls");
    return 1;
}

// Runs "ls" via execlp; the "p" means the program is looked up in the
// directories listed in the PATH environment variable.
// Returns 1 only if execlp() fails; on success it never returns.
int main2(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    // The argument list must end with a null pointer of type char *.
    execlp("ls", "ls", (char *)NULL);

    // Reached only when the exec failed.
    perror("execlp ls");
    return 1;
}

// Runs "/bin/ls -a -l" via execl, passing the arguments as a list.
// Returns 1 only if execl() fails; on success it never returns.
int main1(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    // Execute a program; the list must end with a null pointer of type char *.
    execl("/bin/ls", "/bin/ls", "-a", "-l", (char *)NULL);

    // The code below runs only when execl() failed: after a successful exec
    // this program image no longer exists.
    perror("execl /bin/ls");
    printf("hello world\n");
    return 1;
}

#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Opens "exec.txt" with O_CLOEXEC and then executes ./exec_test, to show
 * that a close-on-exec descriptor is not passed on to the new program.
 *
 * Returns 1 if the file cannot be opened or the exec fails; on success the
 * exec never returns.
 */
int main()
{
    // Typically fd is 3 here, the first number after stdin/stdout/stderr.
    int fd = open("exec.txt", O_RDWR|O_CREAT|O_CLOEXEC, 0777);
    if(fd < 0)
    {
        perror("open exec.txt");
        return 1;
    }

    execl("./exec_test", "./exec_test", (char *)NULL);

    // Reached only when the exec failed.
    perror("execl ./exec_test");
    close(fd);
    return 1;
}

/*
 * Prototypes of the exec family.
 * For the "l" variants the argument list ends with a (char *) NULL sentinel.
 * execle additionally takes the environment array AFTER that sentinel; C does
 * not allow a named parameter after "...", so it is shown as a comment.
 */
int execl(const char *path, const char *arg, ... /* , (char *) NULL */);

int execlp(const char *file, const char *arg, ... /* , (char *) NULL */);

int execle(const char *path, const char *arg, ... /* , (char *) NULL, char *const envp[] */);

int execv(const char *path, char *const argv[]);

int execvp(const char *file, char *const argv[]);

int execvpe(const char *file, char *const argv[], char *const envp[]);

函数后缀	解析
l	list 用不定参数列表来表示命令参数，如果用不定参数列表，那么用NULL表示结束
v	vector 用数组来传递命令行参数
p	path 表示程序使用程序名即可，在$PATH中搜索该程序，不带p的需要提供全路径
e	environ 表示环境变量

补充：不定参数

不定参数函数定义：

#include "../h.h"

// mylog(fmt, ...): forwards to myprint() with the file name and line number
// of the call site. Because __VA_ARGS__ is expanded after a comma, at least
// one argument must follow fmt.
#define mylog(fmt, ...) myprint(__FILE__, __LINE__, fmt, __VA_ARGS__)

/*
 * Prints "<filename>, <line>, " to stdout, followed by the printf-style
 * message described by fmt and the variable arguments after it.
 */
void myprint(const char* filename, int line, const char* fmt, ...)
{
    va_list varargs;

    // Location prefix first.
    fprintf(stdout, "%s, %d, ", filename, line);

    // Then the caller's formatted message.
    va_start(varargs, fmt);
    vfprintf(stdout, fmt, varargs);
    va_end(varargs);
}

/*
 * Prints every string argument on its own line.
 * The argument list must be terminated by a NULL sentinel; a itself may be
 * NULL, in which case nothing is printed.
 *
 * Returns the number of strings printed.
 */
int print(const char* a, ...)
{
    int printed = 0;
    va_list args;

    va_start(args, a);
    // Start with the named argument, then walk the variable ones until the
    // NULL sentinel is reached.
    for(const char* arg = a; arg != NULL; arg = va_arg(args, const char*))
    {
        printf("%s\n", arg);
        ++printed;
    }
    va_end(args);

    return printed;
}

/*
 * Sums the `count` int arguments that follow `count`.
 * Returns 0 when count is zero or negative.
 */
int add(int count, ...)
{
    va_list ap;
    int total = 0;

    // Position ap on the first variable argument.
    va_start(ap, count);

    // Consume exactly `count` ints.
    while(count-- > 0)
        total += va_arg(ap, int);

    // Done reading the arguments.
    va_end(ap);

    return total;
}

/*
 * Exercises the variadic helpers defined in this example:
 * myprint(), the mylog() macro, print() and add().
 */
int main()
{
    // Direct call: the caller passes the file name and line itself.
    myprint(__FILE__, __LINE__, "haha%d\n", 100);

    // Same output format, but the macro fills in file and line.
    mylog("print in mylog %d\n", 100);

    // String list terminated by a NULL sentinel.
    print("hello", "world", "haha", "you are dead", NULL);

    // Count first, then that many ints.
    const int total = add(3, 5, 6, 7);
    printf("%d\n", total);

    return 0;
}

/*
 * Calls add() with different argument counts and concat() with
 * NULL-terminated string lists, printing each result.
 * NOTE(review): concat() is not defined in this excerpt; whether its result
 * must be freed by the caller cannot be told from here - confirm.
 */
int main()
{
    // The first argument tells add() how many ints follow.
    int sum_of_3 = add(3, 12, 13, 14);
    int sum_of_2 = add(2, 12, 13);
    int sum_of_4 = add(4, 12, 13, 14, 15);
    printf("%d, %d, %d\n", sum_of_3, sum_of_2, sum_of_4);

    char* joined = concat("abc", "bcd", NULL);
    printf("p is %s\n", joined);

    // The trailing NULL is called the sentinel.
    joined = concat("aaaa", "bbbb", "cccc", NULL);
    printf("p is %s\n", joined);

    return 0;
}

#include <stdio.h>
#include <fcntl.h>
#include <sys/types.h>

// Without "##" in front of it, __VA_ARGS__ must stand for at least one
// argument:
// #define mylog(fmt, ...) printf("[%s:%d] "fmt, __FILE__, __LINE__, __VA_ARGS__)

// With "##" in front of it, __VA_ARGS__ may be empty, so mylog("text\n")
// with no extra arguments is accepted.
#define mylog(fmt, ...) printf("[%s:%d] "fmt, __FILE__, __LINE__, ##__VA_ARGS__)

/*
 * Tries to open "a.txt" and reports a failure through mylog(), which
 * prefixes the message with the source file and line.
 */
int main()
{
    const int fd = open("a.txt", O_RDWR);

    // Nothing to report when the file opened successfully.
    if(fd >= 0)
        return 0;

    mylog("error open file\n");
    return 0;
}

#include <stdio.h>

// Stringify: STR(abc) becomes the string literal "abc".
#define STR(token) #token

// Token pasting: CC(abc, xyz) becomes the single identifier abcxyz.
#define CC(left, right) left##right

/*
 * Shows token pasting: CC(abc, xyz) is pasted into the identifier abcxyz,
 * so the value of the local variable below is printed.
 */
int main()
{
    int abcxyz = 100;

    printf("%d\n", CC(abc, xyz));
    return 0;
}

5.8 账户和组控制

Snip20161008_26
Snip20161008_28

5.9 进程间关系

在Linux系统中，进程间除了有父子关系，还有组关系、Session关系、进程和终端进程关系。设计这些关系是为了更好的管理进程。

5.9.1 Session

一次登录算一个session，exit命令可以退出session。一个session包括多个进程组，一旦session领导（session leader）退出，那么该session内的所有进程都会退出（它的所有进程会收到一个信号）。

#include <unistd.h>
/*
 * Session demo: the child created by fork() calls setsid() to move into a
 * session of its own; afterwards both processes sleep forever so that they
 * can be inspected from another terminal.
 */
int main()
{
    // Only the child (fork() returned 0) starts a separate session.
    if(fork() == 0)
        setsid();

    // Parent and child both idle here until they are killed.
    for(;;)
        sleep(1);
}

5.9.2 进程组

在终端执行进程，就会生成一个进程组。执行的进程fork之后，子进程和父进程在一个组中。

进程组长退出后，进程组的其他进程的组号依旧没有变化。

5.10 练习

5.10.1 fork任意个子进程。

/*
 * Exercise 5.10.1: create 7 child processes from a single parent.
 * Each child leaves the loop at once, so the children do not fork again.
 */
int main()
{
    int i;

    for(i=0; i<7; ++i)
    {
        pid_t pid = fork();

        // In the child fork() returns 0: stop looping.
        if(pid == 0)
            break;
    }

    return 0;
}

5.10.2 使用多进程加速文件拷贝

./mycp -job 4 srcfile destfile

使用-job定义进程数量，加速文件拷贝。

#include "../h.h"

/*
 * Returns the size in bytes of the file named by filename, as reported by
 * stat(), or -1 when stat() fails (for example, the file does not exist).
 */
int get_file_length(const char* filename)
{
    struct stat info;

    return stat(filename, &info) == -1 ? -1 : info.st_size;
}

/*
 * Copies up to `length` bytes from src_file to dst_file, reading from and
 * writing to byte offset `pos` in both files.
 *
 * dst_file must already exist: it is opened for update ("r+") so that the
 * regions written by other processes are left untouched.
 *
 * The copy stops early when the source runs out of data or an I/O error
 * occurs. Errors are reported with perror(); nothing is returned.
 */
void process_copy(const char* src_file, const char* dst_file, int pos, int length)
{
    FILE* src = fopen(src_file, "r");
    if(src == NULL)
    {
        perror(src_file);
        return;
    }

    FILE* dst = fopen(dst_file, "r+");
    if(dst == NULL)
    {
        perror(dst_file);
        fclose(src);
        return;
    }

    char buf[4096];
    int block_size = sizeof(buf);

    if(fseek(src, pos, SEEK_SET) != 0 || fseek(dst, pos, SEEK_SET) != 0)
    {
        perror("fseek");
    }
    else
    {
        while(length > 0)
        {
            int copy_len = length < block_size ? length : block_size;

            size_t got = fread(buf, 1, (size_t)copy_len, src);
            if(got == 0)
                break; // end of file or read error: nothing more to copy

            if(fwrite(buf, 1, got, dst) != got)
            {
                perror(dst_file);
                break;
            }

            length -= (int)got;
        }
    }

    fclose(src);

    // fclose flushes the buffered data, so a failure here means lost bytes.
    if(fclose(dst) != 0)
        perror(dst_file);
}

// Usage: ./multi-process-cp -job n srcfile dstfile
//
// Copies srcfile to dstfile using n processes. The file is split into n
// chunks of filelen / n bytes; n-1 children copy one chunk each and the
// parent copies the last chunk plus the remainder.
//
// Example with 4 processes and a 21-byte file (21 / 4 = 5):
//   process 0 copies bytes  0-4
//   process 1 copies bytes  5-9
//   process 2 copies bytes 10-14
//   process 3 copies bytes 15-20 (the remaining 6 bytes)
//
// Exit status: 0 on success, 1 wrong argument count, 2 unknown option,
// 3 bad process count or missing source file, 4 destination cannot be
// created or resized.
int main(int argc, char* argv[])
{
    if(argc != 5)
    {
        printf("usage %s -job {process_count} {src_file} {dst_file}\n", argv[0]);
        return 1;
    }

    if(strcmp(argv[1], "-job") != 0)
    {
        printf("unknown options: %s\n", argv[1]);
        return 2;
    }

    int process_count = atoi(argv[2]);
    if(process_count <= 0)
    {
        printf("process count error\n");
        return 3;
    }

    const char* src_file = argv[3];
    const char* dst_file = argv[4];

    // Total length of the source file.
    int filelen = get_file_length(src_file);
    if(filelen == -1)
    {
        printf("file not exist\n");
        return 3;
    }

    // Make sure the destination exists and has the same size as the source,
    // so that every process can seek to its own region and write there.
    int fd = open(dst_file, O_CREAT|O_WRONLY, 0777);
    if(fd < 0)
    {
        perror(dst_file);
        return 4;
    }
    close(fd);

    if(truncate(dst_file, filelen) != 0)
    {
        perror(dst_file);
        return 4;
    }

    int i;
    int average = filelen / process_count;

    // Only n-1 children are needed; the parent copies the last part itself.
    for(i=0; i<process_count-1; ++i)
    {
        pid_t pid = fork();
        if(pid == 0)
        {
            // A child copies its chunk and exits right away.
            process_copy(src_file, dst_file, average * i, average);
            return 0;
        }

        if(pid < 0)
        {
            // No child for chunk i: stop forking and let the parent copy
            // everything from this chunk to the end of the file.
            perror("fork");
            break;
        }
    }

    // i is the first chunk without a child of its own.
    int pos = average * i;
    process_copy(src_file, dst_file, pos, filelen - pos);

    // wait() reaps one child per call; repeat until none are left.
    while(wait(NULL) > 0)
        ;

    return 0;
}

5.10.3 实现自定义终端

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// Examples of commands the shell accepts:
//   ls
//   mkdir aaa
//   cp ../aa bb
//   cd
//
// Splits cmd (in place) on spaces and runs it. "cd" is handled inside the
// shell process itself, because changing directory in a child would not
// affect the shell; with no argument it goes to $HOME. Any other command is
// executed in a forked child via execvp() and waited for.
// A blank command line is ignored.
void handle_cmd(char* cmd)
{
    enum { MAX_ARGS = 1024 };
    char* args[MAX_ARGS];
    int argc = 0;

    // Leave room for the terminating NULL that execvp() requires.
    for(char* tok = strtok(cmd, " "); tok != NULL && argc < MAX_ARGS - 1; tok = strtok(NULL, " "))
        args[argc++] = tok;
    args[argc] = NULL; // marks the end of the argument list

    if(argc == 0)
        return; // nothing but spaces

    if(strcmp(args[0], "cd") == 0)
    {
        // Change the current directory of the shell itself.
        const char* dir = args[1];
        if(dir == NULL)
            dir = getenv("HOME");

        if(dir == NULL)
            fprintf(stderr, "cd: HOME not set\n");
        else if(chdir(dir) != 0)
            perror(dir);
        return;
    }

    pid_t pid = fork();
    if(pid == 0)
    {
        execvp(args[0], args);

        // Only reached when the command could not be executed. Write to
        // stderr and leave with _exit() so that stdio buffers inherited from
        // the shell are not flushed a second time by the child.
        fprintf(stderr, "invalid command\n");
        _exit(127);
    }
    else if(pid > 0)
    {
        waitpid(pid, NULL, 0); // wait for this command to finish
    }
    else
    {
        perror("fork");
    }
}

/*
 * Minimal interactive shell: prints a prompt, reads one line, and hands it
 * to handle_cmd(). Empty lines are skipped. The loop ends at end of input
 * (for example Ctrl-D) or on a read error.
 */
int main()
{
    char buf[4096];

    while(1)
    {
        printf("myshell> ");
        fflush(stdout); // the prompt has no newline, so flush it explicitly

        // Wait for user input; stop at end of input instead of spinning.
        if(fgets(buf, sizeof(buf), stdin) == NULL)
        {
            putchar('\n');
            break;
        }

        // Cut the line at the newline, if there is one.
        buf[strcspn(buf, "\n")] = 0;

        if(buf[0] == 0)
        {
            continue;
        }

        handle_cmd(buf);
    }

    return 0;
}

5.11 函数和命令

5.11.1 函数

fork：创建子进程
exec：执行新的程序
wait/waitpid：等待子进程结束，回收子进程PCB内存。
va_list：保存不定参数读取位置的类型，下面的ap就是这种类型的变量
va_start：初始化ap，让它指向第一个不定参数
va_arg：从参数列表中获取一个参数，并且让指针指向下一个参数
va_end：清除ap