翻译来自：
http://tldp.org/LDP/lkmpg/2.6/html/lkmpg.html
本系列文章还有:
Linux内核模块编程指南(一)
Linux内核模块编程指南(二)
Linux内核模块编程指南(三)
Linux内核模块编程指南(四)

第6章使用/ proc输入

TODO：写一篇关于sysfs的章节

这只是一个占位符。最终，我希望在这里看到一个关于sysfs的（尚未写成的）章节。如果您熟悉sysfs并希望参与编写本章，请随时联系我们（LKMPG维护人员）以获取更多详细信息。

第7章与设备文件通信

与设备文件交谈（写入和IOCTL）

设备文件应该代表物理设备。大多数物理设备用于输出和输入，因此内核中的设备驱动程序必须有一些机制才能使输出从进程发送到设备。这是通过打开设备文件进行输出和写入来完成的，就像写入文件一样。在以下示例中，这是由device_write实现的。

这并不总是足够的。想象一下，你有一个连接到调制解调器的串口（即使你用的是内置调制解调器，从CPU的角度看，它仍然被实现为一个连接到调制解调器的串口，所以你不必太费力地发挥想象力）。自然的做法是使用设备文件将内容写入调制解调器（调制解调器命令或要通过电话线发送的数据），并从调制解调器读取内容（对命令的响应或通过电话线接收到的数据）。但是，这留下了一个问题：当您需要与串行端口本身通信时（例如设置数据发送和接收的速率），该怎么办？

Unix中的答案是使用一个名为ioctl的特殊函数（Input Output ConTroL的缩写）。每个设备都可以拥有自己的ioctl命令，这些命令可以是读取ioctl（将信息从进程发送到内核）、写入ioctl（将信息返回给进程）、 [10]两者兼有，或者两者皆非。ioctl函数使用三个参数调用：相应设备文件的文件描述符、ioctl编号和一个参数；该参数的类型为long，因此您可以通过强制类型转换用它来传递任何内容。 [11]

ioctl编号对主设备编号，ioctl的类型，命令和参数的类型进行编码。此ioctl编号通常由头文件中的宏调用（ _IO ， _IOR ， _IOW或_IOWR —取决于类型）创建。这个头文件应该包含在将使用ioctl的程序中（因此它们可以生成适当的ioctl ）和内核模块（因此它可以理解它）。在下面的示例中，头文件是chardev.h ，使用它的程序是ioctl.c 。

如果你想在自己的内核模块中使用ioctl ，最好申请一个正式的ioctl编号分配，这样如果你不小心用到了别人的ioctl ，或者别人用到了你的ioctl ，你就能知道出了什么问题。有关更多信息，请参阅内核源代码树中的Documentation/ioctl-number.txt。

例7-1。 chardev.c


/*
 *  chardev.c - Create an input/output character device
 */

#include <linux/kernel.h>   /* We're doing kernel work */
#include <linux/module.h>   /* Specifically, a module */
#include <linux/fs.h>
#include <asm/uaccess.h>    /* for get_user and put_user */

#include "chardev.h"
#define SUCCESS 0
#define DEVICE_NAME "char_dev"
#define BUF_LEN 80

/*
 * Non-zero while a process holds the device open.  device_open() refuses
 * a second opener with -EBUSY while it is set, and device_release()
 * decrements it again.
 * NOTE(review): this is a plain int with no locking, so two opens racing
 * each other could presumably both pass the check - confirm whether that
 * matters for the intended use.
 */
static int Device_Open = 0;

/*
 * The message the device will give when asked (BUF_LEN bytes of storage).
 */
static char Message[BUF_LEN];

/*
 * Read cursor into Message: how far the process reading the message has
 * got.  Needed when the message is larger than the buffer handed to
 * device_read(), so that the next read can continue where the last one
 * stopped.
 */
static char *Message_Ptr;

/* 
 * This is called whenever a process attempts to open the device file 
 */
static int device_open(struct inode *inode, struct file *file)
{
#ifdef DEBUG
    printk(KERN_INFO "device_open(%p)\n", file);
#endif

    /* 
     * We don't want to talk to two processes at the same time 
     */
    if (Device_Open)
        return -EBUSY;

    Device_Open++;
    /*
     * Initialize the message 
     */
    Message_Ptr = Message;
    try_module_get(THIS_MODULE);
    return SUCCESS;
}

/*
 * Called when a process closes the device file.  Marks the device as free
 * again and drops the module reference.  Always returns SUCCESS.
 */
static int device_release(struct inode *inode, struct file *file)
{
#ifdef DEBUG
    printk(KERN_INFO "device_release(%p,%p)\n", inode, file);
#endif

    /* The device is available to the next caller from here on. */
    --Device_Open;
    module_put(THIS_MODULE);

    return SUCCESS;
}

/*
 * Called whenever a process which has already opened the device file
 * attempts to read from it.
 *
 * Copies bytes from the current read position (Message_Ptr) into the user
 * buffer until the buffer is full, a NUL byte is reached, or the end of
 * the Message array is reached, and advances Message_Ptr accordingly.
 *
 * Returns the number of bytes stored in the buffer, 0 at end of message
 * (which signifies end of file), or -EFAULT if the user buffer could not
 * be written.
 */
static ssize_t device_read(struct file *file,   /* see include/linux/fs.h   */
               char __user * buffer,    /* buffer to be
                             * filled with data */
               size_t length,   /* length of the buffer     */
               loff_t * offset)
{
    /*
     * One past the last byte of Message.  The message is not guaranteed
     * to contain a NUL, so the scan has to be bounded by the array itself.
     */
    const char *end = Message + BUF_LEN;

    /*
     * Number of bytes actually written to the buffer
     */
    int bytes_read = 0;

#ifdef DEBUG
    printk(KERN_INFO "device_read(%p,%p,%zu)\n", file, buffer, length);
#endif

    while (length && Message_Ptr < end && *Message_Ptr) {
        /*
         * The buffer is in the user data segment, so plain assignment
         * wouldn't work; put_user copies one value from kernel space to
         * user space.  The pointer increments are kept out of the macro
         * arguments, and a failed copy is reported to the caller.
         */
        if (put_user(*Message_Ptr, buffer))
            return -EFAULT;

        Message_Ptr++;
        buffer++;
        length--;
        bytes_read++;
    }

#ifdef DEBUG
    printk(KERN_INFO "Read %d bytes, %zu left\n", bytes_read, length);
#endif

    /*
     * Read functions are supposed to return the number
     * of bytes actually inserted into the buffer
     */
    return bytes_read;
}

/*
 * Called when somebody tries to write into our device file.
 *
 * Replaces the device message with at most BUF_LEN - 1 bytes taken from
 * the user buffer, NUL-terminates it and rewinds the read cursor.
 *
 * Returns the number of input bytes consumed, or -EFAULT if the user
 * buffer could not be read.
 */
static ssize_t
device_write(struct file *file,
         const char __user * buffer, size_t length, loff_t * offset)
{
    size_t i;
    int err = 0;

#ifdef DEBUG
    /*
     * buffer is a user-space pointer: print its address, never
     * dereference it with %s from kernel context.
     */
    printk(KERN_INFO "device_write(%p,%p,%zu)\n", file, buffer, length);
#endif

    /* Leave room for the terminating NUL. */
    for (i = 0; i < length && i < BUF_LEN - 1; i++) {
        if (get_user(Message[i], buffer + i)) {
            err = -EFAULT;
            break;
        }
    }

    /*
     * Always terminate what was stored, so a shorter message does not
     * keep the tail of a longer previous one and readers find the end.
     */
    Message[i] = '\0';
    Message_Ptr = Message;

    /*
     * Again, return the number of input characters used
     */
    return err ? err : (ssize_t)i;
}

/* 
 * This function is called whenever a process tries to do an ioctl on our
 * device file. We get two extra parameters (additional to the inode and file
 * structures, which all device functions get): the number of the ioctl called
 * and the parameter given to the ioctl function.
 *
 * If the ioctl is write or read/write (meaning output is returned to the
 * calling process), the ioctl call returns the output of this function.
 *
 */
int device_ioctl(struct inode *inode,   /* see include/linux/fs.h */
         struct file *file, /* ditto */
         unsigned int ioctl_num,    /* number and param for ioctl */
         unsigned long ioctl_param)
{
    int i;
    char *temp;
    char ch;

    /* 
     * Switch according to the ioctl called 
     */
    switch (ioctl_num) {
    case IOCTL_SET_MSG:
        /* 
         * Receive a pointer to a message (in user space) and set that
         * to be the device's message.  Get the parameter given to 
         * ioctl by the process. 
         */
        temp = (char *)ioctl_param;

        /* 
         * Find the length of the message 
         */
        get_user(ch, temp);
        for (i = 0; ch && i < BUF_LEN; i++, temp++)
            get_user(ch, temp);

        device_write(file, (char *)ioctl_param, i, 0);
        break;

    case IOCTL_GET_MSG:
        /* 
         * Give the current message to the calling process - 
         * the parameter we got is a pointer, fill it. 
         */
        i = device_read(file, (char *)ioctl_param, 99, 0);

        /* 
         * Put a zero at the end of the buffer, so it will be 
         * properly terminated 
         */
        put_user('\0', (char *)ioctl_param + i);
        break;

    case IOCTL_GET_NTH_BYTE:
        /* 
         * This ioctl is both input (ioctl_param) and 
         * output (the return value of this function) 
         */
        return Message[ioctl_param];
        break;
    }

    return SUCCESS;
}

/* Module Declarations */

/*
 * File operations table for the character device: the functions to be
 * called when a process does something to the device we created.  The
 * address of this structure is handed to register_chrdev() in
 * init_module(), so it has to outlive that function and cannot be a local
 * variable there.  Operations that are not listed are left unimplemented.
 */
struct file_operations Fops = {
    .read = device_read,
    .write = device_write,
    .ioctl = device_ioctl,
    .open = device_open,
    .release = device_release,  /* a.k.a. close */
};

/*
 * Module entry point: register the character device under MAJOR_NUM and
 * log how to create the matching device file.  Returns 0 on success or the
 * negative error code reported by register_chrdev().
 */
int init_module()
{
    const int status = register_chrdev(MAJOR_NUM, DEVICE_NAME, &Fops);

    /* A negative result means the registration was refused. */
    if (status < 0) {
        printk(KERN_ALERT "%s failed with %d\n",
               "Sorry, registering the character device ", status);
        return status;
    }

    /* Tell the administrator which device node the ioctl program expects. */
    printk(KERN_INFO "%s The major device number is %d.\n",
           "Registeration is a success", MAJOR_NUM);
    printk(KERN_INFO "If you want to talk to the device driver,\n");
    printk(KERN_INFO "you'll have to create a device file. \n");
    printk(KERN_INFO "We suggest you use:\n");
    printk(KERN_INFO "mknod %s c %d 0\n", DEVICE_FILE_NAME, MAJOR_NUM);
    printk(KERN_INFO "The device file name is important, because\n");
    printk(KERN_INFO "the ioctl program assumes that's the\n");
    printk(KERN_INFO "file you'll use.\n");

    return 0;
}

/* 
 * Cleanup - unregister the appropriate file from /proc 
 */
void cleanup_module()
{
    int ret;

    /* 
     * Unregister the device 
     */
    ret = unregister_chrdev(MAJOR_NUM, DEVICE_NAME);

    /* 
     * If there's an error, report it 
     */
    if (ret < 0)
        printk(KERN_ALERT "Error: unregister_chrdev: %d\n", ret);
}

例7-2。 chardev.h

/*
 *  chardev.h - the header file with the ioctl definitions.
 *
 *  The declarations here have to be in a header file, because
 *  they need to be known both to the kernel module
 *  (in chardev.c) and the process calling ioctl (ioctl.c)
 */

#ifndef CHARDEV_H
#define CHARDEV_H

#include <linux/ioctl.h>

/* 
 * The major device number. We can't rely on dynamic 
 * registration any more, because the ioctl numbers below
 * are built from it and must be identical in the module
 * and in the user program.
 */
#define MAJOR_NUM 100

/* 
 * Set the message of the device driver 
 */
#define IOCTL_SET_MSG _IOR(MAJOR_NUM, 0, char *)
/*
 * This guide uses _IOR for an ioctl command number that
 * passes information from a user process to the kernel
 * module.
 * NOTE(review): the kernel's ioctl documentation names the
 * direction from the user's point of view (_IOW = user
 * writes to the kernel, _IOR = user reads from it), so
 * _IOW may be what is intended here - confirm before
 * changing, since both sides must use the same number.
 *
 * The first argument, MAJOR_NUM, is the major device 
 * number we're using.
 *
 * The second argument is the number of the command 
 * (there could be several with different meanings).
 *
 * The third argument is the type we want to get from 
 * the process to the kernel.
 */

/* 
 * Get the message of the device driver 
 */
#define IOCTL_GET_MSG _IOR(MAJOR_NUM, 1, char *)
/* 
 * This IOCTL is used for output, to get the message 
 * of the device driver. However, we still need the 
 * buffer to place the message in to be input, 
 * as it is allocated by the process.
 */

/* 
 * Get the n'th byte of the message 
 */
#define IOCTL_GET_NTH_BYTE _IOWR(MAJOR_NUM, 2, int)
/* 
 * The IOCTL is used for both input and output. It 
 * receives from the user a number, n, and returns 
 * Message[n]. 
 */

/* 
 * The name of the device file.  The module suggests it in
 * its mknod hint and ioctl.c opens it by this name.
 */
#define DEVICE_FILE_NAME "char_dev"

#endif

例7-3。 ioctl.c

/*
 *  ioctl.c - the process to use ioctl's to control the kernel module
 *
 *  Until now we could have used cat for input and output.  But now
 *  we need to do ioctl's, which require writing our own process.
 */

/* 
 * device specifics, such as ioctl numbers and the
 * major device file. 
 */
#include "chardev.h"

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>      /* open */
#include <unistd.h>     /* exit */
#include <sys/ioctl.h>      /* ioctl */

/* 
 * Functions for the ioctl calls 
 */

/*
 * Ask the driver to replace its message with `message` (IOCTL_SET_MSG).
 * Prints a diagnostic and terminates the program if the ioctl fails;
 * returns 0 otherwise.
 *
 * The return type is spelled out: implicit int is not valid C since C99,
 * and the original fell off the end without returning a value.
 */
int ioctl_set_msg(int file_desc, char *message)
{
    int ret_val = ioctl(file_desc, IOCTL_SET_MSG, message);

    if (ret_val < 0) {
        printf("ioctl_set_msg failed:%d\n", ret_val);
        exit(-1);
    }

    return 0;
}

/*
 * Fetch the driver's current message (IOCTL_GET_MSG) and print it.
 * Prints a diagnostic and terminates the program if the ioctl fails;
 * returns 0 otherwise.
 *
 * The return type is spelled out: implicit int is not valid C since C99.
 */
int ioctl_get_msg(int file_desc)
{
    int ret_val;
    char message[100] = "";

    /* 
     * Warning - this is dangerous because we don't tell
     * the kernel how far it's allowed to write, so it
     * might overflow the buffer. In a real production
     * program, we would have used two ioctls - one to tell
     * the kernel the buffer length and another to give
     * it the buffer to fill
     */
    ret_val = ioctl(file_desc, IOCTL_GET_MSG, message);

    if (ret_val < 0) {
        printf("ioctl_get_msg failed:%d\n", ret_val);
        exit(-1);
    }

    printf("get_msg message:%s\n", message);

    return 0;
}

/*
 * Read the driver's message one byte at a time with IOCTL_GET_NTH_BYTE
 * and print it, stopping at the first NUL byte.  Prints a diagnostic and
 * terminates the program if an ioctl fails; returns 0 otherwise.
 */
int ioctl_get_nth_byte(int file_desc)
{
    int i = 0;

    /*
     * Keep the ioctl result in an int.  Whether plain char is signed is
     * implementation-defined, so storing the result in a char and testing
     * it for "< 0" either never sees the error or mistakes bytes above
     * 0x7f for one.
     */
    int c;

    printf("get_nth_byte message:");

    for (;;) {
        c = ioctl(file_desc, IOCTL_GET_NTH_BYTE, i);

        if (c < 0) {
            /* i is the index of the byte that could not be fetched. */
            printf
                ("ioctl_get_nth_byte failed at the %d'th byte:\n",
                 i);
            exit(-1);
        }

        /* The terminator ends the message; it is not part of the output. */
        if (c == 0)
            break;

        putchar(c);
        i++;
    }
    putchar('\n');

    return 0;
}

/*
 * Main - open the device file and call the ioctl functions in turn:
 * dump the message byte by byte, fetch it in one piece, then set a new one.
 * Returns 0 on success; exits with -1 if the device file cannot be opened.
 */
int main(void)
{
    int file_desc;
    char *msg = "Message passed by ioctl\n";

    file_desc = open(DEVICE_FILE_NAME, O_RDONLY);
    if (file_desc < 0) {
        printf("Can't open device file: %s\n", DEVICE_FILE_NAME);
        exit(-1);
    }

    ioctl_get_nth_byte(file_desc);
    ioctl_get_msg(file_desc);
    ioctl_set_msg(file_desc, msg);

    close(file_desc);

    return 0;
}

第8章系统调用

系统调用

到目前为止，我们唯一做的就是使用定义良好的内核机制来注册/ proc文件和设备处理程序。如果你想做内核程序员认为你想要的东西，比如写一个设备驱动程序，这很好。但是，如果你想做一些不寻常的事情，以某种方式改变系统的行为呢？然后，你主要靠自己。

这是内核编程变得危险的地方。在编写下面的示例时，我弄坏了open（）系统调用。这意味着我无法打开任何文件，无法运行任何程序，也无法正常关闭计算机。我不得不直接切断电源。幸运的是，没有文件因此损坏。为确保您不会丢失任何文件，请在执行insmod和rmmod之前运行sync 。

忘掉/proc文件，忘掉设备文件。它们只是一些次要的细节。所有进程与内核通信时真正使用的机制是系统调用。当进程向内核请求服务（例如打开文件、fork出新进程或请求更多内存）时，使用的就是这种机制。如果你想以有趣的方式改变内核的行为，这里就是下手的地方。顺便说一下，如果要查看某个程序使用了哪些系统调用，请运行strace 。

通常，进程不应该能够访问内核。它无法访问内核内存，也无法调用内核函数。 CPU的硬件强制执行此操作（这就是为什么它被称为“保护模式”）。

系统调用是此一般规则的一个例外。会发生的是，进程使用适当的值填充寄存器，然后调用一个特殊的指令，该指令跳转到内核中先前定义的位置（当然，该位置可由用户进程读取，但不能由它们写入）。在Intel CPU下，这是通过中断0x80完成的。硬件知道，一旦你跳转到这个位置，你不再在受限制的用户模式下运行，而是作为操作系统内核运行—因此你可以做任何你想做的事情。

进程可以跳转到的内核中的位置称为system_call 。该位置的过程检查系统调用号，该号码告诉内核进程请求的是哪种服务。然后，它查看系统调用表（ sys_call_table ）以找到要调用的内核函数的地址。然后它调用该函数，在函数返回后进行一些系统检查，然后返回到进程（如果进程的时间片用完，则返回到另一个进程）。如果你想阅读这段代码，它位于源文件arch/<architecture>/kernel/entry.S中，在ENTRY(system_call)这一行之后。

因此，如果我们想要改变某个系统调用的工作方式，我们需要做的是编写我们自己的函数来实现它（通常通过添加一些我们自己的代码，然后调用原始函数）然后更改sys_call_table指针指向我们的函数。因为我们可能会在以后删除并且我们不希望系统处于不稳定状态，所以cleanup_module将表恢复到其原始状态非常重要。

这里的源代码是这样一个内核模块的一个例子。我们希望“窥探”某个用户，并在该用户打开文件时printk（）一条消息。为此，我们替换系统调用以使用我们自己的函数打开文件，名为our_sys_open 。此函数检查当前进程的uid（用户id），如果它等于我们侦听的uid，则调用printk（）显示要打开的文件的名称。然后，无论哪种方式，它都使用相同的参数调用原始的open（）函数，以实际打开文件。

init_module函数替换sys_call_table中的适当位置，并将原始指针保存在变量中。 cleanup_module函数使用该变量将一切恢复正常。这种方法很危险，因为两个内核模块可能会改变相同的系统调用。想象一下，我们有两个内核模块，A和B.A的开放系统调用将是A_open，B将是B_open。现在，当A插入内核时，系统调用将替换为A_open，它将在完成后调用原始的sys_open。接下来，将B插入内核，用B_open替换系统调用，B_open将在完成后调用它认为是原始系统调用A_open的内容。

现在，如果首先删除B，一切都会很好 - 它只会将系统调用恢复到调用原始的A_open。但是，如果删除A然后删除B，系统将崩溃。 A的删除将恢复系统调用原始的sys_open，将B切出循环。然后，当B被删除时，它会将系统调用恢复到它认为的原始A_open，它不再在内存中。乍一看，似乎我们可以通过检查系统调用是否等于我们的open函数来解决这个特殊问题，如果是这样的话，根本不改变它（这样B就不会在系统调用被删除时改变它），但是会导致更糟糕的问题。删除A时，它会看到系统调用已更改为B_open，因此它不再指向A_open，因此在从内存中删除之前，它不会将其还原到sys_open。不幸的是，B_open仍会尝试调用不再存在的A_open，因此即使不删除B，系统也会崩溃。

请注意，上述所有问题使得系统调用窃取这种做法不适合用于生产环境。为了防止人们做出有潜在危害的事情，内核不再导出sys_call_table。这意味着，如果你想做的不仅仅是把这个例子试运行（dry run）一下，就必须给当前的内核打补丁，使其导出sys_call_table。在示例目录中，您将找到README和补丁。可以想象，这样的修改不可掉以轻心。不要在重要的系统上尝试这种方法（即不属于您的系统，或者无法轻松恢复的系统）。您需要获取本指南的完整源代码tarball才能得到补丁和README。根据您的内核版本，您甚至可能需要手动应用补丁。还在看吗？好吧，本章也还在继续。如果Wyle E. Coyote是一个内核黑客，这将是他尝试的第一件事。 ;）

例8-1。 syscall.c

/*
 *  syscall.c
 *
 *  System call "stealing" sample.
 */

/* 
 * Copyright (C) 2001 by Peter Jay Salzman 
 */

/* 
 * The necessary header files 
 */

/*
 * Standard in kernel modules 
 */
#include <linux/kernel.h>   /* We're doing kernel work */
#include <linux/module.h>   /* Specifically, a module, */
#include <linux/moduleparam.h>  /* which will have params */
#include <linux/unistd.h>   /* The list of system calls */

/* 
 * For the current (process) structure, we need
 * this to know who the current user is. 
 */
#include <linux/sched.h>
#include <asm/uaccess.h>

/* 
 * The system call table (a table of functions). We
 * just declare it as external, and the kernel will
 * resolve it for us when we are insmod'ed
 *
 * sys_call_table is no longer exported in 2.6.x kernels.
 * If you really want to try this DANGEROUS module you will
 * have to apply the supplied patch against your current kernel
 * and recompile it.
 */
extern void *sys_call_table[];

/* 
 * UID we want to spy on - filled in from the insmod
 * command line through the module parameter declared below
 */
static int uid;
module_param(uid, int, 0644);

/* 
 * A pointer to the original system call, saved by
 * init_module() and restored by cleanup_module(). The reason
 * we keep this, rather than call the original function
 * (sys_open), is because somebody else might have
 * replaced the system call before us. Note that this
 * is not 100% safe, because if another module
 * replaced sys_open before us, then when we're inserted
 * we'll call the function in that module - and it
 * might be removed before we are.
 *
 * Another reason for this is that we can't get sys_open:
 * according to the guide it is not exported to modules.
 */
asmlinkage int (*original_call) (const char *, int, int);

/*
 * The function we'll replace sys_open (the function called when you call
 * the open system call) with.  Its prototype has to match the original,
 * which the guide locates in fs/open.c.
 *
 * If the calling process runs under the UID we are spying on, the name of
 * the file being opened is logged with printk.  Either way the saved
 * original system call is invoked with the same arguments and its result
 * is returned, so files can still be opened.
 *
 * In theory this ties us to the current version of the kernel.  In
 * practice the system calls almost never change, since they are the
 * interface between the kernel and the processes.
 */
asmlinkage int our_sys_open(const char *filename, int flags, int mode)
{
    int i = 0;
    char ch;

    /* 
     * Check if this is the user we're spying on 
     */
    if (uid == current->uid) {
        /* 
         * Report the file, if relevant 
         */
        printk("Opened file by %d: ", uid);

        /*
         * filename points into user space, so each character is
         * fetched with get_user.  Stop at the terminating NUL - which
         * is not written to the log - or as soon as a fetch fails,
         * instead of relying on the value left behind by a failed fetch.
         */
        while (get_user(ch, filename + i) == 0 && ch != 0) {
            printk("%c", ch);
            i++;
        }
        printk("\n");
    }

    /* 
     * Call the original sys_open - otherwise, we lose
     * the ability to open files 
     */
    return original_call(filename, flags, mode);
}

/*
 * Initialize the module - replace the open system call.
 *
 * Logs a warning, saves the current sys_call_table[__NR_open] entry in
 * original_call and installs our_sys_open in its place.  Always returns 0.
 */
int init_module()
{
    /* 
     * Warning - too late for it now, but maybe for
     * next time... 
     *
     * Each log line is emitted by a single printk.  The original split
     * every line across two calls that both started with KERN_ALERT,
     * which put a log-level marker in the middle of the line and ran
     * "is even" and "more" together without a space.
     */
    printk(KERN_ALERT "I'm dangerous. I hope you did a "
           "sync before you insmod'ed me.\n");
    printk(KERN_ALERT "My counterpart, cleanup_module(), is even "
           "more dangerous. If\n");
    printk(KERN_ALERT "you value your file system, it will "
           "be \"sync; rmmod\" \n");
    printk(KERN_ALERT "when you remove this module.\n");

    /* 
     * Keep a pointer to the original function in
     * original_call, and then replace the system call
     * in the system call table with our_sys_open 
     */
    original_call = sys_call_table[__NR_open];
    sys_call_table[__NR_open] = our_sys_open;

    /* 
     * To get the address of the function for system
     * call foo, go to sys_call_table[__NR_foo]. 
     */

    printk(KERN_INFO "Spying on UID:%d\n", uid);

    return 0;
}

/*
 * Cleanup - put the original open system call back into the table.
 *
 * If the table entry no longer points at our_sys_open, another party has
 * changed it after us; a warning is logged, and the saved pointer is
 * restored regardless.
 */
void cleanup_module()
{
    /* 
     * Return the system call back to normal 
     */
    if (sys_call_table[__NR_open] != our_sys_open) {
        /*
         * One printk per log line, so that KERN_ALERT only appears at
         * the start of a line.
         */
        printk(KERN_ALERT "Somebody else also played with the "
               "open system call\n");
        printk(KERN_ALERT "The system may be left in "
               "an unstable state.\n");
    }

    sys_call_table[__NR_open] = original_call;
}

第9章阻止进程

阻止进程

当有人问你一些你不能马上做的事情时，你会怎么做？如果你是一个人并且你被一个人困扰，你唯一可以说的是： “不是现在，我很忙。走开！ ” 。但是如果你是一个内核模块并且你被一个进程困扰，你还有另一种可能性。您可以将进程置于睡眠状态，直到您可以为其提供服务。毕竟，内核正在使进程处于休眠状态并且一直被唤醒（这就是多个进程在单个CPU上同时运行的方式）。

这个内核模块就是一个例子。该文件（称为/ proc / sleep ）一次只能由一个进程打开。如果文件已经打开，则内核模块调用wait_event_interruptible [12] 。此函数更改任务的状态（任务是内核数据结构，其中包含有关进程和系统调用的信息，如果有的话）到TASK_INTERRUPTIBLE ，这意味着任务在以某种方式被唤醒之前不会运行，并将其添加到WaitQ ，即等待访问该文件的任务队列。然后，该函数调用调度程序将上下文切换到另一个进程，该进程对CPU有一些用处。

当一个进程完成该文件时，它会关闭它，并调用module_close 。该函数唤醒队列中的所有进程（没有机制只唤醒其中一个进程）。然后它返回，刚刚关闭文件的进程可以继续运行。最后，调度程序决定该进程已经足够，并将CPU的控制权交给另一个进程。最终，队列中的一个进程将由调度程序控制CPU。它在调用module_interruptible_sleep_on [13]之后立即开始。然后，它可以继续设置一个全局变量，以告诉所有其他进程文件仍处于打开状态并继续其生命周期。当其他进程获得一块CPU时，它们将看到该全局变量并重新进入休眠状态。

所以我们将使用tail -f来保持文件在后台打开，同时尝试使用另一个进程访问它（同样在后台，这样我们就不需要切换到不同的vt）。一旦第一个后台进程被kill％1杀死，第二个被唤醒，就能够访问该文件并最终终止。

为了让我们的生活更有趣， module_close没有垄断唤醒等待访问文件的进程。一个信号，如Ctrl + c （ SIGINT ）也可以唤醒一个进程。 [14]在这种情况下，我们希望立即返回-EINTR 。这很重要，因此用户可以在收到文件之前终止进程。

还有一点要记住。有时候进程不想休眠，它们希望要么立即得到想要的东西，要么被告知现在无法完成。这类进程在打开文件时会使用O_NONBLOCK标志。对于那些原本会阻塞的操作（例如本示例中的打开文件），内核应该返回错误代码-EAGAIN作为响应。程序cat_noblock （可在本章的源目录中找到）可用于以O_NONBLOCK方式打开文件。

hostname:~/lkmpg-examples/09-BlockingProcesses# insmod sleep.ko
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Last input:
hostname:~/lkmpg-examples/09-BlockingProcesses# tail -f /proc/sleep &
Last input:
Last input:
Last input:
Last input:
Last input:
Last input:
Last input:
tail: /proc/sleep: file truncated
[1] 6540
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Open would block
hostname:~/lkmpg-examples/09-BlockingProcesses# kill %1
[1]+  Terminated              tail -f /proc/sleep
hostname:~/lkmpg-examples/09-BlockingProcesses# cat_noblock /proc/sleep
Last input:
hostname:~/lkmpg-examples/09-BlockingProcesses#

例9-1。 sleep.c


/*
 *  sleep.c - create a /proc file, and if several processes try to open it at
 *  the same time, put all but one to sleep
 */

#include <linux/kernel.h>   /* We're doing kernel work */
#include <linux/module.h>   /* Specifically, a module */
#include <linux/proc_fs.h>  /* Necessary because we use proc fs */
#include <linux/sched.h>    /* For putting processes to sleep and 
                   waking them up */
#include <asm/uaccess.h>    /* for get_user and put_user */

/* 
 * The module's file functions 
 */

/* 
 * Here we keep the last message received, to prove that we can process our
 * input.  module_input() fills it and module_output() reports it.
 */
#define MESSAGE_LENGTH 80
static char Message[MESSAGE_LENGTH];

/* The /proc entry created in init_module() and the name it is given. */
static struct proc_dir_entry *Our_Proc_File;
#define PROC_ENTRY_FILENAME "sleep"

/*
 * Read handler for the /proc file.  Since we use the file operations
 * struct, we can't use the special proc output provisions and have to
 * supply a standard read function.
 *
 * The first call copies "Last input:<Message>\n" to the user buffer
 * (truncated to len bytes) and returns the number of bytes copied.  The
 * following call returns 0 to signify end of file and re-arms the
 * function.  Returns -EFAULT if the user buffer cannot be written.
 */
static ssize_t module_output(struct file *file, /* see include/linux/fs.h   */
                 char *buf, /* The buffer to put data to 
                       (in the user segment)    */
                 size_t len,    /* The length of the buffer */
                 loff_t * offset)
{
    /*
     * Remembers between calls that the text has already been handed out.
     * It is a single flag shared by every reader of the file.
     */
    static int finished = 0;
    size_t i;
    char message[MESSAGE_LENGTH + 30];

    /* 
     * Return 0 to signify end of file - that we have nothing 
     * more to say at this point.
     */
    if (finished) {
        finished = 0;
        return 0;
    }

    /*
     * Message occupies at most MESSAGE_LENGTH bytes, so the prefix, the
     * text and the newline fit into the 30 spare bytes.
     */
    sprintf(message, "Last input:%s\n", Message);

    /* Copy to user space byte by byte and report a faulting buffer. */
    for (i = 0; i < len && message[i]; i++) {
        if (put_user(message[i], buf + i))
            return -EFAULT;
    }

    finished = 1;
    return i;       /* Return the number of bytes "read" */
}

/*
 * Write handler for the /proc file: receives input from the user.
 *
 * Stores at most MESSAGE_LENGTH - 1 bytes of the input in Message, where
 * module_output will later be able to use it, and NUL-terminates it.
 * Returns the number of input bytes used, or -EFAULT if the user buffer
 * cannot be read.
 */
static ssize_t module_input(struct file *file,  /* The file itself */
                const char *buf,    /* The buffer with input */
                size_t length,  /* The buffer's length */
                loff_t * offset)
{               /* offset to file - ignore */
    size_t i;

    for (i = 0; i < MESSAGE_LENGTH - 1 && i < length; i++) {
        if (get_user(Message[i], buf + i)) {
            /* Keep what was stored so far a valid string. */
            Message[i] = '\0';
            return -EFAULT;
        }
    }

    /* 
     * we want a standard, zero terminated string 
     */
    Message[i] = '\0';

    /* 
     * We need to return the number of input characters used 
     */
    return i;
}

/* 
 * 1 if the file is currently open by somebody.  Set by module_open() and
 * cleared by module_close().
 */
int Already_Open = 0;

/* 
 * Queue of processes who want our file: module_open() sleeps on it and
 * module_close() wakes it up.
 */
DECLARE_WAIT_QUEUE_HEAD(WaitQ);
/* 
 * Called when the /proc file is opened 
 */
static int module_open(struct inode *inode, struct file *file)
{
    /* 
     * If the file's flags include O_NONBLOCK, it means the process doesn't
     * want to wait for the file.  In this case, if the file is already 
     * open, we should fail with -EAGAIN, meaning "you'll have to try 
     * again", instead of blocking a process which would rather stay awake.
     */
    if ((file->f_flags & O_NONBLOCK) && Already_Open)
        return -EAGAIN;

    /* 
     * This is the correct place for try_module_get(THIS_MODULE) because 
     * if a process is in the loop, which is within the kernel module,
     * the kernel module must not be removed.
     */
    try_module_get(THIS_MODULE);

    /* 
     * If the file is already open, wait until it isn't 
     */

    while (Already_Open) {
        int i, is_sig = 0;

        /* 
         * This function puts the current process, including any system
         * calls, such as us, to sleep.  Execution will be resumed right
         * after the function call, either because somebody called 
         * wake_up(&WaitQ) (only module_close does that, when the file 
         * is closed) or when a signal, such as Ctrl-C, is sent 
         * to the process
         */
        wait_event_interruptible(WaitQ, !Already_Open);

        /* 
         * If we woke up because we got a signal we're not blocking, 
         * return -EINTR (fail the system call).  This allows processes
         * to be killed or stopped.
         */

/*
 * Emmanuel Papirakis:
 *
 * This is a little update to work with 2.2.*.  Signals now are contained in
 * two words (64 bits) and are stored in a structure that contains an array of
 * two unsigned longs.  We now have to make 2 checks in our if.
 *
 * Ori Pomerantz:
 *
 * Nobody promised me they'll never use more than 64 bits, or that this book
 * won't be used for a version of Linux with a word size of 16 bits.  This code
 * would work in any case.
 */
        for (i = 0; i < _NSIG_WORDS && !is_sig; i++)
            is_sig =
                current->pending.signal.sig[i] & ~current->
                blocked.sig[i];

        if (is_sig) {
            /* 
             * It's important to put module_put(THIS_MODULE) here,
             * because for processes where the open is interrupted
             * there will never be a corresponding close. If we 
             * don't decrement the usage count here, we will be 
             * left with a positive usage count which we'll have no
             * way to bring down to zero, giving us an immortal 
             * module, which can only be killed by rebooting 
             * the machine.
             */
            module_put(THIS_MODULE);
            return -EINTR;
        }
    }

    /* 
     * If we got here, Already_Open must be zero 
     */

    /* 
     * Open the file 
     */
    Already_Open = 1;
    return 0;       /* Allow the access */
}

/*
 * Called when the /proc file is closed.  Releases the file for the next
 * opener, wakes the processes sleeping in module_open() and drops the
 * module reference.  Always returns 0.
 */
int module_close(struct inode *inode, struct file *file)
{
    /*
     * Mark the file as free first, so that a woken process finds the
     * condition it is waiting for.  One of them will set Already_Open
     * back to one; the others will see that and go back to sleep.
     */
    Already_Open = 0;

    /* Everybody waiting in WaitQ gets a chance to grab the file. */
    wake_up(&WaitQ);

    module_put(THIS_MODULE);

    return 0;       /* success */
}

/*
 * Decide whether an operation on the /proc file is allowed.  Returns zero
 * to allow it, or -EACCES to refuse it.
 *
 * The operation values handled here are:
 * 2 - Write (input to the kernel module)
 * 4 - Read (output from the kernel module)
 * NOTE(review): the original comment also listed "0 - Execute"; kernel
 * headers appear to define the execute bit as 1 - confirm.
 *
 * This is the real function that checks file permissions. The permissions
 * returned by ls -l are for reference only, and can be overridden here.
 */
static int module_permission(struct inode *inode, int op, struct nameidata *nd)
{
    /* Reading is open to everybody. */
    if (op == 4)
        return 0;

    /* Writing is reserved for root (effective uid 0). */
    if (op == 2 && current->euid == 0)
        return 0;

    /* Anything else is refused. */
    return -EACCES;
}

/* 
 * Structures to register as the /proc file, with pointers to all the relevant
 * functions.
 */

/* 
 * File operations for our proc file. This is where we place pointers to all
 * the functions called when somebody tries to do something to our file.
 * Operations that are not listed are left unset, meaning we don't want to
 * deal with them.  init_module() installs this table as the entry's
 * proc_fops.
 */
static struct file_operations File_Ops_4_Our_Proc_File = {
    .read = module_output,  /* "read" from the file */
    .write = module_input,  /* "write" to the file */
    .open = module_open,    /* called when the /proc file is opened */
    .release = module_close,    /* called when it's closed */
};

/* 
 * Inode operations for our proc file.  The only one we supply is the
 * permission check; every other inode operation is left unset.
 * init_module() installs this table as the entry's proc_iops.
 */

static struct inode_operations Inode_Ops_4_Our_Proc_File = {
    .permission = module_permission,    /* check for permissions */
};

/* 
 * Module initialization and cleanup 
 */

/* 
 * Initialize the module - register the proc file 
 */

int init_module()
{

    Our_Proc_File = create_proc_entry(PROC_ENTRY_FILENAME, 0644, NULL);

    if (Our_Proc_File == NULL) {
        remove_proc_entry(PROC_ENTRY_FILENAME, &proc_root);
        printk(KERN_ALERT "Error: Could not initialize /proc/test\n");
        return -ENOMEM;
    }

    Our_Proc_File->owner = THIS_MODULE;
    Our_Proc_File->proc_iops = &Inode_Ops_4_Our_Proc_File;
    Our_Proc_File->proc_fops = &File_Ops_4_Our_Proc_File;
    Our_Proc_File->mode = S_IFREG | S_IRUGO | S_IWUSR;
    Our_Proc_File->uid = 0;
    Our_Proc_File->gid = 0;
    Our_Proc_File->size = 80;

    printk(KERN_INFO "/proc/test created\n");

    return 0;
}

/* 
 * Cleanup - unregister our file from /proc.  This could get dangerous if
 * there are still processes waiting in WaitQ, because they are inside our
 * open function, which will get unloaded. I'll explain how to avoid removal
 * of a kernel module in such a case in chapter 10.
 */
void cleanup_module()
{
    remove_proc_entry(PROC_ENTRY_FILENAME, &proc_root);

    /* Report the entry that was really removed (it is not /proc/test). */
    printk(KERN_INFO "/proc/%s removed\n", PROC_ENTRY_FILENAME);
}

例9-2。 cat_noblock.c

/* cat_noblock.c - open a file and display its contents, but exit rather than
 * wait for input */


/* Copyright (C) 1998 by Ori Pomerantz */



#include <stdio.h>    /* standard I/O */
#include <fcntl.h>    /* for open */
#include <unistd.h>   /* for read */ 
#include <stdlib.h>   /* for exit */
#include <errno.h>    /* for errno */

/* Size of the read buffer; parenthesized so it is safe in any expression */
#define MAX_BYTES (1024 * 4)


/*
 * Open the file named on the command line in non-blocking mode and copy
 * its contents to standard output.  Exits with -1 on a usage error, when
 * the open fails, or when a read fails; returns 0 once end of file has
 * been reached.
 */
int main(int argc, char *argv[])
{
  int     fd;    /* The file descriptor for the file to read */
  ssize_t bytes; /* The number of bytes read, or -1 on error */
  ssize_t i;
  char    buffer[MAX_BYTES]; /* The buffer for the bytes */


  /* Usage */
  if (argc != 2) {
    printf("Usage: %s <filename>\n", argv[0]);
    puts("Reads the content of a file, but doesn't wait for input");
    exit(-1);
  }

  /* Open the file for reading in non blocking mode */
  fd = open(argv[1], O_RDONLY | O_NONBLOCK);

  /* If open failed */
  if (fd == -1) {
    /* Compare errno; the original assigned to it and so always
     * reported "would block", whatever the real error was */
    if (errno == EAGAIN)
      puts("Open would block");
    else
      puts("Open failed");
    exit(-1);
  }

  /* Read the file and output its contents */
  do {
    /* Read characters from the file */
    bytes = read(fd, buffer, MAX_BYTES);

    /* If there's an error, report it and die */
    if (bytes == -1) {
      if (errno == EAGAIN)
        puts("Normally I'd block, but you told me not to");
      else
        puts("Another read error");
      exit(-1);
    }

    /* Print the characters */
    for (i = 0; i < bytes; i++)
      putchar(buffer[i]);

    /* While there are no errors and the file isn't over */
  } while (bytes > 0);

  return 0;
}

Linux内核模块编程指南(三)