poll-轮询

kernel 到 user 的单向数据流

kernel 模拟数据源:内核线程每隔 3 秒产生一次数据,每次写入 3 个字节
user 循环调用 poll(每次超时时间为 1 秒),有数据可读时再调用 read 读取

user代码

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
#include <errno.h> 

/*
 * User-space side of the demo.
 *
 * Opens /dev/MyDemo read-only, waits 5 seconds, then calls poll() up to 20
 * times with a 1000 ms timeout. Whenever POLLIN is reported, up to
 * sizeof(tmp) bytes are read and printed as decimal values.
 *
 * Returns 0 after the loop (the device is closed first), or -1 if the
 * device cannot be opened.
 */
int main(int argc, char **argv)
{
	int i, j;
	int ret;
	int fd;
	int saved_errno;
	ssize_t nread;
	struct pollfd fds[1];
	unsigned char tmp[20];

	(void)argc;
	(void)argv;

	fd = open("/dev/MyDemo", O_RDONLY);
	if (fd < 0) {
		printf("open failed, errno %d\n", errno);
		return -1;
	}
	printf("open successful\n");
	sleep(5);

	fds[0].fd = fd;
	fds[0].events = POLLIN;

	i = 20;
	while (i--) {
		fds[0].revents = 0;

		printf("before poll!\n");
		ret = poll(fds, 1, 1000); /* third argument is the timeout in milliseconds */
		saved_errno = errno;      /* printf() below may overwrite errno */
		printf("after poll!\n");

		if (ret < 0) {
			/* poll() itself failed: revents is not meaningful here. */
			if (saved_errno == EINTR)
				continue;
			printf("poll failed, errno %d\n", saved_errno);
			break;
		}

		if (ret == 0) {
			printf("time out!\n");
			continue;
		}

		/*
		 * revents is a bit mask: test the POLLIN bit instead of comparing
		 * for equality, so that POLLIN combined with other bits (for
		 * example POLLHUP) is still treated as readable.
		 */
		if (!(fds[0].revents & POLLIN)) {
			printf("not POLLIN!\n");
			continue;
		}

		printf("before read!\n");
		memset(tmp, 0, sizeof(tmp));
		nread = read(fd, tmp, sizeof(tmp));
		if (nread <= 0) {
			printf("read err!\n");
		} else {
			for (j = 0; j < nread; j++) {
				printf("%d ", tmp[j]);
			}
			printf("\n");
		}
		printf("after read!\n");
	}

	/* close() returns 0 on success and -1 on failure. */
	ret = close(fd);
	if (ret < 0) {
		printf("close failed!\n");
	} else {
		printf("close success!\n");
	}
	return 0;
}

kernel代码

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/kdev_t.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/uaccess.h>
#include <linux/kthread.h>
#include <linux/unistd.h>
#include <linux/delay.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <linux/syscalls.h>

/* Size in bytes of the kernel-to-user ring buffer (struct channel.buffer). */
#define DEMO_BUFFER_SIZE 256
/* Name used for the chrdev region, the class and the device node. */
#define DEVICE_NAME "MyDemo"
/* Number of device numbers reserved and added for this driver. */
#define DEVICE_CNT	1

/* Uncomment to compile in the dumps done by print_debug() and print_user(). */
// #define PRINT_DEBUG

/* Bookkeeping for the character device registered by this module. */
struct chr_dev{
	dev_t devid;		/* device number registered/allocated in demo_init() */
	struct cdev cdev;	/* cdev initialised with demo_fops */
	struct class *class;	/* class returned by class_create() */
	struct device *device;	/* device returned by device_create() */
	int major;		/* major number; a non-zero value at init time is requested explicitly */
	int minor;		/* minor number taken from devid after dynamic allocation */
};

/* The single device instance managed by this module. */
struct chr_dev dump_dev;

// The design is illustrated with a single (read) channel.
struct chan_waitqueue {
	wait_queue_head_t queue; // wait queue head used by can_read() and file_poll()
	atomic_t in_open;        // open counter; file_open() rejects a second opener
	struct mutex mutex;      // held by file_read() while it copies data out
};

/* Wait/lock state of the kernel-to-user channel. */
static struct chan_waitqueue kern2user_Q;

/* Ring buffer carrying bytes from the kernel producer to the user reader. */
struct channel {
	unsigned char buffer[DEMO_BUFFER_SIZE]; // data storage
	int user_read_pos; // index of the next byte the user side will read
	int kern_write_pos; // index of the next byte the kernel side will write
};

/* The single kernel-to-user channel instance. */
static struct channel kern2user_Ch;

// Global stop flag.
// true means the demo is stopping: set by demo_stopping(), cleared by
// demo_starting(). A reader sleeping in can_read() is released when it
// becomes true.
static bool g_stopping;

/* Task handle returned by kthread_run() for the data-producing thread. */
static struct task_struct *generate_and_wake;

/*
 * can_read - report how many bytes are pending in the kernel-to-user ring.
 * @can_sleep: non-zero if the caller may block until data arrives
 *
 * Returns the number of bytes that can be read starting at user_read_pos
 * (e.g. 3 means three bytes starting at user_read_pos).
 *
 * When the ring is empty:
 *   - with @can_sleep == 0 the function returns 0 immediately;
 *   - otherwise it sleeps interruptibly until data is written or g_stopping
 *     is set. If the sleep fails, the non-zero (negative) result of
 *     __wait_event_interruptible() is returned as-is, i.e. converted to
 *     unsigned int. If the wake-up was caused by g_stopping, 0 is returned.
 */
unsigned int can_read(int can_sleep)
{
	struct channel *ring = &kern2user_Ch;

	if (ring->user_read_pos == ring->kern_write_pos) {
		int err;

		/* Non-blocking caller: nothing to read right now. */
		if (!can_sleep)
			return 0;

		/* The loop inside the macro exits as soon as the condition holds. */
		err = __wait_event_interruptible(kern2user_Q.queue,
			(ring->user_read_pos != ring->kern_write_pos) || g_stopping);

		/* Woken up by an error rather than by the condition. */
		if (err)
			return err;

		/* Condition satisfied because the demo is shutting down. */
		if (g_stopping)
			return 0;

		/* Otherwise new data arrived: fall through and count it. */
	}

	/*
	 * Distance from the read index to the write index, modulo the ring
	 * size. Equivalent to (write - read) when read <= write and to
	 * (write + DEMO_BUFFER_SIZE - read) otherwise.
	 */
	return (ring->kern_write_pos + DEMO_BUFFER_SIZE - ring->user_read_pos) % DEMO_BUFFER_SIZE;
}


/*
 * print_debug - dump part of the ring buffer to the kernel log.
 * @start: index of the first byte of kern2user_Ch.buffer to print
 * @num:   number of bytes to print
 *
 * Compiles to an empty function unless PRINT_DEBUG is defined.
 */
void print_debug(unsigned long start, unsigned long num)
{
#ifdef PRINT_DEBUG
	struct channel *ring = &kern2user_Ch;
	unsigned long idx = start;
	unsigned long end = start + num;

	printk("demo(nums %lu): ", num);
	while (idx < end) {
		printk("%d ", ring->buffer[idx]);
		idx++;
	}
	printk("\n");
#endif
}

/*
 * print_user - dump the first @num bytes of a user-space buffer to the log.
 * @buffer: user-space address to read from
 * @num:    number of bytes to print
 *
 * Compiles to an empty function unless PRINT_DEBUG is defined.
 *
 * A __user pointer must not be dereferenced directly from kernel code, so
 * every byte is fetched with get_user(); the dump stops at the first byte
 * that cannot be read.
 */
void print_user(char __user *buffer, unsigned long num) {
#ifdef PRINT_DEBUG
	unsigned long i;
	char byte;

	printk("demo(nums %lu): ", num);
	for (i = 0; i < num; i++) {
		if (get_user(byte, buffer + i))
			break;
		printk("%d ", byte);
	}
	printk("\n");
#endif
}

/*
 * file_read - read() handler: copy pending ring-buffer bytes to user space.
 * @file:   open file; O_NONBLOCK in f_flags selects non-blocking behaviour
 * @buffer: user-space destination
 * @count:  maximum number of bytes the caller accepts
 * @ppos:   file position, advanced by the number of bytes delivered
 *
 * Returns the number of bytes copied; 0 when nothing is available
 * (non-blocking mode, or the demo is stopping); the negative errno from
 * the interrupted wait when a blocking read is interrupted; -EFAULT when
 * the user buffer could not be written at all.
 */
static ssize_t file_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
	unsigned long failed = 0;
	unsigned long nums = 0, nums2 = 0;
	struct channel *chan = &kern2user_Ch;
	int avail;
	int rpos, wpos;

	/*
	 * can_read() takes "may sleep": 1 for blocking callers, 0 for
	 * O_NONBLOCK. It returns unsigned int, but an interrupted wait hands
	 * its negative errno back through that type. Valid counts are always
	 * below DEMO_BUFFER_SIZE, so casting to int recovers the error code
	 * unambiguously; propagate it instead of reporting a 0-byte read.
	 */
	avail = (int)can_read((file->f_flags & O_NONBLOCK) ? 0 : 1);
	if (avail < 0)
		return avail;
	if (!avail) {
		pr_info("%s, no data to read\n", __func__);
		return 0;
	}

	mutex_lock(&(kern2user_Q.mutex));

	/*
	 * Snapshot both indices once. The producer thread does not take the
	 * mutex, so kern_write_pos may change while we are copying; all of the
	 * arithmetic below must use one consistent pair of values.
	 */
	rpos = chan->user_read_pos;
	wpos = chan->kern_write_pos;
	/* Read the indices before reading the buffer contents. */
	smp_rmb();

	/* Number of bytes to deliver in this call. */
	count = min(count,
		     (size_t)((wpos + DEMO_BUFFER_SIZE - rpos) % DEMO_BUFFER_SIZE));

	if (wpos >= rpos) {
		/* Write index is ahead of the read index: one contiguous span. */
		nums = count;
		pr_info("%s, %lu bytes will copy to user\n", __func__, nums);
		/* Copy [rpos, rpos + nums). */
		print_debug(rpos, nums);
		failed = copy_to_user(buffer, chan->buffer + rpos, nums);
		if (failed) {
			pr_err("%s, %lu bytes copy to user failed\n", __func__, failed);
			goto out;
		} else {
			pr_info("%s, %lu bytes copy to user success\n", __func__, nums);
		}
	} else {
		/* Write index has wrapped: data is [rpos, SIZE) followed by [0, wpos). */

		/* First segment: up to the end of the buffer. */
		nums = DEMO_BUFFER_SIZE - rpos;
		pr_info("%s, first segment %lu bytes\n", __func__, nums);

		if (nums < count) {
			/* The whole first segment fits; continue with the second one. */
			pr_info("%s, first segment %lu bytes will copy to user\n", __func__, nums);

			/* Copy [rpos, DEMO_BUFFER_SIZE). */
			print_debug(rpos, nums);
			failed = copy_to_user(buffer, chan->buffer + rpos, nums);
			if (failed) {
				pr_err("%s, %lu bytes copy to user failed\n", __func__, failed);
				goto out;
			} else {
				pr_info("%s, %lu bytes copy to user success\n", __func__, nums);
			}

			/* wpos is an int, so it is printed with %d. */
			pr_info("%s, second segment %d bytes\n", __func__, wpos);
			nums2 = count - nums;
			pr_info("%s, second segment, %lu bytes will copy to user\n", __func__, nums2);

			/* Copy [0, nums2). */
			print_debug(0, nums2);
			failed = copy_to_user(buffer + nums, chan->buffer, nums2);
			if (failed) {
				pr_err("%s, %lu bytes copy to user failed\n", __func__, failed);
				goto out;
			} else {
				pr_info("%s, %lu bytes copy to user success\n", __func__, nums2);
			}
		} else {
			/* Only (part of) the first segment fits. */
			nums = count;
			pr_info("%s, first segment %lu bytes will copy to user\n", __func__, nums);

			/* Copy [rpos, rpos + nums). */
			print_debug(rpos, nums);
			failed = copy_to_user(buffer, chan->buffer + rpos, nums);
			if (failed) {
				pr_err("%s, %lu bytes copy to user failed\n", __func__, failed);
				goto out;
			} else {
				pr_info("%s, %lu bytes copy to user success\n", __func__, nums);
			}
		}
	}

out:
	/* copy_to_user() returns the number of bytes it could NOT copy. */
	count = nums + nums2 - failed;
	*ppos += count;

	print_user(buffer, count);

	smp_wmb();
	chan->user_read_pos = (rpos + count) % DEMO_BUFFER_SIZE;
	smp_wmb();
	mutex_unlock(&(kern2user_Q.mutex));

	/* Nothing at all reached user space: report the fault, not "0 bytes". */
	if (failed && !count)
		return -EFAULT;

	return count;
}

/*
 * file_poll - poll() handler.
 *
 * Registers the caller on the channel wait queue, then reports
 * POLLIN | POLLRDNORM if a non-sleeping can_read() finds pending data,
 * and an empty mask otherwise.
 */
static unsigned file_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &(kern2user_Q.queue), wait);

	if (!can_read(0))
		return 0;

	return POLLIN | POLLRDNORM;
}

void demo_starting(void) {
	g_stopping = false;
	wake_up_interruptible(&kern2user_Q.queue);
}

void demo_stopping(void) {
	g_stopping = true;
	wake_up_interruptible(&kern2user_Q.queue);
}

static int file_open(struct inode *inode, struct file *filp)
{
	int ret = 0;

	if (atomic_inc_return(&kern2user_Q.in_open) > 1) {
		pr_err("%s, chan already opened\n", __func__);
		ret = -EBUSY;
		goto out_fail;
	}

	return 0;

out_fail:
	smp_mb();
	atomic_dec(&kern2user_Q.in_open);
	smp_mb();
	return ret;
}

static int file_release(struct inode *inode, struct file *filp)
{
	atomic_dec(&kern2user_Q.in_open);
	return 0;
}

/*
 * File operations of the demo device: single-opener open/release, read
 * and poll on the kernel-to-user channel. No write handler is provided.
 */
const struct file_operations demo_fops = {
	.owner =   THIS_MODULE,
	.open =    file_open,
	.release = file_release,
	.read =    file_read,
	.poll =    file_poll,
	.llseek =  noop_llseek,
};

/*
 * demo_generate_data_thread - kernel thread that simulates a data source.
 * @data: unused
 *
 * Every 3 seconds it appends up to 3 bytes to the ring buffer (each byte is
 * the value of the index it is stored at) and wakes up readers/pollers.
 *
 * The loop runs until kthread_stop() is called on this thread, so the task
 * is guaranteed to still exist when demo_exit() stops it, and demo_exit()
 * does not return before this function has finished executing. Because
 * msleep() is not cut short by kthread_stop(), stopping can take up to
 * about 3 seconds.
 *
 * Always returns 0.
 */
int demo_generate_data_thread(void *data)
{
	struct channel *chan = &kern2user_Ch;
	int i;

	while (!kthread_should_stop()) {
		for (i = 0; i < 3; i++) {
			int pos = chan->kern_write_pos;
			int next = (pos + 1) % DEMO_BUFFER_SIZE;

			/*
			 * Ring full: advancing would make the write index equal to
			 * the read index, which readers interpret as "empty" and
			 * which would silently discard all unread data. Drop the
			 * new sample instead.
			 */
			if (next == chan->user_read_pos)
				break;

			chan->buffer[pos] = pos;
			/* Publish the byte before publishing the new write index. */
			smp_wmb();
			chan->kern_write_pos = next;
		}
		/* kern_write_pos is an int, so it is printed with %d. */
		pr_info("%s, write buffer, cur kern_write_pos %d\n", __func__, chan->kern_write_pos);
		smp_wmb();

		pr_info("%s, before wake up\n", __func__);
		wake_up_interruptible(&kern2user_Q.queue);
		pr_info("%s, after wake up\n", __func__);
		msleep(3000);
	}

	return 0;
}

/*
 * demo_init - module entry point.
 *
 * Initialises the channel state, registers the character device
 * (region, cdev, class, device node) and starts the producer thread.
 * Every failure unwinds exactly what has been set up so far, in reverse
 * order, and returns the corresponding negative errno.
 */
static int demo_init(void)
{
	int result = 0;

	pr_info("%s ===>\n", __func__);

	/*
	 * Initialise everything the file operations touch BEFORE the device
	 * becomes reachable from user space.
	 */
	init_waitqueue_head(&kern2user_Q.queue);
	atomic_set(&kern2user_Q.in_open, 0);
	mutex_init(&kern2user_Q.mutex);
	kern2user_Ch.kern_write_pos = 0;
	kern2user_Ch.user_read_pos = 0;
	/* sizeof yields a size_t, so it is printed with %zu. */
	pr_info("%s, memset buffer, size %zu\n", __func__, sizeof(kern2user_Ch.buffer));
	memset(&kern2user_Ch.buffer, 0, sizeof(kern2user_Ch.buffer));

	if (dump_dev.major) {
		dump_dev.devid = MKDEV(dump_dev.major, 0);
		result = register_chrdev_region(dump_dev.devid, DEVICE_CNT, DEVICE_NAME);
		if (result < 0)
			return result;
	} else {
		result = alloc_chrdev_region(&dump_dev.devid, 0, DEVICE_CNT, DEVICE_NAME);
		if (result < 0)
			return result;
		dump_dev.major = MAJOR(dump_dev.devid);
		dump_dev.minor = MINOR(dump_dev.devid);
	}
	pr_info("%s, major=%d, minor=%d\r\n", __func__, dump_dev.major, dump_dev.minor);

	/* cdev_init() clears the structure, so the owner must be set after it. */
	cdev_init(&dump_dev.cdev, &demo_fops);
	dump_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&dump_dev.cdev, dump_dev.devid, DEVICE_CNT);
	if (result < 0) {
		pr_err("%s, cdev_add failed\n", __func__);
		goto err_region;
	}

	dump_dev.class = class_create(THIS_MODULE, DEVICE_NAME);
	if (IS_ERR(dump_dev.class)) {
		pr_info("%s, class_create failed\n", __func__);
		result = PTR_ERR(dump_dev.class);
		goto err_cdev;
	}

	dump_dev.device = device_create(dump_dev.class, NULL, dump_dev.devid, NULL, DEVICE_NAME);
	if (IS_ERR(dump_dev.device)) {
		pr_info("%s, device_create failed\n", __func__);
		result = PTR_ERR(dump_dev.device);
		goto err_class;
	}

	demo_starting();
	generate_and_wake = kthread_run(demo_generate_data_thread, NULL, "demo_thread");
	if (IS_ERR(generate_and_wake)) {
		/*
		 * Returning an error makes the module load fail, so everything
		 * registered above must be torn down again.
		 */
		result = PTR_ERR(generate_and_wake);
		generate_and_wake = NULL;
		pr_err("%s, thread create failed\n", __func__);
		goto err_device;
	}
	pr_info("%s, thread create success\n", __func__);

	pr_info("%s <===\n", __func__);
	return 0;

err_device:
	device_destroy(dump_dev.class, dump_dev.devid);
err_class:
	class_destroy(dump_dev.class);
err_cdev:
	cdev_del(&dump_dev.cdev);
err_region:
	unregister_chrdev_region(dump_dev.devid, DEVICE_CNT);
	return result;
}

/*
 * demo_exit - module exit point.
 *
 * Sets the stop flag (releasing any sleeping reader), stops the producer
 * thread and waits for it to finish, then unregisters the device in the
 * reverse order of demo_init(). The thread must be fully stopped before
 * this function returns, otherwise it would keep executing module code
 * after the module has been unloaded.
 */
static void demo_exit(void)
{
	pr_info("%s ===>\n", __func__);
	demo_stopping();

	if (generate_and_wake) {
		/* Blocks until demo_generate_data_thread() has returned. */
		kthread_stop(generate_and_wake);
		generate_and_wake = NULL;
	}

	device_destroy(dump_dev.class, dump_dev.devid);
	class_destroy(dump_dev.class);
	cdev_del(&dump_dev.cdev);
	unregister_chrdev_region(dump_dev.devid, DEVICE_CNT);
	pr_info("%s <===\n", __func__);
}

/* Register the module entry/exit points and declare the module licence. */
module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

源码参考

/*
 * Wait on wq_head in TASK_INTERRUPTIBLE state until condition is true.
 * Expands to ___wait_event() with exclusive = 0, an initial result of 0
 * and schedule() as the command executed on each loop iteration.
 */
#define __wait_event_interruptible(wq_head, condition)				\
	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, schedule())

/*
 * The below macro ___wait_event() has an explicit shadow of the __ret
 * variable when used from the wait_event_*() macros.
 *
 * This is so that both can use the ___wait_cond_timeout() construct
 * to wrap the condition.
 *
 * The type inconsistency of the wait_event_*() __ret variable is also
 * on purpose; we use long where we can return timeout values and int
 * otherwise.
 *
 * Flow of the macro body:
 *  - each iteration calls prepare_to_wait_event(), then tests condition;
 *  - if condition is true the loop is left with break, finish_wait() is
 *    called and the macro evaluates to the initial value of ret;
 *  - otherwise, if the state is interruptible and prepare_to_wait_event()
 *    returned non-zero, that value becomes the result and finish_wait()
 *    is skipped via the __out label;
 *  - otherwise cmd is executed and the loop repeats.
 */

#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd)		\
({										\
	__label__ __out;							\
	struct wait_queue_entry __wq_entry;					\
	long __ret = ret;	/* explicit shadow */				\
										\
	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
	for (;;) {								\
		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
										\
		if (condition)							\
			break;							\
										\
		if (___wait_is_interruptible(state) && __int) {			\
			__ret = __int;						\
			goto __out;						\
		}								\
										\
		cmd;								\
	}									\
	finish_wait(&wq_head, &__wq_entry);					\
__out:	__ret;									\
})

// Prepare the current task to wait on wq_head.
// The return value is either 0 or negative (-ERESTARTSYS): under the
// wait-queue lock, if a signal is pending for the given state the entry is
// removed from the queue and -ERESTARTSYS is returned; otherwise the entry
// is queued if it is not already on a list (at the tail for exclusive
// waiters), the task state is set to `state`, and 0 is returned.
long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	long ret = 0;

	spin_lock_irqsave(&wq_head->lock, flags);
	if (signal_pending_state(state, current)) {
		/*
		 * Exclusive waiter must not fail if it was selected by wakeup,
		 * it should "consume" the condition we were waiting for.
		 *
		 * The caller will recheck the condition and return success if
		 * we were already woken up, we can not miss the event because
		 * wakeup locks/unlocks the same wq_head->lock.
		 *
		 * But we need to ensure that set-condition + wakeup after that
		 * can't see us, it should wake up another exclusive waiter if
		 * we fail.
		 */
		list_del_init(&wq_entry->entry);
		ret = -ERESTARTSYS;
	} else {
		if (list_empty(&wq_entry->entry)) {
			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
				__add_wait_queue_entry_tail(wq_head, wq_entry);
			else
				__add_wait_queue(wq_head, wq_entry);
		}
		set_current_state(state);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);

//========================================================================

/*
 * Wake-up on wait queue x: forwards to __wake_up() with mode
 * TASK_INTERRUPTIBLE, nr_exclusive = 1 and a NULL key.
 */
#define wake_up_interruptible(x)	__wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)

/*
 * Thin wrapper: passes wq_head, mode, nr_exclusive and key through to
 * __wake_up_common_lock(), with 0 as the fourth argument.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, void *key)
{
	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);

总结

该文缺陷如下:
(1) demo_exit 时,除了先调用 demo_stopping,还必须等待 demo_generate_data_thread 真正退出之后才能卸载模块。最初直接调用 kthread_stop 发现无法停止该线程(线程循环必须检查 kthread_should_stop() 才能响应 kthread_stop),因此本文最初的方案是在 demo_generate_data_thread 中用 flag 的方式结束它;该 bug 的学习放入另一篇文章中
(2) 借鉴其他文章, 采用 内存屏障的方式进行同步,需要单独在另一个文章中学习
(3) 只实现了 read,write 尚未实现

该文运用如下:
(1) 可将kernel的数据搬到user

学习了一下午,收获蛮多。poll 的功能很强大,本文只是一个通用方案;epoll 同样依赖驱动在 file_operations 中提供的 .poll 回调来获取就绪事件

Guess you like

Origin blog.csdn.net/wangkai6666/article/details/118654481