fs/timerfd.c内核分析

/*

 *  fs/timerfd.c

 *

 *  Copyright (C) 2007  Davide Libenzi <[email protected]>

 *

 *

 *  Thanks to Thomas Gleixner for code reviews and useful comments.

 *

 */

 

#include <linux/file.h>

#include <linux/poll.h>

#include <linux/init.h>

#include <linux/fs.h>

#include <linux/sched.h>

#include <linux/kernel.h>

#include <linux/list.h>

#include <linux/spinlock.h>

#include <linux/time.h>

#include <linux/hrtimer.h>

#include <linux/anon_inodes.h>

#include <linux/timerfd.h>

#include <linux/syscalls.h>

 

/*
 * Per-descriptor state of a timerfd. It is allocated in timerfd_create(),
 * reached through file->private_data by the file operations and the
 * timerfd_settime()/timerfd_gettime() syscalls, and freed in
 * timerfd_release().
 */
struct timerfd_ctx {

       /* The hrtimer backing this descriptor; timerfd_tmrproc() is its callback. */
       struct hrtimer tmr;

       /* Period taken from it_interval; a non-zero value means a periodic timer. */
       ktime_t tintv;

       /*
        * Wait queue that blocked readers and pollers sleep on. Its lock
        * (wqh.lock) is also the lock held around accesses to the fields
        * of this structure throughout this file.
        */
       wait_queue_head_t wqh;

       /* Expirations recorded since the last read/re-program; read() returns it. */
       u64 ticks;

       /* Set to 1 by timerfd_tmrproc(); cleared when the timer is read or re-armed. */
       int expired;

       /* Clock id given to timerfd_create() and passed to hrtimer_init(). */
       int clockid;

};

 

//该函数是timerfd的定时器超时函数。在timerfd超时时,该函数会设置定时器

 //超时标记位;增加定时器超时次数(在设置定时器循环模式时,可能会出现多//次超时没有被处理的情况);唤醒一个等待队列,从而唤醒可能存在的正被阻//塞的readselect

static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)

{

       struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);

       unsigned long flags;

       //spin_lock_irqsave函数的作用是采用锁机制保证变量的正确性,但是在过程

       //中可能会出现死锁的情况,所以采用一个中断状态来解决这样的问题,与

       //下面的spin_lock_irqrestore相对应。

       spin_lock_irqsave(&ctx->wqh.lock, flags);

     ctx->expired = 1;// 设置定时器超时标记位

       ctx->ticks++; //增加定时器超时次数

       wake_up_locked(&ctx->wqh); //唤醒一个等待队列

       spin_unlock_irqrestore(&ctx->wqh.lock, flags);

 

       return HRTIMER_NORESTART;

}

 

/*
 * Time left until the context's timer expires, clamped so that a timer
 * whose expiry already lies in the past reports zero instead of a
 * negative value.
 */
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
	ktime_t left = hrtimer_expires_remaining(&ctx->tmr);

	if (left.tv64 < 0)
		return ktime_set(0, 0);

	return left;
}

 

//一些初始化的工作

static void timerfd_setup(struct timerfd_ctx *ctx, int flags,

                       const struct itimerspec *ktmr)

{

       enum hrtimer_mode htmode;

       ktime_t texp;

 

       htmode = (flags & TFD_TIMER_ABSTIME) ?

              HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

 

       texp = timespec_to_ktime(ktmr->it_value);

       ctx->expired = 0;

       ctx->ticks = 0;

       ctx->tintv = timespec_to_ktime(ktmr->it_interval);

       hrtimer_init(&ctx->tmr, ctx->clockid, htmode);

       hrtimer_set_expires(&ctx->tmr, texp);

       ctx->tmr.function = timerfd_tmrproc;

       if (texp.tv64 != 0)

              //hrtimer_start函数将一个hrtimer加入到一个按照到期时间排序的红黑树中

              hrtimer_start(&ctx->tmr, texp, htmode);

}

 

/*
 * ->release() handler: tears down what timerfd_create() set up by
 * cancelling the timer and freeing the context.
 *
 * Always returns 0.
 */
static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *tfd = file->private_data;

	/*
	 * Stop the timer before the memory that embeds it is freed.
	 * NOTE(review): hrtimer_cancel() is expected to also cope with a
	 * timer that has already fired or whose callback is running -
	 * confirm against the hrtimer documentation.
	 */
	hrtimer_cancel(&tfd->tmr);
	kfree(tfd);

	return 0;
}

 

/*
 * ->poll() handler: registers the timerfd's wait queue with the caller's
 * poll_table, so that a timer expiry can wake up select/poll, and reports
 * POLLIN when at least one unread tick is pending.
 */
static unsigned int timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *tfd = file->private_data;
	unsigned long irqflags;
	unsigned int mask;

	/* Hook our wait queue into the poll table. */
	poll_wait(file, &tfd->wqh, wait);

	/* Sample ->ticks under the same lock the timer callback takes. */
	spin_lock_irqsave(&tfd->wqh.lock, irqflags);
	mask = tfd->ticks ? POLLIN : 0;
	spin_unlock_irqrestore(&tfd->wqh.lock, irqflags);

	return mask;
}

 

/*
 * read() handler of a timerfd: hands the caller the number of timer
 * expirations (a u64) accumulated since the last read or re-program.
 *
 * @file:  the timerfd file; its private_data is the struct timerfd_ctx.
 * @buf:   user buffer that receives the u64 tick count.
 * @count: size of @buf; must be at least sizeof(u64).
 * @ppos:  not used by this function.
 *
 * Returns sizeof(u64) on success, -EINVAL if @count is too small,
 * -EAGAIN if no tick is pending and the file is O_NONBLOCK,
 * -ERESTARTSYS if a signal becomes pending while waiting, or -EFAULT if
 * the copy to @buf fails (the pending ticks have been consumed by then).
 *
 * Without O_NONBLOCK the caller puts itself on ctx->wqh and sleeps until
 * timerfd_tmrproc() records a tick and wakes the queue.
 */

static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,

                         loff_t *ppos)

{

       struct timerfd_ctx *ctx = file->private_data;

       ssize_t res;

       u64 ticks = 0;

       DECLARE_WAITQUEUE(wait, current);

 

       /* The result is always a full u64; refuse buffers that cannot hold it. */
       if (count < sizeof(ticks))

              return -EINVAL;

       spin_lock_irq(&ctx->wqh.lock);

       /* Default result when nothing is pending and we must not block. */
       res = -EAGAIN;

       if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {

              /* Blocking read: queue ourselves (lock already held) and wait. */
              __add_wait_queue(&ctx->wqh, &wait);

              for (res = 0;;) {

                     /* Set the task state before re-checking the condition. */
                     set_current_state(TASK_INTERRUPTIBLE);

                     if (ctx->ticks) {

                            res = 0;

                            break;

                     }

                     if (signal_pending(current)) {

                            res = -ERESTARTSYS;

                            break;

                     }

                     /* Drop the lock while sleeping; retake it to re-check. */
                     spin_unlock_irq(&ctx->wqh.lock);

                     schedule();

                     spin_lock_irq(&ctx->wqh.lock);

              }

              __remove_wait_queue(&ctx->wqh, &wait);

              __set_current_state(TASK_RUNNING);

       }

       if (ctx->ticks) {

              ticks = ctx->ticks;

              if (ctx->expired && ctx->tintv.tv64) {

                     /*

                      * If tintv.tv64 != 0, this is a periodic timer that

                      * needs to be re-armed. We avoid doing it in the timer

                      * callback to avoid DoS attacks specifying a very

                      * short timer period.

                      */

                     /*
                      * NOTE(review): hrtimer_forward_now() presumably returns
                      * the number of periods it skipped; one expiration is
                      * already counted in ctx->ticks, hence the "- 1".
                      */
                     ticks += hrtimer_forward_now(&ctx->tmr,

                                               ctx->tintv) - 1;

                     hrtimer_restart(&ctx->tmr);

              }

              /* The pending expirations are consumed by this read. */
              ctx->expired = 0;

              ctx->ticks = 0;

       }

       spin_unlock_irq(&ctx->wqh.lock);

       /* Copy to user space only after the spinlock has been released. */
       if (ticks)

              res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);

       return res;

}

 

/*
 * File operations installed on every timerfd descriptor. The address of
 * this table also serves as the type tag checked by timerfd_fget().
 */
static const struct file_operations timerfd_fops = {
	.read		= timerfd_read,
	.poll		= timerfd_poll,
	.release	= timerfd_release,
};

 

/*
 * Look up the struct file behind @fd and make sure it is a timerfd.
 *
 * Returns the file with a reference held (release it with fput()),
 * ERR_PTR(-EBADF) if @fd does not name an open file, or
 * ERR_PTR(-EINVAL) if the file is not backed by timerfd_fops.
 */
static struct file *timerfd_fget(int fd)
{
	struct file *filp = fget(fd);

	if (!filp)
		return ERR_PTR(-EBADF);

	if (filp->f_op == &timerfd_fops)
		return filp;

	/* Not a timerfd: drop the reference taken by fget(). */
	fput(filp);
	return ERR_PTR(-EINVAL);
}

/*
 * timerfd_create(2): create a new timer object and return a file
 * descriptor for it.
 *
 * @clockid: CLOCK_MONOTONIC or CLOCK_REALTIME.
 * @flags:   any combination of the bits in TFD_CREATE_FLAGS.
 *
 * Allocates and initialises a struct timerfd_ctx (wait queue, clock id,
 * hrtimer) and asks anon_inode_getfd() for a descriptor that uses
 * timerfd_fops and carries the context.
 * NOTE(review): anon_inode_getfd() is a helper of the anonymous inode
 * facility (see <linux/anon_inodes.h> and fs/anon_inodes.c) - consult it
 * for how the file is actually set up.
 *
 * Returns the new descriptor, -EINVAL for unknown flags or an
 * unsupported clock, -ENOMEM if the context cannot be allocated, or the
 * negative error reported by anon_inode_getfd().
 */
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	struct timerfd_ctx *tfd;
	int fd;

	/* Check the TFD_* constants for consistency.  */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~TFD_CREATE_FLAGS)
		return -EINVAL;
	if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME)
		return -EINVAL;

	tfd = kzalloc(sizeof(*tfd), GFP_KERNEL);
	if (!tfd)
		return -ENOMEM;

	tfd->clockid = clockid;
	init_waitqueue_head(&tfd->wqh);
	hrtimer_init(&tfd->tmr, clockid, HRTIMER_MODE_ABS);

	fd = anon_inode_getfd("[timerfd]", &timerfd_fops, tfd,
			      flags & TFD_SHARED_FCNTL_FLAGS);
	if (fd < 0)
		kfree(tfd);	/* no file took ownership of the context */

	return fd;
}

 

/*
 * timerfd_settime(2): program a new expiry for a timerfd and start it.
 *
 * The timer is eventually started through timerfd_setup(), which calls
 * hrtimer_start() with timerfd_tmrproc() as the expiry callback.
 *
 * @ufd:   descriptor returned by timerfd_create().
 * @flags: TFD_TIMER_ABSTIME for an absolute expiry, 0 for a relative one;
 *         bits outside TFD_SETTIME_FLAGS are rejected.
 * @utmr:  the new setting (first expiry and interval).
 * @otmr:  if not NULL, receives the setting that was in effect before
 *         this call (remaining time and interval).
 *
 * Returns 0 on success, -EFAULT if @utmr cannot be read or @otmr cannot
 * be written, -EINVAL for bad flags or invalid timespec values, or the
 * error from timerfd_fget() (-EBADF / -EINVAL) for a bad descriptor.
 * Note that when writing @otmr fails, the timer has already been
 * re-programmed with the new values.
 */

SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,

              const struct itimerspec __user *, utmr,

              struct itimerspec __user *, otmr)

{

       struct file *file;

       struct timerfd_ctx *ctx;

       struct itimerspec ktmr, kotmr;

 

       /* Fetch and validate the user's arguments before touching the file. */
       if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))

              return -EFAULT;

 

       if ((flags & ~TFD_SETTIME_FLAGS) ||

           !timespec_valid(&ktmr.it_value) ||

           !timespec_valid(&ktmr.it_interval))

              return -EINVAL;

 

       file = timerfd_fget(ufd);

       if (IS_ERR(file))

              return PTR_ERR(file);

       ctx = file->private_data;

 

       /*

        * We need to stop the existing timer before reprogramming

        * it to the new values.

        */

       /*
        * The loop is left with ctx->wqh.lock held. A negative return from
        * hrtimer_try_to_cancel() makes us drop the lock and retry.
        * NOTE(review): presumably that value means the callback is running
        * right now - confirm against the hrtimer documentation.
        */
       for (;;) {

              spin_lock_irq(&ctx->wqh.lock);

              if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)

                     break;

              spin_unlock_irq(&ctx->wqh.lock);

              cpu_relax();

       }

 

       /*

        * If the timer is expired and it's periodic, we need to advance it

        * because the caller may want to know the previous expiration time.

        * We do not update "ticks" and "expired" since the timer will be

        * re-programmed again in the following timerfd_setup() call.

        */

       if (ctx->expired && ctx->tintv.tv64)

              hrtimer_forward_now(&ctx->tmr, ctx->tintv);

 

       /* Snapshot the old setting before it is overwritten below. */
       kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));

       kotmr.it_interval = ktime_to_timespec(ctx->tintv);

 

       /*

        * Re-program the timer to the new value ...

        */

       timerfd_setup(ctx, flags, &ktmr);

 

       spin_unlock_irq(&ctx->wqh.lock);

       fput(file);

       /* Copy the old setting out only after the spinlock has been released. */
       if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))

              return -EFAULT;

 

       return 0;

}

/*
 * timerfd_gettime(2): report how long the timer has until its next
 * expiry, together with its interval.
 *
 * @ufd:  descriptor returned by timerfd_create().
 * @otmr: user buffer that receives the remaining time and the interval.
 *
 * If the timer has already expired and is periodic, this call re-arms it:
 * the expiry is moved forward, the periods skipped meanwhile are added to
 * the tick count, and the timer is restarted.
 *
 * Returns 0 on success, -EFAULT if @otmr cannot be written, or the error
 * from timerfd_fget() (-EBADF / -EINVAL) for a bad descriptor.
 */
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
{
	struct itimerspec cur;
	struct timerfd_ctx *tfd;
	struct file *filp = timerfd_fget(ufd);

	if (IS_ERR(filp))
		return PTR_ERR(filp);
	tfd = filp->private_data;

	spin_lock_irq(&tfd->wqh.lock);
	if (tfd->expired && tfd->tintv.tv64) {
		tfd->expired = 0;
		/* One expiration is already counted in ->ticks, hence "- 1". */
		tfd->ticks += hrtimer_forward_now(&tfd->tmr, tfd->tintv) - 1;
		hrtimer_restart(&tfd->tmr);
	}
	cur.it_interval = ktime_to_timespec(tfd->tintv);
	cur.it_value = ktime_to_timespec(timerfd_get_remaining(tfd));
	spin_unlock_irq(&tfd->wqh.lock);
	fput(filp);

	/* Copy to user space only after the spinlock has been released. */
	if (copy_to_user(otmr, &cur, sizeof(cur)))
		return -EFAULT;

	return 0;
}

猜你喜欢

转载自carmark.iteye.com/blog/1026857
fs