/dev/mem同步写不能使用msync的MS_SYNC选项探究

问题

做了个测试板子的程序,里面有一项写铁电存储器(FRAM)的功能,要求写入之后立即断电,重启后校验数据的准确性;铁电存储器在硬件设计上是直接映射到物理地址空间的,于是使用mmap映射/dev/mem文件来访问它,写入之后自然地使用msync进行同步,最后使用munmap解除映射;

然而,当我运行这段程序的时候,发现msync的MS_SYNC选项进行同步的时候会返回错误,错误码是EINVAL;这就奇怪了;

查原因

1. 查看MAN手册,如下:当地址不是页的整数倍,或者参数传递错误时才返回这个结果;

EINVAL addr is not a multiple of PAGESIZE; or any bit other than MS_ASYNC | MS_INVALIDATE | MS_SYNC is set in flags; or both MS_SYNC and MS_ASYNC are set in flags.

反复验证,发现地址没问题,而且将MS_SYNC换成MS_ASYNC就没问题了,所以怀疑是内核不支持这个同步选项;为了求证,查看内核代码:

2. sys_msync这个系统调用,在校验参数时,如果不合法会返回-EINVAL,这点如上述MAN手册所描述;

 1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 2 {
 3     unsigned long end;
 4     struct mm_struct *mm = current->mm;
 5     struct vm_area_struct *vma;
 6     int unmapped_error = 0;
 7     int error = -EINVAL;
 8 
 9     if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
10         goto out;
11     if (start & ~PAGE_MASK)
12         goto out;
13     if ((flags & MS_ASYNC) && (flags & MS_SYNC))
14         goto out;
15         ....
16 }

3. 继续往下看代码,有这么一段:如果带有MS_SYNC标记,并且该vma有对应的文件、且是共享映射(VM_SHARED),就会执行do_fsync();如果do_fsync()出错,则直接把这个错误码返回;

 1 asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
 2 {
 3     ...
 4         if ((flags & MS_SYNC) && file &&
 5                 (vma->vm_flags & VM_SHARED)) {
 6             get_file(file);
 7             up_read(&mm->mmap_sem);
 8             error = do_fsync(file, 0);
 9             fput(file);
10             if (error || start >= end)
11                 goto out;
12             down_read(&mm->mmap_sem);
13             vma = find_vma(mm, start);
14         } else {
15             if (start >= end) {
16                 error = 0;
17                 goto out_unlock;
18             }
19             vma = vma->vm_next;
20         }
21     }
22 out_unlock:
23     up_read(&mm->mmap_sem);
24 out:
25     return error ? : unmapped_error;
26 }

4. 在do_fsync函数中,会对file_operations和里面的fsync函数做校验,如果没有,则返回-EINVAL,基本上可以确定,正是因为该文件没有实现file_operations里面的fsync函数,所以返回参数错误了;

 1 long do_fsync(struct file *file, int datasync)
 2 {
 3     int ret;
 4     int err;
 5     struct address_space *mapping = file->f_mapping;
 6 
 7     if (!file->f_op || !file->f_op->fsync) {
 8         /* Why?  We can still call filemap_fdatawrite */
 9         ret = -EINVAL;
10         goto out;
11     }
12 
13     ret = filemap_fdatawrite(mapping);
14 
15     /*
16      * We need to protect against concurrent writers, which could cause
17      * livelocks in fsync_buffers_list().
18      */
19     mutex_lock(&mapping->host->i_mutex);
20     err = file->f_op->fsync(file, file->f_path.dentry, datasync);
21     if (!ret)
22         ret = err;
23     mutex_unlock(&mapping->host->i_mutex);
24     err = filemap_fdatawait(mapping);
25     if (!ret)
26         ret = err;
27 out:
28     return ret;
29 }

5. 我们来看看内存设备是在什么时候初始化的,如下代码:chr_dev_init先通过register_chrdev以memory_fops注册主设备号为MEM_MAJOR的字符设备,然后遍历devlist,对其中每一个内存设备调用device_create创建设备,其中就包括/dev/mem;

 1 static int __init chr_dev_init(void)
 2 {
 3     int i;
 4     int err;
 5 
 6     err = bdi_init(&zero_bdi);
 7     if (err)
 8         return err;
 9 
10     if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
11         printk("unable to get major %d for memory devs\n", MEM_MAJOR);
12 
13     mem_class = class_create(THIS_MODULE, "mem");
14     for (i = 0; i < ARRAY_SIZE(devlist); i++)
15         device_create(mem_class, NULL,
16                   MKDEV(MEM_MAJOR, devlist[i].minor),
17                   devlist[i].name);
18 
19     return 0;
20 }

6. 这个/dev/mem对应着一组文件操作函数(file_operations),即如下代码devlist中的mem_fops:

 1 static const struct {
 2     unsigned int        minor;
 3     char            *name;
 4     umode_t            mode;
 5     const struct file_operations    *fops;
 6 } devlist[] = { /* list of minor devices */
 7     {1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
 8     {2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
 9     {3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
10 #ifdef CONFIG_DEVPORT
11     {4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
12 #endif
13     {5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
14     {7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
15     {8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
16     {9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
17     {11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
18 #ifdef CONFIG_CRASH_DUMP
19     {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
20 #endif
21 };

7. 看看这个mem_fops的实现,如下,可见其并没有实现fsync函数;

1 static const struct file_operations mem_fops = {
2     .llseek        = memory_lseek,
3     .read        = read_mem,
4     .write        = write_mem,
5     .mmap        = mmap_mem,
6     .open        = open_mem,
7     .get_unmapped_area = get_unmapped_area_mem,
8 };

到这,问题总算水落石出了;

8. 再来看看mmap函数的实现,里面调用了这个函数phys_mem_access_prot;

 1 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 2 {
 3     size_t size = vma->vm_end - vma->vm_start;
 4 
 5     if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
 6         return -EINVAL;
 7 
 8     if (!private_mapping_ok(vma))
 9         return -ENOSYS;
10 
11     vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
12                          size,
13                          vma->vm_page_prot);
14 
15     /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
16     if (remap_pfn_range(vma,
17                 vma->vm_start,
18                 vma->vm_pgoff,
19                 size,
20                 vma->vm_page_prot))
21         return -EAGAIN;
22     return 0;
23 }

9. 上面提到的phys_mem_access_prot函数如下,其中通过uncached_access判断是否要以非缓存(uncached)方式访问;如果是,则用pgprot_noncached修改页保护属性后返回;

 1 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
 2 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 3                      unsigned long size, pgprot_t vma_prot)
 4 {
 5 #ifdef pgprot_noncached
 6     unsigned long offset = pfn << PAGE_SHIFT;
 7 
 8     if (uncached_access(file, offset))
 9         return pgprot_noncached(vma_prot);
10 #endif
11     return vma_prot;
12 }
13 #endif

10. 进入uncached_access非缓存访问函数,可见在i386、x86_64以及默认分支中,只要文件打开时带有O_SYNC标志就返回1,即采用非缓存方式访问(ia64分支会忽略O_SYNC,MIPS分支则交给__uncached_access判断);

 1 static inline int uncached_access(struct file *file, unsigned long addr)
 2 {
 3 #if defined(__i386__) && !defined(__arch_um__)
 4     /*
 5      * On the PPro and successors, the MTRRs are used to set
 6      * memory types for physical addresses outside main memory,
 7      * so blindly setting PCD or PWT on those pages is wrong.
 8      * For Pentiums and earlier, the surround logic should disable
 9      * caching for the high addresses through the KEN pin, but
10      * we maintain the tradition of paranoia in this code.
11      */
12     if (file->f_flags & O_SYNC)
13         return 1;
14      return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
15           test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
16           test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
17           test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
18       && addr >= __pa(high_memory);
19 #elif defined(__x86_64__) && !defined(__arch_um__)
20     /* 
21      * This is broken because it can generate memory type aliases,
22      * which can cause cache corruptions
23      * But it is only available for root and we have to be bug-to-bug
24      * compatible with i386.
25      */
26     if (file->f_flags & O_SYNC)
27         return 1;
28     /* same behaviour as i386. PAT always set to cached and MTRRs control the
29        caching behaviour. 
30        Hopefully a full PAT implementation will fix that soon. */       
31     return 0;
32 #elif defined(CONFIG_IA64)
33     /*
34      * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
35      */
36     return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
37 #elif defined(CONFIG_MIPS)
38     {
39         extern int __uncached_access(struct file *file,
40                          unsigned long addr);
41 
42         return __uncached_access(file, addr);
43     }
44 #else
45     /*
46      * Accessing memory above the top the kernel knows about or through a file pointer
47      * that was marked O_SYNC will be done non-cached.
48      */
49     if (file->f_flags & O_SYNC)
50         return 1;
51     return addr >= __pa(high_memory);
52 #endif
53 }

好了,分析完毕;

解决办法

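在打开/dev/mem时,使用如下方式,即open增加O_SYNC选项,这个选项即上面uncached_access函数使用的判断标记;对普通文件而言,O_SYNC表示每次写操作都要等到数据和文件属性都同步到物理存储才返回;而对/dev/mem而言,按上面的代码分析,带O_SYNC打开后,mmap得到的映射会被设置为非缓存(uncached)属性,写操作不再经过CPU缓存,因此不需要再依赖msync的MS_SYNC选项来同步;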

1 int fd = open("/dev/mem", O_RDWR|O_SYNC);

参考文章:

https://blog.csdn.net/wlp600/article/details/6893636

http://www.armadeus.org/wiki/index.php?title=FPGA_registers_access_from_Linux_userspace

https://stackoverflow.com/questions/20750176/how-to-get-writes-via-an-mmap-mapped-memory-pointer-to-flush-immediately

https://blog.csdn.net/tiantao2012/article/details/52168383?locationNum=2&fps=1

猜你喜欢

转载自www.cnblogs.com/wanpengcoder/p/11767208.html