mmap系统调用的实现

  1.  GLIBC 文件 sysdeps/unix/sysv/linux/generic/wordsize-32/mmap.c:
/* mmap wrapper implemented on top of the mmap2 system call.

   If OFFSET has any bit set below MMAP_PAGE_SHIFT, errno is set to EINVAL
   and MAP_FAILED is returned without entering the kernel.  Otherwise mmap2
   is invoked with six arguments, the last one being OFFSET shifted right by
   MMAP_PAGE_SHIFT rather than the byte offset, and the INLINE_SYSCALL
   result is returned cast to a pointer.  */
__ptr_t  
__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)  
{  
  if (offset & ((1 << MMAP_PAGE_SHIFT) - 1)) // note: the offset passed in here must be a multiple of 4k
    {     
      __set_errno (EINVAL);  
      return MAP_FAILED;  
    }     
  return (__ptr_t) INLINE_SYSCALL (mmap2, 6, addr, len, prot, flags, fd,   
                                   offset >> MMAP_PAGE_SHIFT);  
} 

INLINE_SYSCALL 定义为(sysdeps/unix/sysv/linux/arm/sysdep.h):

/* Issue system call NAME with NR arguments and translate a kernel error
   into the libc convention: when INTERNAL_SYSCALL_ERROR_P reports an error,
   errno is set from INTERNAL_SYSCALL_ERRNO and the expression evaluates to
   -1; otherwise it evaluates to the raw result.  The value is produced as
   an int through a GNU statement expression.  */
#undef INLINE_SYSCALL  
#define INLINE_SYSCALL(name, nr, args...)               \  
  ({ unsigned int _sys_result = INTERNAL_SYSCALL (name, , nr, args);    \  
     if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (_sys_result, ), 0))    \  
       {                                \  
     __set_errno (INTERNAL_SYSCALL_ERRNO (_sys_result, ));      \  
     _sys_result = (unsigned int) -1;               \  
       }                                \  
     (int) _sys_result; })
/* Convert the symbolic syscall NAME to its number with SYS_ify and forward
   everything else unchanged to INTERNAL_SYSCALL_RAW.  No errno handling is
   done at this level.  */
#undef INTERNAL_SYSCALL  
#define INTERNAL_SYSCALL(name, err, nr, args...)        \  
    INTERNAL_SYSCALL_RAW(SYS_ify(name), err, nr, args)  
/* Emit the actual trap.  _a1 is bound to register r0 and _nr to r7;
   LOAD_ARGS_<nr> (defined elsewhere; presumably places the NR arguments in
   their registers -- confirm) runs first, then the syscall number NAME is
   assigned to _nr and `swi 0x0' is executed.  The asm lists _a1 as its
   output, so the expression evaluates to whatever is left in r0.  The
   "memory" clobber keeps the compiler from caching memory across the call.  */
# undef INTERNAL_SYSCALL_RAW  
# define INTERNAL_SYSCALL_RAW(name, err, nr, args...)       \  
  ({                                \  
       register int _a1 asm ("r0"), _nr asm ("r7");     \  
       LOAD_ARGS_##nr (args)                    \  
       _nr = name;                      \  
       asm volatile ("swi   0x0 @ syscall " #name   \  
             : "=r" (_a1)               \  
             : "r" (_nr) ASM_ARGS_##nr          \  
             : "memory");               \  
       _a1; })

/* For Linux we can use the system call table in the header file 
    /usr/include/asm/unistd.h 
   of the kernel.  But these symbols do not follow the SYS_* syntax 
   so we have to redefine the `SYS_ify' macro here.  */  
/* Token-paste the syscall name onto the __NR_ prefix, so that
   SYS_ify(mmap2) expands to (__NR_mmap2).  */
#undef SYS_ify  
#define SYS_ify(syscall_name)   (__NR_##syscall_name)  
  2. Linux 内核代码
    arch/arm/include/uapi/asm/unistd.h:
/* Base value added to every syscall number when the OABI numbering is used. */
#define __NR_OABI_SYSCALL_BASE  0x900000  

/* Thumb and EABI builds number syscalls from 0; all other builds start at
   the OABI base defined above. */
#if defined(__thumb__) || defined(__ARM_EABI__)  
#define __NR_SYSCALL_BASE   0  
#else  
#define __NR_SYSCALL_BASE   __NR_OABI_SYSCALL_BASE  
#endif  

/* mmap2 is syscall 192 relative to the selected base. */
#define __NR_mmap2          (__NR_SYSCALL_BASE+192)  

arch/arm/kernel/entry-common.S:

/*=============================================================================  
 * SWI handler  
 *-----------------------------------------------------------------------------  
 */  

    .align  5  
ENTRY(vector_swi)  
    sub sp, sp, #S_FRAME_SIZE  
    stmia   sp, {r0 - r12}          @ Calling r0 - r12   
 ARM(   add r8, sp, #S_PC       )  
 ARM(   stmdb   r8, {sp, lr}^       )   @ Calling sp, lr  
 THUMB( mov r8, sp          )  
 THUMB( store_user_sp_lr r8, r10, S_SP  )   @ calling sp, lr  
    mrs r8, spsr            @ called from non-FIQ mode, so ok.   
    str lr, [sp, #S_PC]         @ Save calling PC  
    str r8, [sp, #S_PSR]        @ Save CPSR  
    str r0, [sp, #S_OLD_R0]     @ Save OLD_R0  
    zero_fp  
... ...  
/* 
 * Note: off_4k (r5) is always units of 4K.  If we can't do the requested 
 * offset, we return EINVAL. 
 *
 * Both paths end by storing r5 at [sp, #4] and branching to
 * sys_mmap_pgoff.  NOTE(review): [sp, #4] is presumably the stack slot
 * from which sys_mmap_pgoff takes its sixth argument (pgoff) -- confirm
 * against the syscall entry code.
 */  
sys_mmap2:  
#if PAGE_SHIFT > 12  
        /* Test the bits of r5 selected by PGOFF_MASK. */
        tst r5, #PGOFF_MASK  
        /*
         * None set (EQ): rescale r5 by shifting right PAGE_SHIFT - 12,
         * store it and continue in sys_mmap_pgoff.
         */
        moveq   r5, r5, lsr #PAGE_SHIFT - 12  
        streq   r5, [sp, #4]   
        beq sys_mmap_pgoff  
        /* Otherwise the offset cannot be expressed: return -EINVAL in r0. */
        mov r0, #-EINVAL  
        mov pc, lr  
#else  
        /* PAGE_SHIFT <= 12: r5 is passed on unchanged. */
        str r5, [sp, #4]   
        b   sys_mmap_pgoff  
#endif  
ENDPROC(sys_mmap2) 

arch/arm/kernel/calls.S (included by entry-common.S):

/*
 * Excerpt of the syscall table.  Counting on from the 190 marker, the
 * third entry below is number 192, which is the value __NR_mmap2 adds to
 * __NR_SYSCALL_BASE; that is how the mmap2 number reaches sys_mmap2.
 */
/* 190 */   CALL(sys_vfork)  
        CALL(sys_getrlimit)  
        CALL(sys_mmap2) 
/*
 * Prototype of the C entry point that the sys_mmap2 assembly stub branches
 * to.  NOTE(review): this declaration and the definition that follows are
 * C code, not part of calls.S; they presumably come from
 * include/linux/syscalls.h and mm/mmap.c respectively -- confirm.
 */
asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,  
            unsigned long prot, unsigned long flags,  
            unsigned long fd, unsigned long pgoff);
/*
 * mmap_pgoff system call: resolve the backing file (if any) and hand the
 * request to vm_mmap_pgoff().
 *
 * - File-backed mapping (MAP_ANONYMOUS clear): MAP_HUGETLB is rejected with
 *   -EINVAL, fd is looked up with fget(), and -EBADF is returned when the
 *   lookup fails.  For a hugepage file, len is rounded up to that file's
 *   huge page size.
 * - Anonymous mapping with MAP_HUGETLB: the huge page size is selected from
 *   the bits of flags above MAP_HUGE_SHIFT, len is rounded up to it, and a
 *   hugetlb file is created to back the mapping.
 * - Anonymous mapping without MAP_HUGETLB: file stays NULL.
 *
 * Returns the value of vm_mmap_pgoff(), or a negative errno from one of the
 * early exits above.
 */
SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,  
        unsigned long, prot, unsigned long, flags,  
        unsigned long, fd, unsigned long, pgoff)  
{  
    struct file *file = NULL;  
    /* Default result, returned as-is when fget() fails below. */
    unsigned long retval = -EBADF;  

    if (!(flags & MAP_ANONYMOUS)) {  
        audit_mmap_fd(fd, flags);  
        if (unlikely(flags & MAP_HUGETLB))  
            return -EINVAL;  
        file = fget(fd);  
        if (!file)  
            goto out;  
        if (is_file_hugepages(file))  
            len = ALIGN(len, huge_page_size(hstate_file(file)));  
    } else if (flags & MAP_HUGETLB) {  
        struct user_struct *user = NULL;  
        /*
         * NOTE(review): this lookup masks with SHM_HUGE_MASK while the
         * hugetlb_file_setup() call below masks the same bits with
         * MAP_HUGE_MASK -- confirm the two masks are interchangeable.
         */
        struct hstate *hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) &  
                           SHM_HUGE_MASK);  

        if (!hs)  
            return -EINVAL;  

        len = ALIGN(len, huge_page_size(hs));  
        /* 
         * VM_NORESERVE is used because the reservations will be 
         * taken when vm_ops->mmap() is called 
         * A dummy user value is used because we are not locking 
         * memory so no accounting is necessary 
         */  
        file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,  
                VM_NORESERVE,  
                &user, HUGETLB_ANONHUGE_INODE,  
                (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);  
        if (IS_ERR(file))  
            return PTR_ERR(file);  
    }  

    /* These two flags are cleared before the request is passed on. */
    flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);  

    retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);  
    /* Drop the reference taken by fget() or hugetlb_file_setup(). */
    if (file)  
        fput(file);  
out:  
    return retval;  
}  

比如应用层调用一个mmap函数
ptrdata=(char*)mmap(0, MMAP_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, dev, 0x32d50000);
libc会调用__mmap 函数

/* mmap wrapper implemented on top of the mmap2 system call (same function
   as quoted at the start of the article; the return type line is restored
   here so the definition is complete).

   If OFFSET has any bit set below MMAP_PAGE_SHIFT, errno is set to EINVAL
   and MAP_FAILED is returned without entering the kernel.  Otherwise mmap2
   is invoked with OFFSET shifted right by MMAP_PAGE_SHIFT as its last
   argument and the result is returned cast to a pointer.  */
__ptr_t
__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)  
{  
  if (offset & ((1 << MMAP_PAGE_SHIFT) - 1)) // note: the offset passed in here must be a multiple of 4k
    {     
      __set_errno (EINVAL);  
      return MAP_FAILED;  
    }     
  return (__ptr_t) INLINE_SYSCALL (mmap2, 6, addr, len, prot, flags, fd,   
                                   offset >> MMAP_PAGE_SHIFT);  
} 

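此时 offset 为 0x32d50000, 低 12 位全为 0, 即 4k 的整数倍, 因此传给 mmap2 的最后一个参数是 offset >> MMAP_PAGE_SHIFT = 0x32d50; 如果传入 0x32d50001 等不是 4k 整数倍的值, 则 __mmap 将 errno 置为 EINVAL 并 return MAP_FAILED;
1 << MMAP_PAGE_SHIFT = 0x00001000
(1 << MMAP_PAGE_SHIFT) - 1 = 0x00000FFF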

猜你喜欢

转载自blog.csdn.net/mxgsgtc/article/details/78409070