glibc中memcpy和memmove函数的实现

两三年前的一篇笔记,挺有意思的。

[ glibc-2.12.2 ] 中memcpy()的源码,加上个别中文注释:

// 位于 string/memcpy.c
#include <string.h>
#include <memcopy.h>
#include <pagecopy.h>

#undef memcpy

/* Copy LEN bytes from SRCPP to DSTPP, always working from the lowest
   address towards the highest, and return DSTPP.

   The copy runs in up to three phases: a short byte copy that brings the
   destination to an OPSIZ boundary, a bulk page/word copy, and a final
   byte copy of whatever is left.  When LEN is below OP_T_THRES only the
   final byte copy runs.  */
void *
memcpy (dstpp, srcpp, len)
     void *dstpp;
     const void *srcpp;
     size_t len;
{
  unsigned long int dstp = (long int) dstpp; // destination address, as an integer
  unsigned long int srcp = (long int) srcpp; // source address, as an integer

  /* Copy from the beginning to the end.  */

  /* If there not too few bytes to copy, use word copy.  */
  if (len >= OP_T_THRES) /* Article note: OP_T_THRES is defined as 16 or 8, depending on the configuration.  */
    {
      /* Copy just a few bytes to make DSTP aligned.  */
      /* Remainder of the (unsigned) negation: (-dstp) % OPSIZ is the distance
         from dstp up to the next OPSIZ-aligned address, 0 if dstp is already
         aligned.  LEN is reduced before the copy; the i386 BYTE_COPY_FWD
         writes back to its first two arguments, so the same expression would
         no longer give this value afterwards.  */
      len -= (-dstp) % OPSIZ;
      BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

      /* Copy whole pages from SRCP to DSTP by virtual address manipulation,
      as much as possible.  */
      // Article note: some platforms may copy whole pages through the virtual
      // memory system; i386 does not support this and the macro is empty there.
      PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

      /* Copy from SRCP to DSTP taking advantage of the known alignment of
        DSTP.  Number of bytes remaining is put in the third argument,
        i.e. in LEN.  This number may vary from machine to machine.  */

      WORD_COPY_FWD (dstp, srcp, len, len);

      /* Fall out and copy the tail.  */
    }

  /* There are just a few bytes to copy.  Use byte memory operations.  */
  BYTE_COPY_FWD (dstp, srcp, len);

  return dstpp;
}
/* glibc-internal macro; its definition is not shown here.  */
libc_hidden_builtin_def (memcpy)

范围为[0,M)的整数计量系统,其模为M。若 a + b = M,则a与b互为补数。
可以考虑8位二进制数,M为2^8=256。若a = 17,则 a的补数: b = 239。
以 uint8_t val = 17 为例:对无符号数取负并按模256截断,就得到它的补数:(uint8_t)(-val) = 256 - 17 = 239;按4字节对齐,239 % 4 = 3。
离17最近的两个4字节对齐地址分别是16和20:与16的距离是 val % 4 == 1,与20的距离是 (-val) % 4 == 3。
所以,(-dstp) % OPSIZ 一下就得到了dstp到(下一个以OPSIZ字节对齐的目的地址)之间的距离!
随后先把srcp开始的"(-dstp) % OPSIZ"大小的几个字节拷贝过去,同时len -=这么多;这是第一步。

第二步,对于字节对齐的部分,用PAGE_COPY_FWD_MAYBE或WORD_COPY_FWD实现快速拷贝,
WORD_COPY_FWD第四个参数是预计要拷贝的总字节数,第三个参数如原注释所说,是剩下未完成的字节数,放进len变量。

第三步,剩下的len不够字节对齐的大小,用BYTE_COPY_FWD一个字节一个字节完成拷贝。


i386平台下字节拷贝和字节块拷贝的实现:

注:i386平台指的是Intel32位处理器上硬件和GCC代码的组合.
       x86_64指代在AMD和Intel的较新的64位处理器上运行的硬件和GCC代码的组合.

BYTE_COPY_FWD 用"rep movsb"指令,实现one byte by one byte的拷贝。 

//memcopy.h
/* i386 version of BYTE_COPY_FWD: copy NBYTES bytes from SRC_BP to DST_BP,
   one byte at a time, going forward.

   "cld" clears the direction flag and "rep movsb" performs the copy.
   DST_BP and SRC_BP are bound to EDI and ESI as both inputs ("0"/"1") and
   outputs ("=D"/"=S"), so both arguments must be lvalues and are
   overwritten with the registers' final values.  NBYTES is loaded into ECX
   ("2" -> "=c"); its final value lands in the scratch variable __d0 and is
   discarded.  The "memory" clobber tells the compiler that memory is
   modified by the asm.  */
#undef    BYTE_COPY_FWD
#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes)                      \
  do {                                          \
    int __d0;                                      \
    asm volatile(/* Clear the direction flag, so copying goes forward.  */    \
         "cld\n"                              \
         /* Copy bytes.  */                          \
         "rep\n"                              \
         "movsb" :                              \
         "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :              \
         "0" (dst_bp), "1" (src_bp), "2" (nbytes) :              \
         "memory");                              \
  } while (0)

/* i386 version of WORD_COPY_FWD: copy NBYTES / 4 four-byte longwords from
   SRC_BP to DST_BP, going forward, then store NBYTES % 4 in NBYTES_LEFT.

   "cld" clears the direction flag and "rep movsl" performs the copy with
   the longword count in ECX.  As in BYTE_COPY_FWD, DST_BP and SRC_BP are
   bound to EDI and ESI as both inputs and outputs, so they must be lvalues
   and are overwritten with the registers' final values.  NBYTES_LEFT is
   assigned only after the asm statement, which is why callers may pass the
   same variable for NBYTES_LEFT and NBYTES.  */
#undef    WORD_COPY_FWD
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)              \
  do                                          \
    {                                          \
      int __d0;                                      \
      asm volatile(/* Clear the direction flag, so copying goes forward.  */  \
           "cld\n"                              \
           /* Copy longwords.  */                      \
           "rep\n"                              \
           "movsl" :                              \
            "=D" (dst_bp), "=S" (src_bp), "=c" (__d0) :              \
           "0" (dst_bp), "1" (src_bp), "2" ((nbytes) / 4) :          \
           "memory");                              \
      (nbytes_left) = (nbytes) % 4;                          \
    } while (0)

        WORD_COPY_FWD利用i386的movsl指令实现四字节拷贝。首先使用cld指令将DF标志清零(如果DF标志被清零,那么每条movs指令执行之后ESI和EDI寄存器中的数值就会递增。如果DF标志通过STD指令被设置,那么每条movs指令执行之后ESI和EDI寄存器中的数值就会递减)。接着使用"rep movsl"指令实现每次四字节的拷贝。
        rep movsl 指令每次从 ESI 寄存器指向的内存块复制 4 个字节到 EDI 指向的内存块,直到 ECX 为 0 时结束(n/4 次)。最后如果字节数不是 4 的整数倍,就还需要移动 n mod 4 个字节(nbytes_left)。
        WORD_COPY_FWD把原本n次的movs操作变为n/4次,如果movsl和movsb花费相同的cpu时钟周期,那优化后的时间约为原来的四分之一!这就是glibc中反复优化过的结果。

        在test-mempcpy.c中,简化的mempcpy是这样的(它与memcpy的拷贝过程相同,区别只在返回值:返回的是 dst + n,即最后一个被写入字节之后的位置):

/* Reference mempcpy: copy N bytes from SRC to DST one byte at a time,
   front to back, and return a pointer to the byte just past the last
   one written (DST + N).  */
char *
simple_mempcpy (char *dst, const char *src, size_t n)
{
  size_t i;

  for (i = 0; i < n; i++)
    dst[i] = src[i];

  return dst + n;
}

另外,memmove()和memcpy()的作用是一样的,唯一的区别是,当内存发生局部重叠的时候,memmove保证拷贝的结果是正确的,memcpy不保证拷贝的结果的正确。

memmove()的实现源码:

#include <string.h>
#include <memcopy.h>
#include <pagecopy.h>

/* All this is so that bcopy.c can #include
   this file after defining some things.  */
/* Default parameter names and qualifiers.  A file that includes this one
   (bcopy.c, per the comment above) can predefine a1/a2 and
   RETURN/rettype to change the argument order and the return convention.  */
#ifndef	a1
#define	a1	dest	/* First arg is DEST.  */
#define	a1const
#define	a2	src	/* Second arg is SRC.  */
#define	a2const	const
#undef memmove
#endif
#if	!defined(RETURN) || !defined(rettype)
#define	RETURN(s)	return (s)	/* Return DEST.  */
#define	rettype		void *
#endif


/* Copy LEN bytes from SRC to DEST, choosing the copy direction so that
   overlapping regions are handled: forward when that cannot overwrite
   source bytes that are still to be read, backward otherwise.  With the
   default macros above this returns DEST.  */
rettype
memmove (a1, a2, len)
     a1const void *a1;
     a2const void *a2;
     size_t len;
{
  unsigned long int dstp = (long int) dest;
  unsigned long int srcp = (long int) src;

  /* This test makes the forward copying code be used whenever possible.
     Reduces the working set.  */
  /* The subtraction is unsigned: if DSTP is below SRCP it wraps around to a
     very large value, so the test holds both when the destination starts
     before the source and when it starts at least LEN bytes after it.  */
  if (dstp - srcp >= len)	/* *Unsigned* compare!  */
    {
      /* Copy from the beginning to the end.  */

      /* If there not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
	{
	  /* Copy just a few bytes to make DSTP aligned.  */
	  /* (-dstp) % OPSIZ is the distance up to the next OPSIZ boundary.  */
	  len -= (-dstp) % OPSIZ;
	  BYTE_COPY_FWD (dstp, srcp, (-dstp) % OPSIZ);

	  /* Copy whole pages from SRCP to DSTP by virtual address
	     manipulation, as much as possible.  */

	  PAGE_COPY_FWD_MAYBE (dstp, srcp, len, len);

	  /* Copy from SRCP to DSTP taking advantage of the known
	     alignment of DSTP.  Number of bytes remaining is put
	     in the third argument, i.e. in LEN.  This number may
	     vary from machine to machine.  */

	  WORD_COPY_FWD (dstp, srcp, len, len);

	  /* Fall out and copy the tail.  */
	}

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_FWD (dstp, srcp, len);
    }
  else
    {
      /* Copy from the end to the beginning.  */
      /* Both addresses now point one past the last byte of their region.  */
      srcp += len;
      dstp += len;

      /* If there not too few bytes to copy, use word copy.  */
      if (len >= OP_T_THRES)
	{
	  /* Copy just a few bytes to make DSTP aligned.  */
	  /* Going backward, dstp % OPSIZ is the distance down to the
	     previous OPSIZ boundary.  */
	  len -= dstp % OPSIZ;
	  BYTE_COPY_BWD (dstp, srcp, dstp % OPSIZ);

	  /* Copy from SRCP to DSTP taking advantage of the known
	     alignment of DSTP.  Number of bytes remaining is put
	     in the third argument, i.e. in LEN.  This number may
	     vary from machine to machine.  */

	  WORD_COPY_BWD (dstp, srcp, len, len);

	  /* Fall out and copy the tail.  */
	}

      /* There are just a few bytes to copy.  Use byte memory operations.  */
      BYTE_COPY_BWD (dstp, srcp, len);
    }

  RETURN (dest);
}

memmove还有个相视一笑的版本(笔试时常用):

/* Portable byte-at-a-time memmove, the version usually expected in
   written interviews.

   Copies LEN bytes from SRC to DEST and returns DEST.  The two regions
   may overlap.  If either pointer is NULL nothing is copied and NULL is
   returned.  */
void *
memmove (void *dest, const void *src, size_t len)
{
  /* Work through char pointers: void pointers cannot be dereferenced, and
     DEST itself must stay untouched so it can be returned.  */
  char *pdst = dest;
  const char *psrc = src;

  if (dest == NULL || src == NULL)
    return NULL;

  /* A forward copy is safe when DEST starts at or before SRC (every byte
     is read before it can be overwritten) or at or after SRC + LEN (the
     regions do not overlap).  */
  if (pdst <= psrc || pdst >= psrc + len)
    {
      while (len--)
        *pdst++ = *psrc++;
    }
  else
    {
      /* DEST starts inside the source region: copy from the end to the
         beginning so the source tail is read before it is overwritten.  */
      psrc += len;
      pdst += len;

      while (len--)
        *--pdst = *--psrc;
    }

  return dest;
}

它在test-memmove.c中是这样写的:

/* Reference memmove: copy N bytes from SRC to DST one byte at a time and
   return DST.  When SRC lies below DST the copy runs from the last byte
   to the first, otherwise from the first to the last, so overlapping
   regions are handled.  */
char *
simple_memmove (char *dst, const char *src, size_t n)
{
  size_t i;

  if (src >= dst)
    {
      /* Forward copy.  */
      for (i = 0; i < n; i++)
        dst[i] = src[i];
    }
  else
    {
      /* Backward copy, highest byte first.  */
      for (i = n; i > 0; i--)
        dst[i - 1] = src[i - 1];
    }

  return dst;
}

猜你喜欢

转载自blog.csdn.net/rover2002/article/details/106321850