ARM Cortex-M memcpy的效率

c语言示例1如下:

/*
 * Copy `size` bytes from Src to Dst, one byte per iteration, using
 * array indexing on both buffers.
 *
 * Dst  - destination buffer, written at offsets 0 .. size-1
 * Src  - source buffer, read-only, read at offsets 0 .. size-1
 * size - number of bytes to copy; 0 copies nothing
 *
 * The copy runs forward from the lowest address.
 */
void mem_cpy(void *Dst,const void *Src,unsigned int size)
{
    unsigned int i;
    unsigned char *pDst = (unsigned char *)Dst;
    /* Keep the const qualifier: the source is only ever read. */
    const unsigned char *pSrc = (const unsigned char *)Src;

    for(i=0; i<size; i++)
    {
        pDst[i] = pSrc[i];
    }
}

经过优化后的汇编代码如下:

0x0800022A B510      PUSH          {r4,lr}

;         for(i=0; i<size; i++) 
0x0800022C 2300      MOVS          r3,#0x00
0x0800022E E002      B             0x08000236

;               pDst[i] = pSrc[i]; 
0x08000230 5CCC      LDRB          r4,[r1,r3]
0x08000232 54C4      STRB          r4,[r0,r3]
0x08000234 1C5B      ADDS          r3,r3,#1
0x08000236 4293      CMP           r3,r2
0x08000238 D3FA      BCC           0x08000230

0x0800023A 4770      BX            lr

c语言示例2如下:

/*
 * Copy `size` bytes from Src to Dst, one byte per iteration, using
 * post-incremented byte pointers instead of array indexing.
 *
 * Dst  - destination buffer, written for `size` bytes
 * Src  - source buffer, read-only, read for `size` bytes
 * size - number of bytes to copy; 0 copies nothing
 *
 * The copy runs forward from the lowest address.
 */
void mem_cpy(void *Dst,const void *Src,unsigned int size)
{
    unsigned int i;
    unsigned char *pDst = (unsigned char *)Dst;
    /* Keep the const qualifier: the source is only ever read. */
    const unsigned char *pSrc = (const unsigned char *)Src;

    for(i=0; i<size; i++)
    {
        *pDst++ = *pSrc++;
    }
}

经过优化后的汇编代码如下:

0x0800022A B510      PUSH          {r4,lr}

;        for(i=0; i<size; i++) 
0x0800022C 2300      MOVS          r3,#0x00
0x0800022E E004      B             0x0800023A

;                *pDst++ = *pSrc++; 
0x08000230 F8114B01  LDRB          r4,[r1],#0x01
0x08000234 F8004B01  STRB          r4,[r0],#0x01
0x08000238 1C5B      ADDS          r3,r3,#1
0x0800023A 4293      CMP           r3,r2
0x0800023C D3F8      BCC           0x08000230

0x0800023E 4770      BX            lr

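上面两个例子的C代码写法稍有不同,但效率是一样的:两者的循环体都是5条指令(LDRB、STRB、ADDS、CMP、BCC)。区别仅在于示例2的LDRB/STRB使用了带后变址的32位Thumb-2编码,而示例1使用的是16位编码,因此示例2的汇编代码量稍微大一点(22字节对18字节)。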

c语言示例3如下:

/*
 * Copy `size` elements of type unsigned int from Src to Dst, one
 * element per iteration, using array indexing on both buffers.
 *
 * Dst  - destination array, written at indices 0 .. size-1
 * Src  - source array, read-only, read at indices 0 .. size-1
 * size - number of unsigned int elements (not bytes) to copy
 */
void mem_cpy(unsigned int *Dst,const unsigned int *Src,unsigned int size)
{
    unsigned int i;

    /* Forward copy; a size of 0 skips the loop body entirely. */
    for(i=0; i<size; i++)
    {
        Dst[i] = Src[i];
    }
}

经过优化后的汇编代码如下:

0x0800023C B510      PUSH          {r4,lr}
 
;      for(i = 0;i<size;i++)  
0x0800023E 2300      MOVS          r3,#0x00
0x08000240 E004      B             0x0800024C

;      Dst[i] = Src[i]; 
0x08000242 F8514023  LDR           r4,[r1,r3,LSL #2]
0x08000246 F8404023  STR           r4,[r0,r3,LSL #2]
0x0800024A 1C5B      ADDS          r3,r3,#1
0x0800024C 4293      CMP           r3,r2
0x0800024E D3F8      BCC           0x08000242

0x08000250 BD10      POP           {r4,pc}

c语言示例4如下:

/*
 * Copy `size` elements of type unsigned int from Src to Dst, one
 * element per iteration, advancing both pointers with post-increment.
 *
 * Dst  - destination array, written for `size` elements
 * Src  - source array, read-only, read for `size` elements
 * size - number of unsigned int elements (not bytes) to copy
 */
void mem_cpy(unsigned int *Dst,const unsigned int *Src,unsigned int size)
{
    unsigned int i;

    /* i only counts iterations; the addresses come from Dst and Src. */
    for(i=0; i<size; i++)
    {
        *Dst++ = *Src++;
    }
}

经过优化后的汇编代码如下:

0x0800023C B510      PUSH          {r4,lr}

;         for(i = 0;i<size;i++)  
0x0800023E 2300      MOVS          r3,#0x00
0x08000240 E002      B             0x08000248

;          *Dst++ = *Src++; 
0x08000242 C910      LDM           r1!,{r4}
0x08000244 1C5B      ADDS          r3,r3,#1
0x08000246 C010      STM           r0!,{r4}
0x08000248 4293      CMP           r3,r2
0x0800024A D3FA      BCC           0x08000242

0x0800024C BD10      POP           {r4,pc}

上面两个例子的C代码写法稍有不同,但效率是一样的:两者的循环体都是5条指令。区别仅在于示例3的LDR/STR使用了带移位寄存器偏移(LSL #2)的32位Thumb-2编码,而示例4使用的是16位的LDM/STM编码,因此示例3的汇编代码量稍微大一点(22字节对18字节)。

猜你喜欢

转载自blog.csdn.net/zuixin369/article/details/88082506