#include <stdio.h>
unsigned short int A[] = {1,2,3,4}; // array with 4 elements
/*
 * Scalar version: doubles every element of the global array A in place
 * by adding each element to itself, one element per loop iteration.
 * Always returns 0.
 */
int main(void)
{
    enum { ELEM_COUNT = 4 }; /* number of elements of A that are processed */
    int idx = 0;

    while (idx < ELEM_COUNT) {
        A[idx] += A[idx];
        idx++;
    }
    return 0;
}
对应的 NEON intrinsics 版本：
#include <stdio.h>
#include <arm_neon.h>
unsigned short int A[] = {1,2,3,4}; // array with 4 elements
/*
 * NEON intrinsics version: doubles the four 16-bit elements of the global
 * array A in place using one vector load, one vector add and one vector
 * store. Always returns 0.
 */
int main(void)
{
    /* Load the four 16-bit elements of A into a single vector. */
    const uint16x4_t lanes = vld1_u16(A);

    /* Adding the vector to itself lane by lane doubles every element. */
    const uint16x4_t doubled = vadd_u16(lanes, lanes);

    /* Write the four doubled lanes back over A. */
    vst1_u16(A, doubled);
    return 0;
}
编译命令：gcc -S normal.c -o normal.s
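Comparing the assembly generated for the normal (scalar) build with that of the NEON build, you can clearly see that on ARMv8.2 the instructions themselves are not NEON-specific; only the registers are NEON registers. In the register names, V denotes a NEON register, and the letter gives the element width: B stands for 8 bits, H for 16 bits, S for 32 bits, and D for 64 bits.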