Thursday, February 26, 2015

 

Introducing MOVDQU, PADDB and MOVNTDQ instructions

 //-------------------------------------------  
 //1. use movdqu to:  
 //load array values in xmm0 and xmm1  
 //2. use paddb to:  
 //add each byte element in the two arrays.  
 //3. use movntdq to:
 //save the result in xmm1 to the sum array.  
 //Notes:  
 //If your cpu supports sse4.1 the movdqu can   
 //be replaced with movntdqa. 128bit movntdqa   
 //requires the operand to be 16byte aligned. Hence the  
 //array declarations are accompanied by aligned(16).  
 //for using movdqu aligned(16) is not required.  
 //--------------------------------------------  
 #include <stdio.h>  
 #include <stdlib.h>  
 int main() {  
  unsigned char __attribute__((aligned(16))) carray1[16];   
  unsigned char __attribute__((aligned(16))) carray2[16];   
  unsigned char sum[16];   
  int i = 0;  
  for(i=0;i<16;i++){  
    carray1[i] = i;  
    carray2[i] = carray1[i] ;  
  }  
  asm volatile ("movdqu %0, %%xmm0;" : :"m"(carray1[0]) :);  
  asm volatile ("movdqu %0, %%xmm1;" : :"m"(carray2[0]) :);  
  asm volatile ("paddb %%xmm0, %%xmm1;" : : :);  
  asm volatile ("movntdq %%xmm1, %0;" :"=m"(sum[0]) : :);   
  printf("The second element of the sum array is %d\n", sum[1]);  
  printf("The middle element of the sum array is %d\n", sum[8]);  
  printf("The last element of the sum array is %d\n", sum[15]);  
  return 0;  
 }  


$ gcc -g -Fstabs paddb.c

$ ./a.out
The second element of the sum array is 2
The middle element of the sum array is 16
The last element of the sum array is 30

Comments:

Post a Comment

Subscribe to Post Comments [Atom]





<< Home

This page is powered by Blogger. Isn't yours?

Subscribe to Posts [Atom]