Thursday, February 26, 2015

 

Introducing MOVDQU, PADDB and MOVNTDQ instructions

 //-------------------------------------------  
 //1. use movdqu to:  
 //load the array values into xmm0 and xmm1.  
 //2. use paddb to:  
 //add the corresponding byte elements of the two arrays.  
 //3. use movntdq to:  
 //store the result held in xmm1 into the sum array.  
 //Notes:  
 //If your CPU supports SSE4.1, movdqu can be  
 //replaced with movntdqa. The 128-bit movntdqa  
 //requires its memory operand to be 16-byte aligned,  
 //which is why the array declarations carry aligned(16).  
 //movdqu itself does not require aligned(16).  
 //--------------------------------------------  
 #include <stdio.h>  
 #include <stdlib.h>  
 int main() {  
  unsigned char __attribute__((aligned(16))) carray1[16];   
  unsigned char __attribute__((aligned(16))) carray2[16];   
  unsigned char sum[16];   
  int i = 0;  
  for(i=0;i<16;i++){  
    carray1[i] = i;  
    carray2[i] = carray1[i] ;  
  }  
  asm volatile ("movdqu %0, %%xmm0;" : :"m"(carray1[0]) :);  
  asm volatile ("movdqu %0, %%xmm1;" : :"m"(carray2[0]) :);  
  asm volatile ("paddb %%xmm0, %%xmm1;" : : :);  
  asm volatile ("movntdq %%xmm1, %0;" :"=m"(sum[0]) : :);   
  printf("The second element of the sum array is %d\n", sum[1]);  
  printf("The middle element of the sum array is %d\n", sum[8]);  
  printf("The last element of the sum array is %d\n", sum[15]);  
  return 0;  
 }  


$ gcc -g -Fstabs paddb.c

$ ./a.out
The second element of the sum array is 2
The middle element of the sum array is 16
The last element of the sum array is 30

Friday, February 20, 2015

 

CVTPS2PD and CVTSD2SI instructions

 //--------------------------------------  
 //Summary:  
 //Take the single-precision numbers in f[],  
 //convert them to double precision and  
 //store them in d[] (cvtps2pd).  
 //Then convert the doubles in d[] to int  
 //and store them in op_arr[] (cvtsd2si).  
 //--------------------------------------  
 #include <stdio.h>  
 #include <stdlib.h>  
 int main() {  
  float f[] = {3.22779, 5.25, 6.8, 44.65};  
  double d[] = {0.0,0.0,0.0,0.0};  
  int op_arr[] = {0,0,0,0};  
  int index=0;  
  //convert f[0] and f[1] to double precision  
  asm  volatile ( "cvtps2pd %0, %%xmm0;\n"   
          : :"m"(f[0]) : );   
  //convert f[2] and f[3] to double precision  
  asm  volatile ( "cvtps2pd %0, %%xmm1;\n"   
            : :"m"(f[2]) : );   
  //mov xmm0, xmm1 to memory  
  asm volatile ("movupd %%xmm0, %0;\n"   
           :"=m"(d[0]) : :);  
  asm volatile ("movupd %%xmm1, %0;\n"   
           :"=m"(d[2]) : :);  
  //now convert d[0] to d[3] into int   
  //result in eax, ebx, ecx, edx  
  asm volatile ("cvtsd2si %0, %%eax\n"   
           : :"m"(d[0]) :);   
  asm volatile ("cvtsd2si %0, %%ebx\n"   
           : :"m"(d[1]) :);   
  asm volatile ("cvtsd2si %0, %%ecx\n"   
           : :"m"(d[2]) :);   
  asm volatile ("cvtsd2si %0, %%edx\n"   
           : :"m"(d[3]) :);   
  //mov eax, ebx, ecx, edx   
  //into the int array  
  asm volatile ("movl %%eax, %0\n"   
          : :"m"(op_arr[0]) :);   
  asm volatile ("movl %%ebx, %0\n"   
         : :"m"(op_arr[1]) :);   
  asm volatile ("movl %%ecx, %0\n"   
          : :"m"(op_arr[2]) :);   
  asm volatile ("movl %%edx, %0\n"   
           : :"m"(op_arr[3]) :);   
  for (index=0; index < 4 ; index++) {  
    printf("float value %f: double value %e\n", f[index] , d[index]);  
    printf("float value %f: int value %d\n",  f[index] , op_arr[index]);  
    }  
  return 0;  
 }  
 $ gcc -g -Fstabs cvtps2pd_cvtsd2si.c
 
 $ ./a.out
float value 3.227790: double value 3.227790e+00
float value 3.227790: int value 3
float value 5.250000: double value 5.250000e+00
float value 5.250000: int value 5
float value 6.800000: double value 6.800000e+00
float value 6.800000: int value 7
float value 44.650002: double value 4.465000e+01
float value 44.650002: int value 45

This page is powered by Blogger. Isn't yours?

Subscribe to Posts [Atom]