Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

fermiqcd_sse_su3.h

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 #if defined(USE_DOUBLE_PRECISION) && defined(SSE2)
00014 
// _sse_mulABC_set_331: hands b[0..2] to the load macro, applies
// _sse_double_su3_multiply with `a` reinterpreted as an _sse_su3, and passes
// c[0..2] to the store_up macro.
// NOTE(review): the _sse_* macros are defined outside this listing. Judging by
// the names this presumably sets c = a*b (3x3 matrix times 3-component
// vector) -- confirm against the macro header. None of the macros names a
// destination for intermediate results, so they presumably share state in
// fixed SSE registers; keep the statement order exactly as written.
00015 inline void _sse_mulABC_set_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00016     {                                             
00017       _sse_double_load_123(b[0],b[1],b[2]);       
00018       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00019       _sse_double_store_up_123(c[0],c[1],c[2]);      
00020     }
// _sse_mulABC_add_331: loads b[0..2], applies _sse_double_su3_multiply with
// `a` reinterpreted as an _sse_su3, then loads c[0..2], invokes
// _sse_double_vector_add and stores the result back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably accumulates c += a*b -- confirm. The macros carry no explicit
// operands for intermediate results, so the call order must stay unchanged.
00021 inline void _sse_mulABC_add_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00022     {                                             
00023       _sse_double_load_123(b[0],b[1],b[2]);       
00024       _sse_double_su3_multiply(*((_sse_su3*) a)); 
            // c is read after the multiply and written back after the add
00025       _sse_double_load_123(c[0],c[1],c[2]);       
00026       _sse_double_vector_add();                   
00027       _sse_double_store_123(c[0],c[1],c[2]);      
00028     }
// _sse_mulABC_sub_331: loads b[0..2], applies _sse_double_su3_multiply with
// `a` reinterpreted as an _sse_su3, then loads c[0..2], invokes
// _sse_double_vector_sub and stores the result back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably computes c -= a*b -- confirm the operand order of
// _sse_double_vector_sub. The macros carry no explicit operands for
// intermediate results, so the call order must stay unchanged.
00029 inline void _sse_mulABC_sub_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00030     {                                             
00031       _sse_double_load_123(b[0],b[1],b[2]);       
00032       _sse_double_su3_multiply(*((_sse_su3*) a)); 
            // c is read after the multiply and written back after the subtract
00033       _sse_double_load_123(c[0],c[1],c[2]);       
00034       _sse_double_vector_sub();                   
00035       _sse_double_store_123(c[0],c[1],c[2]);      
00036     }
00037 
// _sse_mulAHBC_set_331: hands b[0..2] to the load macro, applies
// _sse_double_su3_inverse_multiply with `a` reinterpreted as an _sse_su3, and
// passes c[0..2] to the store_up macro.
// NOTE(review): macro definitions are not visible here. The "AH" in the name
// and "inverse_multiply" suggest c = a^H * b (Hermitian conjugate of `a`
// times a 3-component vector) -- confirm against the macro header. Keep the
// statement order: the macros name no destination for intermediate results.
00038 inline void _sse_mulAHBC_set_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00039     {                                             
00040       _sse_double_load_123(b[0],b[1],b[2]);       
00041       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00042       _sse_double_store_up_123(c[0],c[1],c[2]);      
00043     }
// _sse_mulAHBC_add_331: loads b[0..2], applies
// _sse_double_su3_inverse_multiply with `a` reinterpreted as an _sse_su3,
// then loads c[0..2], invokes _sse_double_vector_add and stores the result
// back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably accumulates c += a^H * b -- confirm. The call order must stay
// unchanged because the macros name no destination for intermediate results.
00044 inline void _sse_mulAHBC_add_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00045     {                                             
00046       _sse_double_load_123(b[0],b[1],b[2]);       
00047       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
            // c is read after the multiply and written back after the add
00048       _sse_double_load_123(c[0],c[1],c[2]);       
00049       _sse_double_vector_add();                   
00050       _sse_double_store_123(c[0],c[1],c[2]);      
00051     }
// _sse_mulAHBC_sub_331: loads b[0..2], applies
// _sse_double_su3_inverse_multiply with `a` reinterpreted as an _sse_su3,
// then loads c[0..2], invokes _sse_double_vector_sub and stores the result
// back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably computes c -= a^H * b -- confirm the operand order of
// _sse_double_vector_sub. The call order must stay unchanged because the
// macros name no destination for intermediate results.
00052 inline void _sse_mulAHBC_sub_331(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00053     {                                             
00054       _sse_double_load_123(b[0],b[1],b[2]);       
00055       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
            // c is read after the multiply and written back after the subtract
00056       _sse_double_load_123(c[0],c[1],c[2]);       
00057       _sse_double_vector_sub();                   
00058       _sse_double_store_123(c[0],c[1],c[2]);      
00059     }
00060 
00061 
// _sse_mulABC_set_333: runs the load / _sse_double_su3_multiply / store_up
// sequence three times, each time on one stride-3 triple of `b` and the
// matching triple of `c`: (0,3,6), (1,4,7), (2,5,8). `a` is reinterpreted as
// an _sse_su3 for every pass.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably sets c = a*b -- confirm layout and semantics. Keep the statement
// order: the macros name no destination for intermediate results.
00062 inline void _sse_mulABC_set_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00063     {                                             
            // pass 1: b[0],b[3],b[6] -> c[0],c[3],c[6]
00064       _sse_double_load_123(b[0],b[3],b[6]);       
00065       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00066       _sse_double_store_up_123(c[0],c[3],c[6]);      
            // pass 2: b[1],b[4],b[7] -> c[1],c[4],c[7]
00067       _sse_double_load_123(b[1],b[4],b[7]);       
00068       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00069       _sse_double_store_up_123(c[1],c[4],c[7]);      
            // pass 3: b[2],b[5],b[8] -> c[2],c[5],c[8]
00070       _sse_double_load_123(b[2],b[5],b[8]);       
00071       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00072       _sse_double_store_up_123(c[2],c[5],c[8]);      
00073     }
00074 
// _sse_mulABC_add_333: three passes over the stride-3 triples (0,3,6),
// (1,4,7), (2,5,8). Each pass loads the triple of `b`, applies
// _sse_double_su3_multiply with `a` reinterpreted as an _sse_su3, loads the
// matching triple of `c`, invokes _sse_double_vector_add and stores back into
// the same triple of `c`.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably accumulates c += a*b -- confirm. Keep the statement order: the
// macros name no destination for intermediate results.
00075 inline void _sse_mulABC_add_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00076     {                                             
            // pass 1: triple (0,3,6)
00077       _sse_double_load_123(b[0],b[3],b[6]);       
00078       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00079       _sse_double_load_123(c[0],c[3],c[6]);       
00080       _sse_double_vector_add();                   
00081       _sse_double_store_123(c[0],c[3],c[6]);      
            // pass 2: triple (1,4,7)
00082       _sse_double_load_123(b[1],b[4],b[7]);       
00083       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00084       _sse_double_load_123(c[1],c[4],c[7]);       
00085       _sse_double_vector_add();                   
00086       _sse_double_store_123(c[1],c[4],c[7]);      
            // pass 3: triple (2,5,8)
00087       _sse_double_load_123(b[2],b[5],b[8]);       
00088       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00089       _sse_double_load_123(c[2],c[5],c[8]);       
00090       _sse_double_vector_add();                   
00091       _sse_double_store_123(c[2],c[5],c[8]);      
00092     }
00093 
// _sse_mulABC_sub_333: three passes over the stride-3 triples (0,3,6),
// (1,4,7), (2,5,8). Each pass loads the triple of `b`, applies
// _sse_double_su3_multiply with `a` reinterpreted as an _sse_su3, loads the
// matching triple of `c`, invokes _sse_double_vector_sub and stores back into
// the same triple of `c`.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably computes c -= a*b -- confirm, including the operand order of
// _sse_double_vector_sub. Keep the statement order: the macros name no
// destination for intermediate results.
00094 inline void _sse_mulABC_sub_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00095     {                                             
            // pass 1: triple (0,3,6)
00096       _sse_double_load_123(b[0],b[3],b[6]);       
00097       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00098       _sse_double_load_123(c[0],c[3],c[6]);       
00099       _sse_double_vector_sub();                   
00100       _sse_double_store_123(c[0],c[3],c[6]);      
            // pass 2: triple (1,4,7)
00101       _sse_double_load_123(b[1],b[4],b[7]);       
00102       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00103       _sse_double_load_123(c[1],c[4],c[7]);       
00104       _sse_double_vector_sub();                   
00105       _sse_double_store_123(c[1],c[4],c[7]);      
            // pass 3: triple (2,5,8)
00106       _sse_double_load_123(b[2],b[5],b[8]);       
00107       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00108       _sse_double_load_123(c[2],c[5],c[8]);       
00109       _sse_double_vector_sub();                   
00110       _sse_double_store_123(c[2],c[5],c[8]);      
00111     }
00112 
// _sse_mulAHBC_set_333: runs the load / _sse_double_su3_inverse_multiply /
// store_up sequence three times, each time on one stride-3 triple of `b` and
// the matching triple of `c`: (0,3,6), (1,4,7), (2,5,8). `a` is reinterpreted
// as an _sse_su3 for every pass.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably sets c = a^H * b -- confirm layout and semantics. Keep the
// statement order: the macros name no destination for intermediate results.
00113 inline void _sse_mulAHBC_set_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00114     {                                             
            // pass 1: b[0],b[3],b[6] -> c[0],c[3],c[6]
00115       _sse_double_load_123(b[0],b[3],b[6]);       
00116       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00117       _sse_double_store_up_123(c[0],c[3],c[6]);      
            // pass 2: b[1],b[4],b[7] -> c[1],c[4],c[7]
00118       _sse_double_load_123(b[1],b[4],b[7]);       
00119       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00120       _sse_double_store_up_123(c[1],c[4],c[7]);      
            // pass 3: b[2],b[5],b[8] -> c[2],c[5],c[8]
00121       _sse_double_load_123(b[2],b[5],b[8]);       
00122       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00123       _sse_double_store_up_123(c[2],c[5],c[8]);      
00124     }
00125 
// _sse_mulAHBC_add_333: three passes over the stride-3 triples (0,3,6),
// (1,4,7), (2,5,8). Each pass loads the triple of `b`, applies
// _sse_double_su3_inverse_multiply with `a` reinterpreted as an _sse_su3,
// loads the matching triple of `c`, invokes _sse_double_vector_add and stores
// back into the same triple of `c`.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably accumulates c += a^H * b -- confirm. Keep the statement order:
// the macros name no destination for intermediate results.
00126 inline void _sse_mulAHBC_add_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00127     {                                             
            // pass 1: triple (0,3,6)
00128       _sse_double_load_123(b[0],b[3],b[6]);       
00129       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00130       _sse_double_load_123(c[0],c[3],c[6]);       
00131       _sse_double_vector_add();                   
00132       _sse_double_store_123(c[0],c[3],c[6]);      
            // pass 2: triple (1,4,7)
00133       _sse_double_load_123(b[1],b[4],b[7]);       
00134       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00135       _sse_double_load_123(c[1],c[4],c[7]);       
00136       _sse_double_vector_add();                   
00137       _sse_double_store_123(c[1],c[4],c[7]);      
            // pass 3: triple (2,5,8)
00138       _sse_double_load_123(b[2],b[5],b[8]);       
00139       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00140       _sse_double_load_123(c[2],c[5],c[8]);       
00141       _sse_double_vector_add();                   
00142       _sse_double_store_123(c[2],c[5],c[8]);      
00143     }
00144 
// _sse_mulAHBC_sub_333: three passes over the stride-3 triples (0,3,6),
// (1,4,7), (2,5,8). Each pass loads the triple of `b`, applies
// _sse_double_su3_inverse_multiply with `a` reinterpreted as an _sse_su3,
// loads the matching triple of `c`, invokes _sse_double_vector_sub and stores
// back into the same triple of `c`.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, each triple is one column and the function
// presumably computes c -= a^H * b -- confirm, including the operand order of
// _sse_double_vector_sub. Keep the statement order: the macros name no
// destination for intermediate results.
00145 inline void _sse_mulAHBC_sub_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00146     {                                             
            // pass 1: triple (0,3,6)
00147       _sse_double_load_123(b[0],b[3],b[6]);       
00148       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00149       _sse_double_load_123(c[0],c[3],c[6]);       
00150       _sse_double_vector_sub();                   
00151       _sse_double_store_123(c[0],c[3],c[6]);      
            // pass 2: triple (1,4,7)
00152       _sse_double_load_123(b[1],b[4],b[7]);       
00153       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00154       _sse_double_load_123(c[1],c[4],c[7]);       
00155       _sse_double_vector_sub();                   
00156       _sse_double_store_123(c[1],c[4],c[7]);      
            // pass 3: triple (2,5,8)
00157       _sse_double_load_123(b[2],b[5],b[8]);       
00158       _sse_double_su3_inverse_multiply(*((_sse_su3*) a)); 
00159       _sse_double_load_123(c[2],c[5],c[8]);       
00160       _sse_double_vector_sub();                   
00161       _sse_double_store_123(c[2],c[5],c[8]);      
00162     }
00163 
00164 
// _sse_mulABHC_set_333: three passes. Each pass copies the complex conjugates
// of three consecutive elements of `b` (b[0..2], b[3..5], b[6..8]) into the
// scratch vector `v`, loads `v`, applies _sse_double_su3_multiply with `a`
// reinterpreted as an _sse_su3, and passes one stride-3 triple of `c`
// ((0,3,6), (1,4,7), (2,5,8)) to the store_up macro.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, conj of a row of b is a column of b^H, so this
// presumably sets c = a * b^H -- confirm. Keep the statement order: the
// macros name no destination for intermediate results.
// `v` is a function-local static, so the scratch buffer is shared by every
// call; presumably static was chosen to get reliable 16-byte alignment, but
// it means concurrent calls from several threads would race on `v`.
00165 inline void _sse_mulABHC_set_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00166     {                          
00167       static _sse_su3_vector v ALIGN16;
            // pass 1: conj(b[0..2]) -> c[0],c[3],c[6]
00168       v.c1=conj(b[0]); v.c2=conj(b[1]); v.c3=conj(b[2]);
00169       _sse_double_load(v);       
00170       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00171       _sse_double_store_up_123(c[0],c[3],c[6]);      
00172 
            // pass 2: conj(b[3..5]) -> c[1],c[4],c[7]
00173       v.c1=conj(b[3]); v.c2=conj(b[4]); v.c3=conj(b[5]);
00174       _sse_double_load(v);       
00175       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00176       _sse_double_store_up_123(c[1],c[4],c[7]);      
00177 
            // pass 3: conj(b[6..8]) -> c[2],c[5],c[8]
00178       v.c1=conj(b[6]); v.c2=conj(b[7]); v.c3=conj(b[8]);
00179       _sse_double_load(v);       
00180       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00181       _sse_double_store_up_123(c[2],c[5],c[8]);      
00182     }
00183 
// _sse_mulABHC_add_333: three passes. Each pass copies the complex conjugates
// of three consecutive elements of `b` (b[0..2], b[3..5], b[6..8]) into the
// scratch vector `v`, loads `v`, applies _sse_double_su3_multiply with `a`
// reinterpreted as an _sse_su3, then loads one stride-3 triple of `c`
// ((0,3,6), (1,4,7), (2,5,8)), invokes _sse_double_vector_add and stores back
// into that triple.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, this presumably accumulates c += a * b^H --
// confirm. Keep the statement order: the macros name no destination for
// intermediate results.
// `v` is a function-local static, so the scratch buffer is shared by every
// call; presumably static was chosen to get reliable 16-byte alignment, but
// it means concurrent calls from several threads would race on `v`.
00184 inline void _sse_mulABHC_add_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00185     {  
00186       static _sse_su3_vector v ALIGN16;
            // pass 1: conj(b[0..2]) accumulated into c[0],c[3],c[6]
00187       v.c1=conj(b[0]); v.c2=conj(b[1]); v.c3=conj(b[2]);
00188       _sse_double_load(v);       
00189       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00190       _sse_double_load_123(c[0],c[3],c[6]);       
00191       _sse_double_vector_add();                   
00192       _sse_double_store_123(c[0],c[3],c[6]);      
00193 
            // pass 2: conj(b[3..5]) accumulated into c[1],c[4],c[7]
00194       v.c1=conj(b[3]); v.c2=conj(b[4]); v.c3=conj(b[5]);
00195       _sse_double_load(v);
00196       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00197       _sse_double_load_123(c[1],c[4],c[7]);       
00198       _sse_double_vector_add();                   
00199       _sse_double_store_123(c[1],c[4],c[7]);      
00200 
            // pass 3: conj(b[6..8]) accumulated into c[2],c[5],c[8]
00201       v.c1=conj(b[6]); v.c2=conj(b[7]); v.c3=conj(b[8]);
00202       _sse_double_load(v);       
00203       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00204       _sse_double_load_123(c[2],c[5],c[8]);       
00205       _sse_double_vector_add();                   
00206       _sse_double_store_123(c[2],c[5],c[8]);      
00207     }
00208 
// _sse_mulABHC_sub_333: three passes. Each pass copies the complex conjugates
// of three consecutive elements of `b` (b[0..2], b[3..5], b[6..8]) into the
// scratch vector `v`, loads `v`, applies _sse_double_su3_multiply with `a`
// reinterpreted as an _sse_su3, then loads one stride-3 triple of `c`
// ((0,3,6), (1,4,7), (2,5,8)), invokes _sse_double_vector_sub and stores back
// into that triple.
// NOTE(review): macro definitions are not visible here. If b and c are 3x3
// matrices stored row-major, this presumably computes c -= a * b^H --
// confirm, including the operand order of _sse_double_vector_sub. Keep the
// statement order: the macros name no destination for intermediate results.
// `v` is a function-local static, so the scratch buffer is shared by every
// call; presumably static was chosen to get reliable 16-byte alignment, but
// it means concurrent calls from several threads would race on `v`.
00209 inline void _sse_mulABHC_sub_333(mdp_complex* a, mdp_complex* b, mdp_complex* c) 
00210     {                             
00211       static _sse_su3_vector v ALIGN16;
            // pass 1: conj(b[0..2]) subtracted into c[0],c[3],c[6]
00212       v.c1=conj(b[0]); v.c2=conj(b[1]); v.c3=conj(b[2]);
00213       _sse_double_load(v);       
00214       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00215       _sse_double_load_123(c[0],c[3],c[6]);       
00216       _sse_double_vector_sub();                   
00217       _sse_double_store_123(c[0],c[3],c[6]);      
00218 
            // pass 2: conj(b[3..5]) subtracted into c[1],c[4],c[7]
00219       v.c1=conj(b[3]); v.c2=conj(b[4]); v.c3=conj(b[5]);
00220       _sse_double_load(v);
00221       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00222       _sse_double_load_123(c[1],c[4],c[7]);       
00223       _sse_double_vector_sub();                   
00224       _sse_double_store_123(c[1],c[4],c[7]);      
00225 
            // pass 3: conj(b[6..8]) subtracted into c[2],c[5],c[8]
00226       v.c1=conj(b[6]); v.c2=conj(b[7]); v.c3=conj(b[8]);
00227       _sse_double_load(v);       
00228       _sse_double_su3_multiply(*((_sse_su3*) a)); 
00229       _sse_double_load_123(c[2],c[5],c[8]);       
00230       _sse_double_vector_sub();                   
00231       _sse_double_store_123(c[2],c[5],c[8]);      
00232     }
00233 
// _sse_mulAbC_set_31: duplicates the real and imaginary parts of the scalar
// `b` into both lanes of two _sse_double scratch values, loads a[0..2],
// invokes _sse_double_vector_mul_complex(real, imag) and passes c[0..2] to the
// store_up macro.
// NOTE(review): macro definitions are not visible here; by name this
// presumably sets c = a*b for a 3-component vector `a` and complex scalar
// `b` -- confirm. Keep the statement order: the macros name no destination
// for intermediate results.
// The scratch values are function-local statics and therefore shared by every
// call; concurrent calls from several threads would race on them.
00234 inline void _sse_mulAbC_set_31(mdp_complex* a, mdp_complex b, mdp_complex* c) {
00235   
        // ALIGN16 is repeated per declarator: with GCC-style attribute syntax a
        // trailing attribute binds only to the declarator it follows, so the
        // original "real, imag ALIGN16" left `real` without the explicit
        // alignment (assuming ALIGN16 expands to an aligned attribute -- verify).
00236   static _sse_double real ALIGN16, imag ALIGN16;
00237   real.c1=real.c2=b.real();
00238   imag.c1=imag.c2=b.imag();
00239   _sse_double_load_123(a[0],a[1],a[2]);       
00240   _sse_double_vector_mul_complex(real,imag);
00241   _sse_double_store_up_123(c[0],c[1],c[2]);   
00242 }
00243 
// _sse_mulAbC_add_31: duplicates the real and imaginary parts of the scalar
// `b` into both lanes of two _sse_double scratch values, loads a[0..2],
// invokes _sse_double_vector_mul_complex(real, imag), then loads c[0..2],
// invokes _sse_double_vector_add and stores back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably accumulates c += a*b for a 3-component vector `a` and complex
// scalar `b` -- confirm. Keep the statement order: the macros name no
// destination for intermediate results.
// The scratch values are function-local statics and therefore shared by every
// call; concurrent calls from several threads would race on them.
00244 inline void _sse_mulAbC_add_31(mdp_complex* a, mdp_complex b, mdp_complex* c) 
00245     {                                             
            // ALIGN16 is repeated per declarator: with GCC-style attribute
            // syntax a trailing attribute binds only to the declarator it
            // follows, so the original "real, imag ALIGN16" left `real` without
            // the explicit alignment (assuming ALIGN16 expands to an aligned
            // attribute -- verify).
00246       static _sse_double real ALIGN16, imag ALIGN16;
00247       real.c1=real.c2=b.real();
00248       imag.c1=imag.c2=b.imag();
00249       _sse_double_load_123(a[0],a[1],a[2]);       
00250       _sse_double_vector_mul_complex(real, imag);
00251       _sse_double_load_123(c[0],c[1],c[2]);          
00252       _sse_double_vector_add();                   
00253       _sse_double_store_123(c[0],c[1],c[2]);       
00254     }
// _sse_mulAbC_sub_31: duplicates the real and imaginary parts of the scalar
// `b` into both lanes of two _sse_double scratch values, loads a[0..2],
// invokes _sse_double_vector_mul_complex(real, imag), then loads c[0..2],
// invokes _sse_double_vector_sub and stores back into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably computes c -= a*b for a 3-component vector `a` and complex
// scalar `b` -- confirm, including the operand order of
// _sse_double_vector_sub. Keep the statement order: the macros name no
// destination for intermediate results.
// The scratch values are function-local statics and therefore shared by every
// call; concurrent calls from several threads would race on them.
00255 inline void _sse_mulAbC_sub_31(mdp_complex* a, mdp_complex b, mdp_complex* c) 
00256     {                                            
            // ALIGN16 is repeated per declarator: with GCC-style attribute
            // syntax a trailing attribute binds only to the declarator it
            // follows, so the original "real, imag ALIGN16" left `real` without
            // the explicit alignment (assuming ALIGN16 expands to an aligned
            // attribute -- verify).
00257       static _sse_double real ALIGN16, imag ALIGN16;
00258       real.c1=real.c2=b.real();
00259       imag.c1=imag.c2=b.imag();          
00260       _sse_double_load_123(a[0],a[1],a[2]);       
00261       _sse_double_vector_mul_complex(real, imag);                  
00262       _sse_double_load_123(c[0],c[1],c[2]);          
00263       _sse_double_vector_sub();                   
00264       _sse_double_store_123(c[0],c[1],c[2]);       
00265     }
00266 
// _sse_sumAC_set_31: hands a[0..2] to the load macro and c[0..2] to the store
// macro, with nothing in between.
// NOTE(review): macro definitions are not visible here; by name this
// presumably copies the 3-component vector, c = a -- confirm.
00267 inline void _sse_sumAC_set_31(mdp_complex* a, mdp_complex* c) 
00268     {                                      
00269       _sse_double_load_123(a[0],a[1],a[2]); 
00270       _sse_double_store_123(c[0],c[1],c[2]); 
00271     }
// _sse_sumAC_add_31: loads c[0..2] with the plain load macro and a[0..2] with
// the load_up macro, invokes _sse_double_vector_add and stores the result
// into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably accumulates c += a for 3-component vectors -- confirm. Keep the
// statement order: the macros name no destination for intermediate results.
00272 inline void _sse_sumAC_add_31(mdp_complex* a, mdp_complex* c) 
00273     {                                          
00274       _sse_double_load_123(c[0],c[1],c[2]);    
00275       _sse_double_load_up_123(a[0],a[1],a[2]); 
00276       _sse_double_vector_add();                
00277       _sse_double_store_123(c[0],c[1],c[2]);   
00278     }
// _sse_sumAC_sub_31: loads c[0..2] with the plain load macro and a[0..2] with
// the load_up macro, invokes _sse_double_vector_sub and stores the result
// into c[0..2].
// NOTE(review): macro definitions are not visible here; by name this
// presumably computes c -= a for 3-component vectors -- confirm, including
// the operand order of _sse_double_vector_sub. Keep the statement order: the
// macros name no destination for intermediate results.
00279 inline void _sse_sumAC_sub_31(mdp_complex* a, mdp_complex* c)
00280     {                                          
00281       _sse_double_load_123(c[0],c[1],c[2]);    
00282       _sse_double_load_up_123(a[0],a[1],a[2]); 
00283       _sse_double_vector_sub();                
00284       _sse_double_store_123(c[0],c[1],c[2]);   
00285     }
00286 
00287 #endif

Generated on Sun Feb 27 15:12:19 2005 by  doxygen 1.4.1