Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

mdp_complex_field.h

Go to the documentation of this file.
00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 bool mdp_write_double_as_float(FILE *fp,
00014                                void* data,
00015                                long psize,
00016                                long header_size,
00017                                long position,
00018                                const mdp_lattice &lattice) {
00019   double *p=(double*) data;
00020   float  *q=(float*) malloc(psize/2);
00021   for(uint i=0; i<psize/sizeof(double); i++) q[i]=p[i];
00022   if(fseek(fp, position*psize/2+header_size, SEEK_SET) ||
00023      fwrite(q, psize/2,1, fp)!=1) return false;
00024   free(q);
00025   return true;
00026 }
00027 
00028 bool mdp_read_double_as_float(FILE *fp,
00029                                void* data,
00030                                long psize,
00031                                long header_size,
00032                                long position,
00033                                const mdp_lattice &lattice) {
00034   double *p=(double*) data;
00035   float  *q=(float*) malloc(psize/2);
00036   if(fseek(fp, position*psize/2+header_size, SEEK_SET) ||
00037      fread(q, psize/2,1, fp)!=1) return false;
00038   for(uint i=0; i<psize/sizeof(double); i++) p[i]=q[i];
00039   free(q);
00040   return true;
00041 }
00042 
00043 bool mdp_write_float_as_double(FILE *fp,
00044                                void* data,
00045                                long psize,
00046                                long header_size,
00047                                long position,
00048                                const mdp_lattice &lattice) {
00049   float  *p=(float*) data;
00050   double *q=(double*) malloc(psize*2);
00051   for(uint i=0; i<psize/sizeof(float); i++) q[i]=p[i];
00052   if(fseek(fp, position*psize*2+header_size, SEEK_SET) ||
00053      fwrite(q, psize*2,1, fp)!=1) return false;
00054   free(q);
00055   return true;
00056 }
00057 
00058 bool mdp_read_float_as_double(FILE *fp,
00059                               void* data,
00060                               long psize,
00061                               long header_size,
00062                               long position,
00063                               const mdp_lattice &lattice) {
00064   float  *p=(float*) data;
00065   double *q=(double*) malloc(psize*2);
00066   if(fseek(fp, position*psize*2+header_size, SEEK_SET) ||
00067      fread(q, psize*2,1, fp)!=1) return false;
00068   for(uint i=0; i<psize/sizeof(float); i++) p[i]=q[i];
00069   free(q);
00070   return true;
00071 }
00072 
00085 class mdp_complex_field : public mdp_field<mdp_complex> {
00086  public:
00087   mdp_complex_field() {}
00088   mdp_complex_field(mdp_lattice& lattice, int n=1) { 
00089     allocate_field(lattice,n);
00090   }
00091   inline void operator= (const mdp_complex_field &psi) {
00092     if(&lattice()==0) return;
00093     if(&lattice() != &psi.lattice() || 
00094        size!=psi.size || 
00095        field_components!=psi.field_components) 
00096       error("mdp_field: operator=() impatible fields");
00097 
00098     long i=0;    
00099     
00100 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00101     _sse_double* r=(_sse_double*) m;
00102     _sse_double* s=(_sse_double*) psi.m;
00103     
00104     for(; i<size-7; i+=8, r+=8, s+=8) {
00105       _sse_double_prefetch_16(s+8);
00106       _sse_double_copy_16(r,s);
00107     }
00108 #endif
00109     
00110     for(; i<size; i++) m[i]=psi.m[i];
00111   }
00112   
00113   inline void operator*=(const mdp_complex alpha) {
00114     long i_min=physical_local_start(EVENODD);
00115     long i_max=physical_local_stop(EVENODD);
00116     long i=i_min;
00117     for(; i<i_max; i++) m[i]*=alpha;
00118   }
00119   inline void operator/=(const mdp_complex alpha) {
00120     (*this)*=(1.0/alpha);
00121   }
00122 
00123   inline void operator*=(const mdp_real alpha) {
00124     long i_min=physical_local_start(EVENODD);
00125     long i_max=physical_local_stop(EVENODD);
00126     long i=i_min;
00127 
00128     if(alpha==1) return;
00129     
00130 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00131     static _sse_double c ALIGN16;
00132     _sse_double* r=(_sse_double*) physical_address(i_min);
00133 
00134     _sse_check_alignment(&c,0xf);
00135 
00136     c.c1=c.c2=alpha;
00137     for(; i<i_max-7; i+=8, r+=8) {
00138       _sse_double_prefetch_16(r+8);
00139       _sse_double_multiply_16(r,c,r); 
00140     }
00141 #endif
00142     
00143     for(; i<i_max; i++) m[i]*=alpha;
00144   }
00145 
00146   inline void operator/=(const mdp_real alpha) {
00147     (*this)*=(1.0/alpha);
00148   }
00149 
00150   
00151   inline void operator+=(mdp_complex_field &psi) {
00152     long i_min=psi.physical_local_start(EVENODD);
00153     long i_max=psi.physical_local_stop(EVENODD);
00154     long i=i_min;
00155 
00156 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00157     _sse_double* r=(_sse_double*) physical_address(i_min);
00158     _sse_double* s=(_sse_double*) psi.physical_address(i_min);
00159     
00160     for(; i<i_max-7; i+=8, r+=8, s+=8) {
00161       _sse_double_prefetch_16(s+8);
00162       _sse_double_add_16(r,s); 
00163     }
00164 #endif   
00165     
00166     for(; i<i_max; i++) m[i]+=psi.m[i];
00167   }
00168 
00169   inline void operator-=(mdp_complex_field &psi) {
00170     long i_min=psi.physical_local_start(EVENODD);
00171     long i_max=psi.physical_local_stop(EVENODD);
00172     long i=i_min;
00173 
00174 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00175     _sse_double* r=(_sse_double*) physical_address(i_min);
00176     _sse_double* s=(_sse_double*) psi.physical_address(i_min);
00177     
00178     for(; i<i_max-7; i+=8, r+=8, s+=8) {
00179       _sse_double_prefetch_16(s+8);
00180       _sse_double_sub_16(r,s); 
00181     }
00182 #endif   
00183     
00184     for(; i<i_max; i++) m[i]-=psi.m[i];
00185   }
00186 
00187 
00188   inline friend mdp_real norm_square(mdp_complex_field &psi, 
00189                                      int parity=EVENODD) {
00190     double n2=0;
00191     long i_min=psi.physical_local_start(parity);
00192     long i_max=psi.physical_local_stop(parity);
00193     long i=i_min;
00194    
00195 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00196     static _sse_double c ALIGN16;
00197     _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00198 
00199     _sse_check_alignment(&c,0xf);
00200 
00201     c.c1=c.c2=0;
00202     for(; i<i_max-7; i+=8, r+=8) {
00203       _sse_double_prefetch_16(r+8);
00204       _sse_double_add_norm_square_16(r,c);
00205     }
00206     n2+=c.c1+c.c2;
00207 #endif
00208    
00209     for(; i<i_max; i++)
00210       n2+=abs2(psi[i]);
00211     mpi.add(n2);
00212     return n2;
00213   }
00214 
00215   inline friend mdp_complex scalar_product(mdp_complex_field &psi, 
00216                                           mdp_complex_field &chi,
00217                                           int parity=EVENODD) {
00218     mdp_complex n2=0;
00219     long i_min=psi.physical_local_start(parity);
00220     long i_max=psi.physical_local_stop(parity);
00221     long i=i_min;
00222    
00223 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00224     static _sse_double c,d ALIGN16;
00225     _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00226     _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00227 
00228     _sse_check_alignment(&c,0xf);
00229 
00230     c.c1=c.c2=0;
00231     d.c1=d.c2=0;
00232     for(; i<i_max-7; i+=8, r+=8, s+=8) {
00233       _sse_double_prefetch_16(r+8);
00234       _sse_double_prefetch_16(s+8);
00235       _sse_double_add_real_scalar_product_16(r,s,c);
00236       _sse_double_add_imag_scalar_product_16(r,s,d);
00237     }
00238     n2+=mdp_complex(c.c1+c.c2,d.c2-d.c1);
00239 #endif
00240     
00241     for(;i<i_max; i++)
00242       n2+=conj(psi[i])*chi[i];
00243     mpi.add(n2);
00244 
00245     return n2;
00246   }
00247 
00248   inline friend mdp_real real_scalar_product(mdp_complex_field &psi, 
00249                                              mdp_complex_field &chi, 
00250                                              int parity=EVENODD) {
00251 
00252     double n2=0;
00253     long i_min=psi.physical_local_start(parity);
00254     long i_max=psi.physical_local_stop(parity);
00255     long i=i_min;
00256     
00257 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00258     static _sse_double c ALIGN16;
00259     _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00260     _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00261 
00262     _sse_check_alignment(&c,0xf);
00263 
00264     c.c1=c.c2=0;
00265     for(; i<i_max-7; i+=8, r+=8, s+=8) {
00266       _sse_double_prefetch_16(r+8);
00267       _sse_double_prefetch_16(s+8);
00268       _sse_double_add_real_scalar_product_16(r,s,c);
00269     }
00270     n2+=c.c1+c.c2;
00271 #endif
00272    
00273     for(;i<i_max; i++)
00274       n2+=
00275         real(chi[i])*real(psi[i])+
00276         imag(chi[i])*imag(psi[i]);
00277     
00278     mpi.add(n2);
00279     return n2;
00280   }
00281 
00282   inline friend mdp_real imag_scalar_product(mdp_complex_field &psi, 
00283                                              mdp_complex_field &chi, 
00284                                              int parity=EVENODD) {
00285     double n2=0;
00286     long i_min=psi.physical_local_start(parity);
00287     long i_max=psi.physical_local_stop(parity);
00288     long i=i_min;
00289     
00290 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00291     static _sse_double c ALIGN16;
00292     _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00293     _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00294 
00295     _sse_check_alignment(&c,0xf);
00296 
00297     c.c1=c.c2=0;
00298     for(; i<i_max-7; i+=8, r+=8, s+=8) {
00299       _sse_double_prefetch_16(r+8);
00300       _sse_double_prefetch_16(s+8);
00301       _sse_double_add_imag_scalar_product_16(r,s,c);
00302     }
00303     n2+=c.c2-c.c1;
00304 #endif
00305     
00306     for(;i<i_max; i++)
00307       n2+=
00308         real(psi[i])*imag(chi[i])+
00309         imag(psi[i])*real(chi[i]);
00310     mpi.add(n2);
00311     return n2;
00312   }
00313 
00314   inline friend void mdp_add_scaled_field(mdp_complex_field &psi, 
00315                                           mdp_real alpha, 
00316                                           mdp_complex_field &chi,
00317                                           int parity=EVENODD) {
00318     long i_min=psi.physical_local_start(parity);
00319     long i_max=psi.physical_local_stop(parity);
00320     long i=i_min;
00321     
00322 #if defined(SSE2) && defined(USE_DOUBLE_PRECISION) && !defined(NO_SSE2_LINALG)
00323     static _sse_double c ALIGN16;
00324     _sse_double* r=(_sse_double*) psi.physical_address(i_min);
00325     _sse_double* s=(_sse_double*) chi.physical_address(i_min);
00326 
00327     _sse_check_alignment(&c,0xf);
00328 
00329     c.c1=c.c2=alpha;
00330     for(i=0; i<i_max-7; i+=8, r+=8, s+=8) {
00331       _sse_double_prefetch_16(r+8);
00332       _sse_double_prefetch_16(s+8);
00333       _sse_double_add_multiply_16(r,c,s);
00334     }
00335     
00336 #endif
00337     
00338     for(;i<i_max; i++)
00339       psi[i]+=alpha*chi[i];
00340   }
00341 
00342   inline friend void mdp_add_scaled_field(mdp_complex_field &psi, 
00343                                           mdp_complex alpha, 
00344                                           mdp_complex_field &chi,
00345                                           int parity=EVENODD) {
00346     long i_min=psi.physical_local_start(parity);
00347     long i_max=psi.physical_local_stop(parity);
00348     long i=i_min;
00349     //    this needs optimization.
00350     for(;i<i_max; i++)
00351       psi[i]+=alpha*chi[i];
00352   }
00353 
00354   inline friend mdp_complex operator* (mdp_complex_field &psi, 
00355                                        mdp_complex_field &chi) {
00356     return scalar_product(psi, chi);
00357   }
00358 
00359   inline friend mdp_real relative_residue(mdp_complex_field& p,
00360                                           mdp_complex_field& q,
00361                                           int parity=EVENODD) {
00362     register double residue=0, num=0, den=0;
00363     long i_min=p.physical_local_start(parity);
00364     register long i_max=q.physical_local_stop(parity);
00365     register long i=i_min;
00366     //    this needs optimization.
00367     for(;i<i_max;) {
00368       num+=abs2(p[i]);
00369       den+=abs2(q[i]);
00370       if(++i % p.size_per_site()==0) {
00371         residue+=(den==0)?1.0:(num/den);            
00372         num=den=0;
00373       }
00374     }
00375     mpi.add(residue);
00376     return sqrt(residue/p.lattice().global_volume());
00377   }
00378   
00379   bool save_as_float(string filename, 
00380                      int processIO=0, 
00381                      long max_buffer_size=1024, 
00382                      bool load_header=true,
00383                      long skip_bytes=0) {
00384 #if defined(USE_DOUBLE_PRECISION)
00385     header.bytes_per_site/=2;
00386     save(filename,processIO, max_buffer_size,load_header,skip_bytes,
00387          mdp_write_double_as_float);
00388     header.bytes_per_site*=2;
00389 #else
00390     save(filename,processIO, max_buffer_size,load_header,skip_bytes,0);
00391 #endif
00392     return true;
00393   }
00394 
00395   bool load_as_float(string filename, 
00396                      int processIO=0, 
00397                      long max_buffer_size=1024, 
00398                      bool load_header=true,
00399                      long skip_bytes=0) {
00400 
00401 #if defined(USE_DOUBLE_PRECISION)
00402     header.bytes_per_site/=2;
00403     load(filename,processIO, max_buffer_size,load_header,skip_bytes,
00404          mdp_read_double_as_float, true);
00405     header.bytes_per_site*=2;
00406 #else
00407     load(filename,processIO, max_buffer_size,load_header,skip_bytes,0,true);
00408 #endif
00409     return true;
00410   }
00411 
00412   
00413   bool load_as_double(string filename, 
00414                       int processIO=0, 
00415                       long max_buffer_size=1024, 
00416                       bool load_header=true,
00417                       long skip_bytes=0) {
00418 #if !defined(USE_DOUBLE_PRECISION)
00419     header.bytes_per_site*=2;
00420     load(filename,processIO, max_buffer_size,load_header,skip_bytes,
00421          mdp_read_float_as_double, true);
00422     header.bytes_per_site/=2;
00423 #else
00424     load(filename,processIO, max_buffer_size,load_header,skip_bytes,0,true);
00425 #endif
00426     return true;
00427   }
00428   
00429   bool save_as_double(string filename, 
00430                       int processIO=0, 
00431                       long max_buffer_size=1024, 
00432                       bool load_header=true,
00433                       long skip_bytes=0) {
00434 #if !defined(USE_DOUBLE_PRECISION)
00435     header.bytes_per_site*=2;
00436     save(filename,processIO, max_buffer_size,load_header,skip_bytes,
00437          mdp_write_float_as_double);
00438     header.bytes_per_site/=2;
00439 #else
00440     save(filename,processIO, max_buffer_size,load_header,skip_bytes,0);
00441 #endif   
00442     return true;
00443   }
00444 };
00445 
00446 

Generated on Sun Feb 27 15:12:20 2005 by  doxygen 1.4.1