00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #if defined(SSE2)
00014
00015 class FermiFermilabActionNew {
00016 public:
00017
00018 static void mul_Q(fermi_field &psi_out,
00019 fermi_field &psi_in,
00020 gauge_field &U,
00021 coefficients &coeff, int parity=EVENODD) {
00022
00023 mdp_lattice& lattice=psi_in.lattice();
00024
00025 if(parity!=EVENODD)
00026 error("FermiFermilabAction::mul_Q\nparity must be EVENODD");
00027
00028 if(psi_in.nspin!=4)
00029 error("FermiFermilabAction::mul_Q\ndoes not work for nspin!=4");
00030 if(psi_in.nc!=U.nc)
00031 error("FermiFermilabAction::mul_Q\nincompatible number of colors");
00032
00033 mdp_real sign, kappa, kappat,kappas,zeta,rs,rt,cSW,cE,cB;
00034 mdp_real c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,mac1,mac2,mac3,mac4,mac5;
00035
00036 if(coeff.has_key("sign")) sign=coeff["sign"];
00037 else sign=1;
00038 if(sign!=+1)
00039 error("FermiFermilabAction::mul_Q\nsign must be +1.0");
00040
00041 if(coeff.has_key("kappa")) kappat=kappas=coeff["kappa"];
00042 if(coeff.has_key("zeta")) kappat=kappas/coeff["zeta"];
00043
00044 if(coeff.has_key("kappa_s")) kappas=coeff["kappa_s"];
00045 if(coeff.has_key("kappa_t")) kappat=coeff["kappa_t"];
00046
00047 if(kappat==0 || kappas==0)
00048 error("FermiFermilabAction::mul_Q\nparameter kappa not assigned");
00049
00050 zeta=kappas/kappat;
00051
00052 if(coeff.has_key("r_t")) rt=coeff["r_t"];
00053 else rt=1;
00054
00055 if(coeff.has_key("r_s")) rs=coeff["r_s"];
00056 else rs=1;
00057
00058 if(coeff.has_key("c_{sw}")) cSW=coeff["c_{sw}"];
00059 else cSW=0;
00060
00061 if(coeff.has_key("c_E")) cE=coeff["c_E"];
00062 else cE=1;
00063
00064 if(coeff.has_key("c_B")) cB=coeff["c_B"];
00065 else cB=1;
00066
00067 if(coeff.has_key("alpha_1")) c1=coeff["alpha_1"];
00068 else c1=0;
00069 if(coeff.has_key("alpha_2")) c2=coeff["alpha_2"];
00070 else c2=0;
00071 if(coeff.has_key("alpha_3")) c3=coeff["alpha_3"];
00072 else c3=0;
00073 if(coeff.has_key("alpha_4")) c4=coeff["alpha_4"];
00074 else c4=0;
00075 if(coeff.has_key("alpha_5")) c5=coeff["alpha_5"];
00076 else c5=0;
00077 if(coeff.has_key("alpha_6")) c6=coeff["alpha_6"];
00078 else c6=0;
00079 if(coeff.has_key("alpha_7")) c7=coeff["alpha_7"];
00080 else c7=0;
00081 if(coeff.has_key("alpha_8")) c8=coeff["alpha_8"];
00082 else c8=0;
00083 if(coeff.has_key("alpha_9")) c9=coeff["alpha_9"];
00084 else c9=0;
00085 if(coeff.has_key("alpha_10")) c10=coeff["alpha_10"];
00086 else c10=0;
00087 if(coeff.has_key("mac_1")) mac1=coeff["mac_1"];
00088 else mac1=0;
00089 if(coeff.has_key("mac_2")) mac2=coeff["mac_2"];
00090 else mac2=0;
00091 if(coeff.has_key("mac_3")) mac3=coeff["mac_3"];
00092 else mac3=0;
00093 if(coeff.has_key("mac_4")) mac4=coeff["mac_4"];
00094 else mac4=0;
00095 if(coeff.has_key("mac_5")) mac5=coeff["mac_5"];
00096 else mac5=0;
00097
00098 site x(lattice);
00099
00100 int i,j,a,b,k;
00101
00102 mdp_matrix_field Up(lattice,4,3);
00103 mdp_matrix_field Dw(lattice,4,3);
00104 mdp_matrix_field Ei(lattice,3,3);
00105 mdp_matrix_field Bi(lattice,3,3);
00106
00107 mdp_matrix out(3,1);
00108
00109
00110 static mdp_matrix u0u0(3,1);
00111 static mdp_matrix d0d0(3,1);
00112 static mdp_matrix u1u1(3,1);
00113 static mdp_matrix d1d1(3,1);
00114 static mdp_matrix e1u1(3,1);
00115 static mdp_matrix e1d1(3,1);
00116 static mdp_matrix u1e1(3,1);
00117 static mdp_matrix d1e1(3,1);
00118 static mdp_matrix b1u1(3,1);
00119 static mdp_matrix b1d1(3,1);
00120 static mdp_matrix u1b1(3,1);
00121 static mdp_matrix d1b1(3,1);
00122 static mdp_matrix u1u2(3,1);
00123 static mdp_matrix u1d2(3,1);
00124 static mdp_matrix d1u2(3,1);
00125 static mdp_matrix d1d2(3,1);
00126 static mdp_matrix e1u2(3,1);
00127 static mdp_matrix e1d2(3,1);
00128 static mdp_matrix u1e2(3,1);
00129 static mdp_matrix d1e2(3,1);
00130 static mdp_matrix b1u2(3,1);
00131 static mdp_matrix b1d2(3,1);
00132 static mdp_matrix u1b2(3,1);
00133 static mdp_matrix d1b2(3,1);
00134 static mdp_matrix u1u3(3,1);
00135 static mdp_matrix u1d3(3,1);
00136 static mdp_matrix d1u3(3,1);
00137 static mdp_matrix d1d3(3,1);
00138 static mdp_matrix e1u3(3,1);
00139 static mdp_matrix e1d3(3,1);
00140 static mdp_matrix u1e3(3,1);
00141 static mdp_matrix d1e3(3,1);
00142 static mdp_matrix b1u3(3,1);
00143 static mdp_matrix b1d3(3,1);
00144 static mdp_matrix u1b3(3,1);
00145 static mdp_matrix d1b3(3,1);
00146 static mdp_matrix u2u1(3,1);
00147 static mdp_matrix u2d1(3,1);
00148 static mdp_matrix d2u1(3,1);
00149 static mdp_matrix d2d1(3,1);
00150 static mdp_matrix e2u1(3,1);
00151 static mdp_matrix e2d1(3,1);
00152 static mdp_matrix u2e1(3,1);
00153 static mdp_matrix d2e1(3,1);
00154 static mdp_matrix b2u1(3,1);
00155 static mdp_matrix b2d1(3,1);
00156 static mdp_matrix u2b1(3,1);
00157 static mdp_matrix d2b1(3,1);
00158 static mdp_matrix u2u2(3,1);
00159 static mdp_matrix d2d2(3,1);
00160 static mdp_matrix e2u2(3,1);
00161 static mdp_matrix e2d2(3,1);
00162 static mdp_matrix u2e2(3,1);
00163 static mdp_matrix d2e2(3,1);
00164 static mdp_matrix b2u2(3,1);
00165 static mdp_matrix b2d2(3,1);
00166 static mdp_matrix u2b2(3,1);
00167 static mdp_matrix d2b2(3,1);
00168 static mdp_matrix u2u3(3,1);
00169 static mdp_matrix u2d3(3,1);
00170 static mdp_matrix d2u3(3,1);
00171 static mdp_matrix d2d3(3,1);
00172 static mdp_matrix e2u3(3,1);
00173 static mdp_matrix e2d3(3,1);
00174 static mdp_matrix u2e3(3,1);
00175 static mdp_matrix d2e3(3,1);
00176 static mdp_matrix b2u3(3,1);
00177 static mdp_matrix b2d3(3,1);
00178 static mdp_matrix u2b3(3,1);
00179 static mdp_matrix d2b3(3,1);
00180 static mdp_matrix u3u1(3,1);
00181 static mdp_matrix u3d1(3,1);
00182 static mdp_matrix d3u1(3,1);
00183 static mdp_matrix d3d1(3,1);
00184 static mdp_matrix e3u1(3,1);
00185 static mdp_matrix e3d1(3,1);
00186 static mdp_matrix u3e1(3,1);
00187 static mdp_matrix d3e1(3,1);
00188 static mdp_matrix b3u1(3,1);
00189 static mdp_matrix b3d1(3,1);
00190 static mdp_matrix u3b1(3,1);
00191 static mdp_matrix d3b1(3,1);
00192 static mdp_matrix u3u2(3,1);
00193 static mdp_matrix u3d2(3,1);
00194 static mdp_matrix d3u2(3,1);
00195 static mdp_matrix d3d2(3,1);
00196 static mdp_matrix e3u2(3,1);
00197 static mdp_matrix e3d2(3,1);
00198 static mdp_matrix u3e2(3,1);
00199 static mdp_matrix d3e2(3,1);
00200 static mdp_matrix b3u2(3,1);
00201 static mdp_matrix b3d2(3,1);
00202 static mdp_matrix u3b2(3,1);
00203 static mdp_matrix d3b2(3,1);
00204 static mdp_matrix u3u3(3,1);
00205 static mdp_matrix d3d3(3,1);
00206 static mdp_matrix e3u3(3,1);
00207 static mdp_matrix e3d3(3,1);
00208 static mdp_matrix u3e3(3,1);
00209 static mdp_matrix d3e3(3,1);
00210 static mdp_matrix b3u3(3,1);
00211 static mdp_matrix b3d3(3,1);
00212 static mdp_matrix u3b3(3,1);
00213 static mdp_matrix d3b3(3,1);
00214 mdp_complex c_id[16];
00215 mdp_complex c_u[16*4];
00216 mdp_complex c_d[16*4];
00217 mdp_complex c_e[16*4];
00218 mdp_complex c_b[16*4];
00219 mdp_complex c_uu[16*4*4];
00220 mdp_complex c_ud[16*4*4];
00221 mdp_complex c_du[16*4*4];
00222 mdp_complex c_dd[16*4*4];
00223 mdp_complex c_eu[16*4*4];
00224 mdp_complex c_ed[16*4*4];
00225 mdp_complex c_ue[16*4*4];
00226 mdp_complex c_de[16*4*4];
00227 mdp_complex c_bu[16*4*4];
00228 mdp_complex c_bd[16*4*4];
00229 mdp_complex c_ub[16*4*4];
00230 mdp_complex c_db[16*4*4];
00231
00232
00233 #include "fermiqcd_fermilab_coefficients.h"
00234
00235 psi_out=0;
00236
00237
00238 for(a=0; a<4; a++) {
00239
00240 forallsites(x) {
00241
00242 _sse_mulABC_set_331(&U(x,0,0,0),&psi_in(x+0,a,0),&Up(x,0,0));
00243 _sse_mulABC_set_331(&U(x,1,0,0),&psi_in(x+1,a,0),&Up(x,1,0));
00244 _sse_mulABC_set_331(&U(x,2,0,0),&psi_in(x+2,a,0),&Up(x,2,0));
00245 _sse_mulABC_set_331(&U(x,3,0,0),&psi_in(x+3,a,0),&Up(x,3,0));
00246 _sse_mulAHBC_set_331(&U(x-0,0,0,0),&psi_in(x-0,a,0),&Dw(x,0,0));
00247 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&psi_in(x-1,a,0),&Dw(x,1,0));
00248 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&psi_in(x-2,a,0),&Dw(x,2,0));
00249 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&psi_in(x-3,a,0),&Dw(x,3,0));
00250 _sse_mulABC_set_331(&U.em(x,0,1,0,0),&psi_in(x,a,0),&Ei(x,0,0));
00251 _sse_mulABC_set_331(&U.em(x,0,2,0,0),&psi_in(x,a,0),&Ei(x,1,0));
00252 _sse_mulABC_set_331(&U.em(x,0,3,0,0),&psi_in(x,a,0),&Ei(x,2,0));
00253 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&psi_in(x,a,0),&Bi(x,0,0));
00254 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&psi_in(x,a,0),&Bi(x,1,0));
00255 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&psi_in(x,a,0),&Bi(x,2,0));
00256
00257 }
00258
00259 Up.update();
00260 Dw.update();
00261 Ei.update();
00262 Bi.update();
00263
00264 forallsites(x) {
00265
00266 _sse_mulABC_set_331(&U(x,0,0,0),&Up(x+0,0,0),&u0u0(0,0));
00267 _sse_mulAHBC_set_331(&U(x-0,0,0,0),&Dw(x-0,0,0),&d0d0(0,0));
00268 _sse_mulABC_set_331(&U(x,1,0,0),&Up(x+1,1,0),&u1u1(0,0));
00269 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Dw(x-1,1,0),&d1d1(0,0));
00270 _sse_mulABC_set_331(&U(x,1,0,0),&Bi(x+1,0,0),&u1b1(0,0));
00271 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Bi(x-1,0,0),&d1b1(0,0));
00272 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Up(x,1,0),&b1u1(0,0));
00273 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Dw(x,1,0),&b1d1(0,0));
00274 _sse_mulABC_set_331(&U(x,1,0,0),&Up(x+1,2,0),&u1u2(0,0));
00275 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Dw(x-1,2,0),&d1d2(0,0));
00276 _sse_mulABC_set_331(&U(x,1,0,0),&Dw(x+1,2,0),&u1d2(0,0));
00277 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Up(x-1,2,0),&d1d2(0,0));
00278 _sse_mulABC_set_331(&U(x,1,0,0),&Bi(x+1,1,0),&u1b2(0,0));
00279 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Bi(x-1,1,0),&d1b2(0,0));
00280 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Up(x,2,0),&b1u2(0,0));
00281 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Dw(x,2,0),&b1d2(0,0));
00282 _sse_mulABC_set_331(&U(x,1,0,0),&Up(x+1,3,0),&u1u3(0,0));
00283 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Dw(x-1,3,0),&d1d3(0,0));
00284 _sse_mulABC_set_331(&U(x,1,0,0),&Dw(x+1,3,0),&u1d3(0,0));
00285 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Up(x-1,3,0),&d1d3(0,0));
00286 _sse_mulABC_set_331(&U(x,1,0,0),&Bi(x+1,2,0),&u1b3(0,0));
00287 _sse_mulAHBC_set_331(&U(x-1,1,0,0),&Bi(x-1,2,0),&d1b3(0,0));
00288 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Up(x,3,0),&b1u3(0,0));
00289 _sse_mulABC_set_331(&U.em(x,2,3,0,0),&Dw(x,3,0),&b1d3(0,0));
00290 _sse_mulABC_set_331(&U(x,2,0,0),&Up(x+2,1,0),&u2u1(0,0));
00291 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Dw(x-2,1,0),&d2d1(0,0));
00292 _sse_mulABC_set_331(&U(x,2,0,0),&Dw(x+2,1,0),&u2d1(0,0));
00293 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Up(x-2,1,0),&d2d1(0,0));
00294 _sse_mulABC_set_331(&U(x,2,0,0),&Bi(x+2,0,0),&u2b1(0,0));
00295 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Bi(x-2,0,0),&d2b1(0,0));
00296 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Up(x,1,0),&b2u1(0,0));
00297 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Dw(x,1,0),&b2d1(0,0));
00298 _sse_mulABC_set_331(&U(x,2,0,0),&Up(x+2,2,0),&u2u2(0,0));
00299 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Dw(x-2,2,0),&d2d2(0,0));
00300 _sse_mulABC_set_331(&U(x,2,0,0),&Bi(x+2,1,0),&u2b2(0,0));
00301 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Bi(x-2,1,0),&d2b2(0,0));
00302 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Up(x,2,0),&b2u2(0,0));
00303 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Dw(x,2,0),&b2d2(0,0));
00304 _sse_mulABC_set_331(&U(x,2,0,0),&Up(x+2,3,0),&u2u3(0,0));
00305 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Dw(x-2,3,0),&d2d3(0,0));
00306 _sse_mulABC_set_331(&U(x,2,0,0),&Dw(x+2,3,0),&u2d3(0,0));
00307 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Up(x-2,3,0),&d2d3(0,0));
00308 _sse_mulABC_set_331(&U(x,2,0,0),&Bi(x+2,2,0),&u2b3(0,0));
00309 _sse_mulAHBC_set_331(&U(x-2,2,0,0),&Bi(x-2,2,0),&d2b3(0,0));
00310 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Up(x,3,0),&b2u3(0,0));
00311 _sse_mulABC_set_331(&U.em(x,1,3,0,0),&Dw(x,3,0),&b2d3(0,0));
00312 _sse_mulABC_set_331(&U(x,3,0,0),&Up(x+3,1,0),&u3u1(0,0));
00313 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Dw(x-3,1,0),&d3d1(0,0));
00314 _sse_mulABC_set_331(&U(x,3,0,0),&Dw(x+3,1,0),&u3d1(0,0));
00315 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Up(x-3,1,0),&d3d1(0,0));
00316 _sse_mulABC_set_331(&U(x,3,0,0),&Bi(x+3,0,0),&u3b1(0,0));
00317 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Bi(x-3,0,0),&d3b1(0,0));
00318 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Up(x,1,0),&b3u1(0,0));
00319 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Dw(x,1,0),&b3d1(0,0));
00320 _sse_mulABC_set_331(&U(x,3,0,0),&Up(x+3,2,0),&u3u2(0,0));
00321 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Dw(x-3,2,0),&d3d2(0,0));
00322 _sse_mulABC_set_331(&U(x,3,0,0),&Dw(x+3,2,0),&u3d2(0,0));
00323 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Up(x-3,2,0),&d3d2(0,0));
00324 _sse_mulABC_set_331(&U(x,3,0,0),&Bi(x+3,1,0),&u3b2(0,0));
00325 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Bi(x-3,1,0),&d3b2(0,0));
00326 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Up(x,2,0),&b3u2(0,0));
00327 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Dw(x,2,0),&b3d2(0,0));
00328 _sse_mulABC_set_331(&U(x,3,0,0),&Up(x+3,3,0),&u3u3(0,0));
00329 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Dw(x-3,3,0),&d3d3(0,0));
00330 _sse_mulABC_set_331(&U(x,3,0,0),&Bi(x+3,2,0),&u3b3(0,0));
00331 _sse_mulAHBC_set_331(&U(x-3,3,0,0),&Bi(x-3,2,0),&d3b3(0,0));
00332 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Up(x,3,0),&b3u3(0,0));
00333 _sse_mulABC_set_331(&U.em(x,1,2,0,0),&Dw(x,3,0),&b3d3(0,0));
00334 for(b=0; b<4; b++) {
00335 k=4*a+b;
00336 _sse_mulAbC_set_31(&psi_in(x,a,0),c_id[k],&out(0,0));
00337 k=16*a+4*b+0;
00338 if(c_u[k]!=0) _sse_mulAbC_add_31(&Up(x,0,0),c_u[k],&out(0,0));
00339 if(c_d[k]!=0) _sse_mulAbC_add_31(&Dw(x,0,0),c_d[k],&out(0,0));
00340 k=16*a+4*b+1;
00341 if(c_u[k]!=0) _sse_mulAbC_add_31(&Up(x,1,0),c_u[k],&out(0,0));
00342 if(c_d[k]!=0) _sse_mulAbC_add_31(&Dw(x,1,0),c_d[k],&out(0,0));
00343 if(c_e[k]!=0) _sse_mulAbC_add_31(&Ei(x,0,0),c_e[k],&out(0,0));
00344 if(c_b[k]!=0) _sse_mulAbC_add_31(&Bi(x,0,0),c_b[k],&out(0,0));
00345 k=16*a+4*b+2;
00346 if(c_u[k]!=0) _sse_mulAbC_add_31(&Up(x,2,0),c_u[k],&out(0,0));
00347 if(c_d[k]!=0) _sse_mulAbC_add_31(&Dw(x,2,0),c_d[k],&out(0,0));
00348 if(c_e[k]!=0) _sse_mulAbC_add_31(&Ei(x,1,0),c_e[k],&out(0,0));
00349 if(c_b[k]!=0) _sse_mulAbC_add_31(&Bi(x,1,0),c_b[k],&out(0,0));
00350 k=16*a+4*b+3;
00351 if(c_u[k]!=0) _sse_mulAbC_add_31(&Up(x,3,0),c_u[k],&out(0,0));
00352 if(c_d[k]!=0) _sse_mulAbC_add_31(&Dw(x,3,0),c_d[k],&out(0,0));
00353 if(c_e[k]!=0) _sse_mulAbC_add_31(&Ei(x,2,0),c_e[k],&out(0,0));
00354 if(c_b[k]!=0) _sse_mulAbC_add_31(&Bi(x,2,0),c_b[k],&out(0,0));
00355 k=64*a+16*b+4*0+0;
00356 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u0u0(0,0),c_uu[k],&out(0,0));
00357 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d0d0(0,0),c_dd[k],&out(0,0));
00358 k=64*a+16*b+4*1+1;
00359 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u1u1(0,0),c_uu[k],&out(0,0));
00360 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d1d1(0,0),c_dd[k],&out(0,0));
00361 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u1b1(0,0),c_ub[k],&out(0,0));
00362 if(c_db[k]!=0) _sse_mulAbC_add_31(&d1b1(0,0),c_db[k],&out(0,0));
00363 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b1u1(0,0),c_bu[k],&out(0,0));
00364 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b1d1(0,0),c_bd[k],&out(0,0));
00365 k=64*a+16*b+4*1+2;
00366 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u1u2(0,0),c_uu[k],&out(0,0));
00367 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d1d2(0,0),c_dd[k],&out(0,0));
00368 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u1d2(0,0),c_ud[k],&out(0,0));
00369 if(c_du[k]!=0) _sse_mulAbC_add_31(&d1u2(0,0),c_du[k],&out(0,0));
00370 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u1b2(0,0),c_ub[k],&out(0,0));
00371 if(c_db[k]!=0) _sse_mulAbC_add_31(&d1b2(0,0),c_db[k],&out(0,0));
00372 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b1u2(0,0),c_bu[k],&out(0,0));
00373 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b1d2(0,0),c_bd[k],&out(0,0));
00374 k=64*a+16*b+4*1+3;
00375 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u1u3(0,0),c_uu[k],&out(0,0));
00376 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d1d3(0,0),c_dd[k],&out(0,0));
00377 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u1d3(0,0),c_ud[k],&out(0,0));
00378 if(c_du[k]!=0) _sse_mulAbC_add_31(&d1u3(0,0),c_du[k],&out(0,0));
00379 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u1b3(0,0),c_ub[k],&out(0,0));
00380 if(c_db[k]!=0) _sse_mulAbC_add_31(&d1b3(0,0),c_db[k],&out(0,0));
00381 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b1u3(0,0),c_bu[k],&out(0,0));
00382 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b1d3(0,0),c_bd[k],&out(0,0));
00383 k=64*a+16*b+4*2+1;
00384 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u2u1(0,0),c_uu[k],&out(0,0));
00385 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d2d1(0,0),c_dd[k],&out(0,0));
00386 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u2d1(0,0),c_ud[k],&out(0,0));
00387 if(c_du[k]!=0) _sse_mulAbC_add_31(&d2u1(0,0),c_du[k],&out(0,0));
00388 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u2b1(0,0),c_ub[k],&out(0,0));
00389 if(c_db[k]!=0) _sse_mulAbC_add_31(&d2b1(0,0),c_db[k],&out(0,0));
00390 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b2u1(0,0),c_bu[k],&out(0,0));
00391 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b2d1(0,0),c_bd[k],&out(0,0));
00392 k=64*a+16*b+4*2+2;
00393 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u2u2(0,0),c_uu[k],&out(0,0));
00394 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d2d2(0,0),c_dd[k],&out(0,0));
00395 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u2b2(0,0),c_ub[k],&out(0,0));
00396 if(c_db[k]!=0) _sse_mulAbC_add_31(&d2b2(0,0),c_db[k],&out(0,0));
00397 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b2u2(0,0),c_bu[k],&out(0,0));
00398 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b2d2(0,0),c_bd[k],&out(0,0));
00399 k=64*a+16*b+4*2+3;
00400 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u2u3(0,0),c_uu[k],&out(0,0));
00401 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d2d3(0,0),c_dd[k],&out(0,0));
00402 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u2d3(0,0),c_ud[k],&out(0,0));
00403 if(c_du[k]!=0) _sse_mulAbC_add_31(&d2u3(0,0),c_du[k],&out(0,0));
00404 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u2b3(0,0),c_ub[k],&out(0,0));
00405 if(c_db[k]!=0) _sse_mulAbC_add_31(&d2b3(0,0),c_db[k],&out(0,0));
00406 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b2u3(0,0),c_bu[k],&out(0,0));
00407 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b2d3(0,0),c_bd[k],&out(0,0));
00408 k=64*a+16*b+4*3+1;
00409 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u3u1(0,0),c_uu[k],&out(0,0));
00410 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d3d1(0,0),c_dd[k],&out(0,0));
00411 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u3d1(0,0),c_ud[k],&out(0,0));
00412 if(c_du[k]!=0) _sse_mulAbC_add_31(&d3u1(0,0),c_du[k],&out(0,0));
00413 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u3b1(0,0),c_ub[k],&out(0,0));
00414 if(c_db[k]!=0) _sse_mulAbC_add_31(&d3b1(0,0),c_db[k],&out(0,0));
00415 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b3u1(0,0),c_bu[k],&out(0,0));
00416 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b3d1(0,0),c_bd[k],&out(0,0));
00417 k=64*a+16*b+4*3+2;
00418 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u3u2(0,0),c_uu[k],&out(0,0));
00419 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d3d2(0,0),c_dd[k],&out(0,0));
00420 if(c_ud[k]!=0) _sse_mulAbC_add_31(&u3d2(0,0),c_ud[k],&out(0,0));
00421 if(c_du[k]!=0) _sse_mulAbC_add_31(&d3u2(0,0),c_du[k],&out(0,0));
00422 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u3b2(0,0),c_ub[k],&out(0,0));
00423 if(c_db[k]!=0) _sse_mulAbC_add_31(&d3b2(0,0),c_db[k],&out(0,0));
00424 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b3u2(0,0),c_bu[k],&out(0,0));
00425 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b3d2(0,0),c_bd[k],&out(0,0));
00426 k=64*a+16*b+4*3+3;
00427 if(c_uu[k]!=0) _sse_mulAbC_add_31(&u3u3(0,0),c_uu[k],&out(0,0));
00428 if(c_dd[k]!=0) _sse_mulAbC_add_31(&d3d3(0,0),c_dd[k],&out(0,0));
00429 if(c_ub[k]!=0) _sse_mulAbC_add_31(&u3b3(0,0),c_ub[k],&out(0,0));
00430 if(c_db[k]!=0) _sse_mulAbC_add_31(&d3b3(0,0),c_db[k],&out(0,0));
00431 if(c_bu[k]!=0) _sse_mulAbC_add_31(&b3u3(0,0),c_bu[k],&out(0,0));
00432 if(c_bd[k]!=0) _sse_mulAbC_add_31(&b3d3(0,0),c_bd[k],&out(0,0));
00433 _sse_sumAC_add_31(&out(0,0),&psi_out(x,b,0));
00434
00435 }
00436
00437 }
00438
00439 }
00440 }
00441 };
00442 #endif