#include #include #include #include #include #include #ifdef PARALLEL #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include //some piece of **** defines these elsewhere, so the bfm header gets screwed up #undef ND #undef SPINOR_SIZE #undef HALF_SPINOR_SIZE #undef GAUGE_SIZE #undef Nmu #undef Ncb #undef NMinusPlus #undef Minus #undef Plus #undef DaggerYes #undef DaggerNo #undef SingleToDouble #undef DoubleToSingle #undef Odd #undef Even #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_BFM #include #endif using namespace std; USING_NAMESPACE_CPS void setup_double_latt(Lattice &double_latt, Matrix* orig_gfield, bool gparity_X, bool gparity_Y){ //orig latt ( U_0 U_1 ) ( U_2 U_3 ) ( U_4 U_5 ) ( U_6 U_7 ) //double tatt ( U_0 U_1 U_2 U_3 ) ( U_4 U_5 U_6 U_7 ) ( U_0* U_1* U_2* U_3* ) ( U_4* U_5* U_6* U_7* ) Matrix *dbl_gfield = double_latt.GaugeField(); if(!UniqueID()){ printf("Setting up 1f lattice.\n"); fflush(stdout); } SingleToDoubleLattice lattdoubler(gparity_X,gparity_Y,orig_gfield,double_latt); lattdoubler.Run(); if(!UniqueID()){ printf("Finished setting up 1f lattice\n"); fflush(stdout); } } void setup_double_rng(bool gparity_X, bool gparity_Y){ //orig 4D rng 2 stacked 4D volumes //orig ([R_0 R_1][R'_0 R'_1])([R_2 R_3][R'_2 R'_3])([R_4 R_5][R'_4 R'_5])([R_6 R_7][R'_6 R'_7]) //double (R_0 R_1 R_2 R_3)(R_4 R_5 R_6 R_7)(R'_0 R'_1 R'_2 R'_3)(R'_4 R'_5 R'_6 R'_7) //orig 5D rng 2 stacked 4D volumes per ls/2 slice (ls/2 as only one RNG per 2^4 block) SingleToDouble4dRNG fourDsetup(gparity_X,gparity_Y); SingleToDouble5dRNG fiveDsetup(gparity_X,gparity_Y); LRG.Reinitialize(); //reset the LRG and prepare for doubled lattice form if(!UniqueID()){ printf("Setting up 1f 4D RNG\n"); fflush(stdout); } fourDsetup.Run(); if(!UniqueID()){ printf("Setting up 1f 5D RNG\n"); fflush(stdout); } fiveDsetup.Run(); } void setup_double_matrixfield(Matrix* double_mat, Matrix* orig_mat, int nmat_per_site, bool gparity_X, bool gparity_Y){ if(!UniqueID()){ printf("Setting up 1f matrix field.\n"); fflush(stdout); } SingleToDoubleMatrixField doubler(gparity_X,gparity_Y,nmat_per_site,orig_mat,double_mat); doubler.Run(); if(!UniqueID()){ printf("Finished setting up 1f matrixfield\n"); fflush(stdout); } } void setup_double_5d_vector(Vector *double_vect, Vector* orig_vect, bool gparity_X, bool gparity_Y){ if(!UniqueID()){ printf("Setting up 1f vector field.\n"); fflush(stdout); } SingleToDouble5dVectorField doubler(gparity_X, gparity_Y, orig_vect, double_vect, CANONICAL); doubler.Run(); if(!UniqueID()){ printf("Finished setting up 1f vector field\n"); fflush(stdout); } } void GaugeTransformU(Matrix *gtrans, Lattice &lat); void convert_ferm_cpsord_sord(Float *cps, Float* &sord, bfm_evo &bfm){ Fermion_t handle[2] = { bfm.allocFermion(), bfm.allocFermion() }; bfm.cps_impexFermion(cps,handle,1); long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); if(GJP.Gparity()) f_size*=2; sord = (Float *)pmalloc(sizeof(Float) * f_size); bfm.cps_impexFermion_s(sord,handle,0); bfm.freeFermion(handle[0]); bfm.freeFermion(handle[1]); } void convert_ferm_sord_cpsord(Float *sord, Float* &cps, bfm_evo &bfm){ Fermion_t handle[2] = { bfm.allocFermion(), bfm.allocFermion() }; bfm.cps_impexFermion_s(sord,handle,1); long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); if(GJP.Gparity()) f_size*=2; cps = (Float *)pmalloc(sizeof(Float) * f_size); bfm.cps_impexFermion(cps,handle,0); bfm.freeFermion(handle[0]); bfm.freeFermion(handle[1]); } void setup_bfmargs(bfmarg &dwfa, const BfmSolver &solver){ printf("Setting up bfmargs\n"); int nthreads = 1; #if TARGET == BGQ nthreads = 64; #endif omp_set_num_threads(nthreads); dwfa.node_latt[0] = GJP.XnodeSites(); dwfa.node_latt[1] = GJP.YnodeSites(); dwfa.node_latt[2] = GJP.ZnodeSites(); dwfa.node_latt[3] = GJP.TnodeSites(); multi1d ncoor(4); multi1d procs(4); for(int i=0;i<4;i++){ ncoor[i] = GJP.NodeCoor(i); procs[i] = GJP.Nodes(i); } if(GJP.Gparity()){ dwfa.gparity = 1; printf("G-parity directions: "); for(int d=0;d<3;d++) if(GJP.Bc(d) == BND_CND_GPARITY){ dwfa.gparity_dir[d] = 1; printf("%d ",d); } else dwfa.gparity_dir[d] = 0; for(int d=0;d<4;d++){ dwfa.nodes[d] = procs[d]; dwfa.ncoor[d] = ncoor[d]; } printf("\n"); } dwfa.verbose=1; dwfa.reproduce=0; bfmarg::Threads(nthreads); bfmarg::Reproduce(0); bfmarg::ReproduceChecksum(0); bfmarg::ReproduceMasterCheck(0); bfmarg::Verbose(1); for(int mu=0;mu<4;mu++){ if ( procs[mu]>1 ) { dwfa.local_comm[mu] = 0; printf("Non-local comms in direction %d\n",mu); } else { dwfa.local_comm[mu] = 1; printf("Local comms in direction %d\n",mu); } } dwfa.precon_5d = 1; if(solver == HmCayleyTanh){ dwfa.precon_5d = 0; //mobius uses 4d preconditioning dwfa.mobius_scale = 2.0; //b = 0.5(scale+1) c=0.5(scale-1), hence this corresponds to b=1.5 and c=0.5, the params used for the 48^3 } dwfa.Ls = GJP.SnodeSites(); dwfa.solver = solver; dwfa.M5 = toDouble(GJP.DwfHeight()); dwfa.mass = toDouble(0.5); dwfa.Csw = 0.0; dwfa.max_iter = 5000; dwfa.residual = 1e-08; printf("Finished setting up bfmargs\n"); } Float* rand_5d_canonical_fermion(Lattice &lat){ long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); if(GJP.Gparity()) f_size*=2; Float *v1 = (Float *)pmalloc(sizeof(Float) * f_size); printf("Making random gaussian 5d vector\n"); lat.RandGaussVector((Vector*)v1, 0.5, 2, CANONICAL, FIVE_D); printf("Finished making random gaussian vector\n"); return v1; } static void mTransDotMStarEqual(Float* C, const Float* A, const Float* B){ C[0] = (B[0] * A[0] + B[1] * A[1]) + (B[6] * A[6] + B[7] * A[7]) + (B[12] * A[12] + B[13] * A[13]); C[1] = (B[0] * A[1] - B[1] * A[0]) + (B[6] * A[7] - B[7] * A[6]) + (B[12] * A[13] - B[13] * A[12]); C[2] = (B[0] * A[2] + B[1] * A[3]) + (B[6] * A[8] + B[7] * A[9]) + (B[12] * A[14] + B[13] * A[15]); C[3] = (B[0] * A[3] - B[1] * A[2]) + (B[6] * A[9] - B[7] * A[8]) + (B[12] * A[15] - B[13] * A[14]); C[4] = (B[0] * A[4] + B[1] * A[5]) + (B[6] * A[10] + B[7] * A[11]) + (B[12] * A[16] + B[13] * A[17]); C[5] = (B[0] * A[5] - B[1] * A[4]) + (B[6] * A[11] - B[7] * A[10]) + (B[12] * A[17] - B[13] * A[16]); C[6] = (B[2] * A[0] + B[3] * A[1]) + (B[8] * A[6] + B[9] * A[7]) + (B[14] * A[12] + B[15] * A[13]); C[7] = (B[2] * A[1] - B[3] * A[0]) + (B[8] * A[7] - B[9] * A[6]) + (B[14] * A[13] - B[15] * A[12]); C[8] = (B[2] * A[2] + B[3] * A[3]) + (B[8] * A[8] + B[9] * A[9]) + (B[14] * A[14] + B[15] * A[15]); C[9] = (B[2] * A[3] - B[3] * A[2]) + (B[8] * A[9] - B[9] * A[8]) + (B[14] * A[15] - B[15] * A[14]); C[10] = (B[2] * A[4] + B[3] * A[5]) + (B[8] * A[10] + B[9] * A[11]) + (B[14] * A[16] + B[15] * A[17]); C[11] = (B[2] * A[5] - B[3] * A[4]) + (B[8] * A[11] - B[9] * A[10]) + (B[14] * A[17] - B[15] * A[16]); C[12] = (B[4] * A[0] + B[5] * A[1]) + (B[10] * A[6] + B[11] * A[7]) + (B[16] * A[12] + B[17] * A[13]); C[13] = (B[4] * A[1] - B[5] * A[0]) + (B[10] * A[7] - B[11] * A[6]) + (B[16] * A[13] - B[17] * A[12]); C[14] = (B[4] * A[2] + B[5] * A[3]) + (B[10] * A[8] + B[11] * A[9]) + (B[16] * A[14] + B[17] * A[15]); C[15] = (B[4] * A[3] - B[5] * A[2]) + (B[10] * A[9] - B[11] * A[8]) + (B[16] * A[15] - B[17] * A[14]); C[16] = (B[4] * A[4] + B[5] * A[5]) + (B[10] * A[10] + B[11] * A[11]) + (B[16] * A[16] + B[17] * A[17]); C[17] = (B[4] * A[5] - B[5] * A[4]) + (B[10] * A[11] - B[11] * A[10]) + (B[16] * A[17] - B[17] * A[16]); } static int no_gparity_test(GwilsonFdwf* lattice, const BfmSolver &solver){ //Try using Fbfm and old DWF code to get the same MD force vectors if(solver != DWF) return 0; bool fail; bfmarg dwfa; setup_bfmargs(dwfa,solver); int mom_size = 18*4*GJP.VolNodeSites(); long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); delete lattice; //temporarily delete the lattice object to remove the scope lock. The memory allocated to the gauge field remains Fbfm::current_arg_idx = 0; Fbfm::bfm_args[0] = dwfa; GnoneFbfm *fbfm = new GnoneFbfm; //Generate 2 random CANONICAL ordered fermions LatRanGen LRGbak(LRG); Float *v1 = rand_5d_canonical_fermion(*fbfm); Float *v2 = rand_5d_canonical_fermion(*fbfm); LRG = LRGbak; //Convert to bfm format Fermion_t in[2] = {fbfm->bd.allocFermion(), fbfm->bd.allocFermion()}; Fermion_t in2[2] = {fbfm->bd.allocFermion(), fbfm->bd.allocFermion()}; fbfm->bd.cps_impexFermion(v1,in,1); fbfm->bd.cps_impexFermion(v2,in2,1); //expects phi to be in cps checkerboard format. We can use in[0] and in2[0] as above, and export from bfm to cps format size_t f_size_cb = f_size / 2; Vector* phi1 = (Vector *)pmalloc(sizeof(Float) * f_size_cb); Vector* phi2 = (Vector *)pmalloc(sizeof(Float) * f_size_cb); fbfm->bd.cps_impexcbFermion((Float *)phi1, in[0], 0, 1); //export fbfm->bd.cps_impexcbFermion((Float *)phi2, in2[0], 0, 1); Float *chi = (Float*)phi1; Float *v1_fbfm; Float *v2_fbfm; Float *rho = (Float *)pmalloc(sizeof(Float)*f_size_cb); { Float *v1_fbfm_tmp = (Float *)pmalloc(sizeof(Float) * f_size); Float *v2_fbfm_tmp = (Float *)pmalloc(sizeof(Float) * f_size); fbfm->MatPc( (Vector*)rho, (Vector*)chi, 0.5, DAG_NO); // rho [ODD] = Mprec chi [ODD] fbfm->CalcHmdForceVecsBilinear(v1_fbfm_tmp, v2_fbfm_tmp, (Vector*)rho, (Vector*)chi, 0.5); /* * From BFM: * DWF : Mee = Moo = (5-M5) * Meo = -1/2 Ddwf_eo (5d hopping term) * Moe = -1/2 Ddwf_oe (5d hopping term) * Mprec = Moo-MoeMee^{-1}Meo */ //CalcHmdForceVecsBilinear calculates the following: // v2e = Bee * 1/(5-M5) * Meo phi2 // v2o = Boo phi2 // v1e = 1/(5-M5) Meo^dag phi1 // v1o = 1oo phi1 //For DWF, Boo = Bee = 1 and phi1 = rho, phi2 = chi in the above naming convention //hence the net result is //v1 = ( rho, 1/(5-M5) Meo^dag rho ) v2 = ( chi, 1/(5-M5) * Meo chi ) //i.e. //v1 = ( rho, -1/[2(5-M5)] Deo^dag rho ) v2 = ( chi, -1/[2(5-M5)] * Deo chi ) //with rho = Mprec chi //v1 = ( Mprec chi, -1/[2(5-M5)] Deo^dag Mprec chi ) v2 = ( chi, -1/[2(5-M5)] * Deo chi ) // ((Above vectors are in (odd,even) format)) //convert to CPS canonical ordering convert_ferm_sord_cpsord(v1_fbfm_tmp, v1_fbfm, fbfm->bd); convert_ferm_sord_cpsord(v2_fbfm_tmp, v2_fbfm, fbfm->bd); pfree(v1_fbfm_tmp); pfree(v2_fbfm_tmp); } Float* mom_test5 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iEvolveMomFforce( (Matrix*)mom_test5, (Vector*)chi, 0.5, 1.0); delete fbfm; lattice = new GwilsonFdwf; //put the lattice back where it was { CgArg cg_arg ; cg_arg.mass = 0.5; Float *v1_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size); Float *v2_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size); /* DiracOp(Lattice& latt, Vector *f_field_out, Vector *f_field_in, CgArg *arg, CnvFrmType convert) */ DiracOpDwf* dwf = new DiracOpDwf(*lattice, (Vector*)v2_dopdwf, (Vector*)v1_dopdwf, &cg_arg, CNV_FRM_YES) ; { //Test preconditioned matrix Float *rho_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size_cb); dwf->MatPc( (Vector*)rho_dopdwf, (Vector*)chi); //Note: CPS Mprec = 1 - 1/[2(5-M5)]^2 Doe Deo //which differs from BFM Mprec = (5-M5) - 1/[4(5-M5)] Deo Doe //by a normalization factor Mprec [CPS] = 1/(5-M5) Mprec [BFM] Float norm = 5-GJP.DwfHeight(); fail = false; for(int i=0;i 1e-07 ){ printf("|Fail Mprec test Fbfm vs old code %d: %f %f\n",i,norm*rho_dopdwf[i], rho[i]); fail = true; } } if(fail){ printf("Failed Mprec test Fbfm vs old code test\n"); exit(-1); } else printf("Passed Mprec test Fbfm vs old code test\n"); } //Net result is: // f_in = ( -kappa^2 Mprec chi, -kappa^2 D_eo^dag Mprec chi ) f_out = ( chi, D_eo chi ) // where kappa = 1/[2(5-M5)] // where chi = psi1 // These are converted into CANONICAL format when DiracOpDWF is destroyed dwf->CalcHmdForceVecs((Vector*)chi) ; delete dwf; /* Comparing the CPS and BFM HMD force vectors: * BFM : v1 = ( Mprec chi, -1/[2(5-M5)] Deo^dag Mprec chi ) v2 = ( chi, -1/[2(5-M5)] * Deo chi ) * CPS : f_in = ( -1/[2(5-M5)]^2 Mprec chi, -1/[2(5-M5)]^2 D_eo Mprec chi ) f_out = ( chi, D_eo chi ) * and using Mprec [CPS] = 1/(5-M5) Mprec [BFM] * CPS : f_in = ( -1/[2(5-M5)]^2 1/(5-M5) Mprec[BFM] chi, -1/[2(5-M5)]^2 1/(5-M5) D_eo^dag Mprec [BFM] chi ) f_out = ( chi, D_eo chi ) * We expect normalization differences * v1 [ODD,BFM] = -[2(5-M5)]^2 (5-M5) f_in [ODD,CPS] * v1 [EVEN,BFM] = 2(5-M5)^2 * v2 [ODD,BFM] = f_out [ODD,CPS] * v2 [EVEN,BFM] = -1/[2(5-M5)] f_out [EVEN,CPS] * BFM and CPS use 5D preconditioning, i.e. cb = (x+y+z+t+s)&0x1 */ fail = false; for(int i=0;i 1e-08 ){ printf("|Fail MD force vec test Fbfm vs old code v1 (%d %d %d %d %d, %d) [cb %d]: %f %f\n",x[0],x[1],x[2],x[3],x[4], midx, cb, norm_v1 * v1_dopdwf[i],v1_fbfm[i]); fail = true; } if( fabs(norm_v2 * v2_dopdwf[i] - v2_fbfm[i]) > 1e-08 ){ printf("|Fail MD force vec test Fbfm vs old code v2 (%d %d %d %d %d, %d) [cb %d]: %f %f\n",x[0],x[1],x[2],x[3],x[4], midx, cb, norm_v2 * v2_dopdwf[i],v2_fbfm[i]); fail = true; } } if(fail){ printf("Failed MD force vec Fbfm vs old code test\n"); exit(-1); } else printf("Passed MD force vec Fbfm vs old code test\n"); //OK, try to calculate the same thing again but using the old G-parity DWF evolution code I used for the 16^3 lattice Float* mom_test6 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iEvolveMomFforce( (Matrix*)mom_test6, (Vector*)chi, 0.5, 1.0); Float _5mM5 = 5-GJP.DwfHeight(); Float mom_norm = _5mM5 * _5mM5; //Hantao knows about this normalization difference; he did not attempt to reconcile the differences between the bfm and CPS fermion normalizations fail = false; for(int i=0;i1e-08){ printf("FforceWilsonType vs. old DWF code test fail midx %d mu %d (%d %d %d %d): %f %f\n",midx,mu,x[0],x[1],x[2],x[3],mom_norm*mom_test6[i],mom_test5[i]); fail=true; } } if(fail){ printf("Failed FforceWilsonType vs. old DWF code test\n"); exit(-1); } else printf("Passed FforceWilsonType vs. old DWF code test\n"); pfree(mom_test5); pfree(mom_test6); pfree(v1_fbfm); pfree(v2_fbfm); pfree(v1_dopdwf); pfree(v2_dopdwf); } return 0; } #if 1 int main(int argc,char *argv[]) { Start(&argc,&argv); //initialises QMP CommandLine::is(argc,argv); bool gparity_X(false); bool gparity_Y(false); int arg0 = CommandLine::arg_as_int(0); printf("Arg0 is %d\n",arg0); if(arg0==1){ gparity_X=true; printf("Doing G-parity HMC test in X direction\n"); }else if(arg0==2){ printf("Doing G-parity HMC test in X and Y directions\n"); gparity_X = true; gparity_Y = true; }else{ printf("Doing No G-parity test\n"); } bool dbl_latt_storemode(false); bool save_config(false); bool load_config(false); bool load_lrg(false); bool save_lrg(false); char *load_config_file; char *save_config_file; char *save_lrg_file; char *load_lrg_file; bool gauge_fix(false); bool verbose(false); bool skip_gparity_inversion(false); bool unit_gauge(false); BfmSolver solver = DWF; int size[] = {2,2,2,2,2}; int i=2; while(iargc-6){ printf("Did not specify enough arguments for 'latt' (require 5 dimensions)\n"); exit(-1); } size[0] = CommandLine::arg_as_int(i); //CommandLine ignores zeroth input arg (i.e. executable name) size[1] = CommandLine::arg_as_int(i+1); size[2] = CommandLine::arg_as_int(i+2); size[3] = CommandLine::arg_as_int(i+3); size[4] = CommandLine::arg_as_int(i+4); i+=6; }else if( strncmp(cmd,"-save_double_latt",20) == 0){ dbl_latt_storemode = true; i++; }else if( strncmp(cmd,"-load_lrg",15) == 0){ if(i==argc-1){ printf("-load_lrg requires an argument\n"); exit(-1); } load_lrg=true; load_lrg_file = argv[i+1]; i+=2; }else if( strncmp(cmd,"-save_lrg",15) == 0){ if(i==argc-1){ printf("-save_lrg requires an argument\n"); exit(-1); } save_lrg=true; save_lrg_file = argv[i+1]; i+=2; }else if( strncmp(cmd,"-gauge_fix",15) == 0){ gauge_fix=true; i++; }else if( strncmp(cmd,"-verbose",15) == 0){ verbose=true; i++; }else if( strncmp(cmd,"-skip_gparity_inversion",30) == 0){ skip_gparity_inversion=true; i++; }else if( strncmp(cmd,"-unit_gauge",15) == 0){ unit_gauge=true; i++; }else if( strncmp(cmd,"-mobius",15) == 0){ solver= HmCayleyTanh; i++; }else{ if(UniqueID()==0) printf("Unrecognised argument: %s\n",cmd); exit(-1); } } printf("Lattice size is %d %d %d %d\n",size[0],size[1],size[2],size[3],size[4]); DoArg do_arg; do_arg.x_sites = size[0]; do_arg.y_sites = size[1]; do_arg.z_sites = size[2]; do_arg.t_sites = size[3]; do_arg.s_sites = size[4]; do_arg.x_node_sites = 0; do_arg.y_node_sites = 0; do_arg.z_node_sites = 0; do_arg.t_node_sites = 0; do_arg.s_node_sites = 0; do_arg.x_nodes = 0; do_arg.y_nodes = 0; do_arg.z_nodes = 0; do_arg.t_nodes = 0; do_arg.s_nodes = 0; do_arg.updates = 0; do_arg.measurements = 0; do_arg.measurefreq = 0; do_arg.cg_reprod_freq = 10; do_arg.x_bc = BND_CND_PRD; do_arg.y_bc = BND_CND_PRD; do_arg.z_bc = BND_CND_PRD; do_arg.t_bc = BND_CND_APRD; do_arg.start_conf_kind = START_CONF_ORD; do_arg.start_conf_load_addr = 0x0; do_arg.start_seed_kind = START_SEED_FIXED; do_arg.start_seed_filename = "../rngs/ckpoint_rng.0"; do_arg.start_conf_filename = "../configurations/ckpoint_lat.0"; do_arg.start_conf_alloc_flag = 6; do_arg.wfm_alloc_flag = 2; do_arg.wfm_send_alloc_flag = 2; do_arg.start_seed_value = 83209; do_arg.beta = 2.25; do_arg.c_1 = -3.3100000000000002e-01; do_arg.u0 = 1.0000000000000000e+00; do_arg.dwf_height = 1.8000000000000000e+00; do_arg.dwf_a5_inv = 1.0000000000000000e+00; do_arg.power_plaq_cutoff = 0.0000000000000000e+00; do_arg.power_plaq_exponent = 0; do_arg.power_rect_cutoff = 0.0000000000000000e+00; do_arg.power_rect_exponent = 0; do_arg.verbose_level = -1202; //VERBOSE_DEBUG_LEVEL; //-1202; do_arg.checksum_level = 0; do_arg.exec_task_list = 0; do_arg.xi_bare = 1.0000000000000000e+00; do_arg.xi_dir = 3; do_arg.xi_v = 1.0000000000000000e+00; do_arg.xi_v_xi = 1.0000000000000000e+00; do_arg.clover_coeff = 0.0000000000000000e+00; do_arg.clover_coeff_xi = 0.0000000000000000e+00; do_arg.xi_gfix = 1.0000000000000000e+00; do_arg.gfix_chkb = 1; do_arg.asqtad_KS = 0.0000000000000000e+00; do_arg.asqtad_naik = 0.0000000000000000e+00; do_arg.asqtad_3staple = 0.0000000000000000e+00; do_arg.asqtad_5staple = 0.0000000000000000e+00; do_arg.asqtad_7staple = 0.0000000000000000e+00; do_arg.asqtad_lepage = 0.0000000000000000e+00; do_arg.p4_KS = 0.0000000000000000e+00; do_arg.p4_knight = 0.0000000000000000e+00; do_arg.p4_3staple = 0.0000000000000000e+00; do_arg.p4_5staple = 0.0000000000000000e+00; do_arg.p4_7staple = 0.0000000000000000e+00; do_arg.p4_lepage = 0.0000000000000000e+00; if(verbose) do_arg.verbose_level = VERBOSE_DEBUG_LEVEL; if(gparity_X) do_arg.x_bc = BND_CND_GPARITY; if(gparity_Y) do_arg.y_bc = BND_CND_GPARITY; GJP.Initialize(do_arg); SerialIO::dbl_latt_storemode = dbl_latt_storemode; LRG.Initialize(); //usually initialised when lattice generated, but I pre-init here so I can load the state from file #ifdef HAVE_BFM cps_qdp_init(&argc,&argv); Chroma::initialize(&argc,&argv); #endif if(load_lrg){ if(UniqueID()==0) printf("Loading RNG state from %s\n",load_lrg_file); LRG.Read(load_lrg_file,32); } if(save_lrg){ if(UniqueID()==0) printf("Writing RNG state to %s\n",save_lrg_file); LRG.Write(save_lrg_file,32); } GwilsonFdwf* lattice = new GwilsonFdwf; if(!load_config){ printf("Creating gauge field\n"); if(!unit_gauge) lattice->SetGfieldDisOrd(); else lattice->SetGfieldOrd(); }else{ ReadLatticeParallel readLat; if(UniqueID()==0) printf("Reading: %s (NERSC-format)\n",load_config_file); readLat.read(*lattice,load_config_file); if(UniqueID()==0) printf("Config read.\n"); } if(save_config){ if(UniqueID()==0) printf("Saving config to %s\n",save_config_file); QioArg wt_arg(save_config_file,0.001); wt_arg.ConcurIONumber=32; WriteLatticeParallel wl; wl.setHeader("disord_id","disord_label",0); wl.write(*lattice,wt_arg); if(!wl.good()) ERR.General("main","()","Failed write lattice %s",save_config_file); if(UniqueID()==0) printf("Config written.\n"); } if(gauge_fix){ lattice->FixGaugeAllocate(FIX_GAUGE_COULOMB_T); lattice->FixGauge(1e-06,2000); if(!UniqueID()){ printf("Gauge fixing finished\n"); fflush(stdout); } } cps_qdp_init(&argc,&argv); if(!gparity_X && !gparity_Y) return no_gparity_test(lattice, solver); { //check mStarDotMTrans Float *a = new Float[18]; Float *b = new Float[18]; Float *c = new Float[18]; Float *d = new Float[18]; Float *e = new Float[18]; for(int i=0;i<18;i++){ a[i] = LRG.Grand(); b[i] = LRG.Grand(); } // mTransDotMStarEqual bfm_evo_aux::mStarDotMTransEqual(c,a,b); Matrix ma; ma.Conj(a); Matrix mb; mb.Trans(b); Matrix mc; mc.DotMEqual(ma,mb); Float *mcp = (Float*)&mc[0]; bool fail(false); for(int i=0;i<18;i++){ if(mcp[i] != c[i]){ printf("Mprod check fail %d: %f %f\n",i,mcp[i],c[i]); fail =true; } } if(fail){ printf("Mprod check fail\n"); exit(-1); } else{ printf("Mprod check pass\n"); } } Float *v1; Float *v2; Float *mom_test1; Float *v1_test2; Float *v2_test2; Float *mom_test3; { bfmarg dwfa; setup_bfmargs(dwfa,solver); bfm_evo bfm; bfm.init(dwfa); LatRanGen LRGbak(LRG); v1 = rand_5d_canonical_fermion(*lattice); v2 = rand_5d_canonical_fermion(*lattice); LRG = LRGbak; //v1 and v2 are in CPS canonical ordering, we need them in 's-ordering' Float* v1_sord; Float* v2_sord; convert_ferm_cpsord_sord(v1, v1_sord, bfm); convert_ferm_cpsord_sord(v2, v2_sord, bfm); int mom_size = 18*4*2*GJP.VolNodeSites(); mom_test1 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iBondCond(); Float* gauge = (Float*) lattice->GaugeField(); bfm.cps_importGauge(gauge); //first use random vectors to compute force from internal sites #pragma omp parallel { int me = omp_get_thread_num(); for(int mu=0;mu<4;mu++){ bfm.fforce_internal(mom_test1, gauge, v1_sord, v2_sord, 0.1234, mu, me, nthreads); } } //second test, try calculating MD force vectors //use v1 and v2 from test1 as the 'phi' vectors Fermion_t in[2] = {bfm.allocFermion(), bfm.allocFermion()}; Fermion_t in2[2] = {bfm.allocFermion(), bfm.allocFermion()}; bfm.cps_impexFermion(v1,in,1); bfm.cps_impexFermion(v2,in2,1); Fermion_t v1_tmp[2] = {bfm.allocFermion(), bfm.allocFermion()}; Fermion_t v2_tmp[2] = {bfm.allocFermion(), bfm.allocFermion()}; //zero the output vecs long f_size = (long)2*24 * GJP.VolNodeSites() * GJP.SnodeSites(); Float *zeroferm = (Float *)pmalloc(sizeof(Float) * f_size); for(int i=0;iBondCond(); //undo lattice bcs //OK, Try to calculate the same conjugate momentum using the FforceWilsonType routines (developed later but based on the bfm_evo code I believe) //Much of the code is the same as that above { Float* mom_test4 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;ibd.cps_impexcbFermion((Float *)phi1, in[0], 0, 1); //export fbfm->bd.cps_impexcbFermion((Float *)phi2, in2[0], 0, 1); fbfm->EvolveMomFforceBase((Matrix*)mom_test4, phi1, phi2, 0.5, 1.0); //compare mom bool fail(false); for(int i=0;i1e-08){ printf("FforceWilsonType test fail flav %d midx %d mu %d (%d %d %d %d): %f %f\n",flav,midx,mu,x[0],x[1],x[2],x[3],mom_test4[i],mom_test3[i]); fail=true; } } if(fail){ printf("Failed FforceWilsonType test\n"); exit(-1); } else printf("Passed FforceWilsonType test\n"); pfree(mom_test4); if(solver == DWF){ //test conversion between different formats //use v1 = CPS CANONICAL FORMAT Fermion_t v1_cb[2] = {fbfm->bd.allocFermion(), fbfm->bd.allocFermion()}; fbfm->bd.cps_impexFermion(v1, v1_cb,1); //try extracting the odd and even checkerboarded parts and check them Float *v1_odd = (Float *)pmalloc(sizeof(Float)*f_size_cb); Float *v1_even = (Float *)pmalloc(sizeof(Float)*f_size_cb); fbfm->bd.cps_impexcbFermion(v1_odd, v1_cb[1],0,1); fbfm->bd.cps_impexcbFermion(v1_even, v1_cb[0],0,0); //check fail = false; for(int i=0;i 1e-08 ){ if(midx == 0) printf("|Fail vec convert test flav %d (%d %d %d %d %d, %d) [cb %d]: %f %f\n",flav,x[0],x[1],x[2],x[3],x[4], midx,cb, vcb[cboff],v1[i]); fail = true; }//else if(midx == 0) printf("Pass vec convert test flav %d (%d %d %d %d %d, %d) [cb %d]: %f %f\n",flav,x[0],x[1],x[2],x[3],x[4], // midx,cb, vcb[cboff],v1[i]); } if(fail){ printf("Failed vec convert test\n"); exit(-1); } else printf("Passed vec convert test\n"); fbfm->bd.freeFermion(v1_cb[0]); fbfm->bd.freeFermion(v1_cb[1]); pfree(v1_odd); pfree(v1_even); } if(solver == DWF){ //OK, run the complete EvolveMomFforce code that includes generating applying the M to phi1 to generate phi2 Float *chi = (Float*)phi1; Float *v1_fbfm; Float *v2_fbfm; Float *rho = (Float *)pmalloc(sizeof(Float)*f_size_cb); { Float *v1_fbfm_tmp = (Float *)pmalloc(sizeof(Float) * f_size); Float *v2_fbfm_tmp = (Float *)pmalloc(sizeof(Float) * f_size); fbfm->MatPc( (Vector*)rho, (Vector*)chi, 0.5, DAG_NO); // rho = Moo chi fbfm->CalcHmdForceVecsBilinear(v1_fbfm_tmp, v2_fbfm_tmp, (Vector*)rho, (Vector*)chi, 0.5); //convert to CPS canonical ordering convert_ferm_sord_cpsord(v1_fbfm_tmp, v1_fbfm, fbfm->bd); convert_ferm_sord_cpsord(v2_fbfm_tmp, v2_fbfm, fbfm->bd); pfree(v1_fbfm_tmp); pfree(v2_fbfm_tmp); } Float* mom_test5 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iEvolveMomFforce( (Matrix*)mom_test5, (Vector*)chi, 0.5, 1.0); delete fbfm; lattice = new GwilsonFdwf; //put the lattice back where it was { CgArg cg_arg ; cg_arg.mass = 0.5; Float *v1_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size); Float *v2_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size); /* DiracOp(Lattice& latt, Vector *f_field_out, Vector *f_field_in, CgArg *arg, CnvFrmType convert) */ DiracOpDwf* dwf = new DiracOpDwf(*lattice, (Vector*)v2_dopdwf, (Vector*)v1_dopdwf, &cg_arg, CNV_FRM_YES) ; { //Test preconditioned matrix Float *rho_dopdwf = (Float *)pmalloc(sizeof(Float)*f_size_cb); dwf->MatPc( (Vector*)rho_dopdwf, (Vector*)chi); //Normalization factor: Mprec chi [CPS] = 1/(5-M5) Mprec chi [BFM] (cf. no_gparity_test at top) Float norm = 5-GJP.DwfHeight(); //multiply cps vector by norm fail = false; for(int i=0;i 1e-08 ){ printf("|Fail Mprec test Fbfm vs old code %d: %f %f\n",i,norm*rho_dopdwf[i], rho[i]); fail = true; } } if(fail){ printf("Failed Mprec test Fbfm vs old code test\n"); exit(-1); } else printf("Passed Mprec test Fbfm vs old code test\n"); } dwf->CalcHmdForceVecs((Vector*)chi) ; delete dwf; /* Comparing the CPS and BFM HMD force vectors: (cf. no_gparity_test at top) * BFM : v1 = ( Mprec chi, -1/[2(5-M5)] Deo^dag Mprec chi ) v2 = ( chi, -1/[2(5-M5)] * Deo chi ) * CPS : f_in = ( -1/[2(5-M5)]^2 Mprec chi, -1/[2(5-M5)]^2 D_eo Mprec chi ) f_out = ( chi, D_eo chi ) * and using Mprec [CPS] = 1/(5-M5) Mprec [BFM] * CPS : f_in = ( -1/[2(5-M5)]^2 1/(5-M5) Mprec[BFM] chi, -1/[2(5-M5)]^2 1/(5-M5) D_eo^dag Mprec [BFM] chi ) f_out = ( chi, D_eo chi ) * We expect normalization differences * v1 [ODD,BFM] = -[2(5-M5)]^2 (5-M5) f_in [ODD,CPS] * v1 [EVEN,BFM] = 2(5-M5)^2 * v2 [ODD,BFM] = f_out [ODD,CPS] * v2 [EVEN,BFM] = -1/[2(5-M5)] f_out [EVEN,CPS] * BFM and CPS use 5D preconditioning, i.e. cb = (x+y+z+t+s)&0x1 */ fail = false; for(int i=0;i 1e-08 ){ if(midx == 0) printf("|Fail MD force vec test Fbfm vs old code v1 flav %d (%d %d %d %d %d, %d) [cb %d]: %f %f\n",flav,x[0],x[1],x[2],x[3],x[4], midx,cb, norm_v1 * v1_dopdwf[i],v1_fbfm[i]); fail = true; } if( fabs(norm_v2 * v2_dopdwf[i] - v2_fbfm[i]) > 1e-08 ){ if(midx == 0) printf("|Fail MD force vec test Fbfm vs old code v2 flav %d (%d %d %d %d %d, %d) [cb %d]: %f %f\n",flav,x[0],x[1],x[2],x[3],x[4], midx,cb, norm_v2 * v2_dopdwf[i],v2_fbfm[i]); fail = true; } } if(fail){ printf("Failed MD force vec Fbfm vs old code test\n"); exit(-1); } else printf("Passed MD force vec Fbfm vs old code test\n"); pfree(v1_fbfm); pfree(v2_fbfm); pfree(v1_dopdwf); pfree(v2_dopdwf); } //OK, try to calculate the same thing again but using the old G-parity DWF evolution code I used for the 16^3 lattice Float* mom_test6 = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iEvolveMomFforce( (Matrix*)mom_test6, phi1, 0.5, 1.0); Float _5mM5 = 5-GJP.DwfHeight(); Float norm = _5mM5 * _5mM5; //Hantao knows about this normalization difference; he did not attempt to reconcile the differences between the bfm and CPS fermion normalizations /* Explanation: The pseudofermion fields are generated by multiplying dslash to random vectors that follow the normal distribution. So the pseudofermion has these conventions built in. */ fail = false; for(int i=0;i1e-08){ printf("FforceWilsonType vs. old DWF code test fail flav %d midx %d mu %d (%d %d %d %d): %f %f\n",flav,midx,mu,x[0],x[1],x[2],x[3],norm*mom_test6[i],mom_test5[i]); fail=true; } } if(fail){ printf("Failed FforceWilsonType vs. old DWF code test\n"); exit(-1); } else printf("Passed FforceWilsonType vs. old DWF code test\n"); pfree(mom_test5); pfree(mom_test6); }else{ delete fbfm; lattice = new GwilsonFdwf; } pfree(phi1); pfree(phi2); } bfm.freeFermion(in[0]); bfm.freeFermion(in[1]); bfm.freeFermion(in2[0]); bfm.freeFermion(in2[1]); } if(UniqueID()==0) printf("Starting double lattice section\n"); int array_size = 2*lattice->GsiteSize() * GJP.VolNodeSites() * sizeof(Float); Matrix *orig_lattice = (Matrix *) pmalloc(array_size); memcpy((void*)orig_lattice, (void*)lattice->GaugeField(), array_size); lattice->FreeGauge(); //free memory and reset delete lattice; //lattice objects are singleton (scope_lock) //setup 1f model. Upon calling GJP.Initialize the lattice size will be doubled in the appropriate directions //and the boundary condition set to APRD if(gparity_X) do_arg.gparity_1f_X = 1; if(gparity_Y) do_arg.gparity_1f_Y = 1; GJP.Initialize(do_arg); if(GJP.Gparity()){ printf("Que?\n"); exit(-1); } if(UniqueID()==0) printf("Doubled lattice : %d %d %d %d\n", GJP.XnodeSites()*GJP.Xnodes(),GJP.YnodeSites()*GJP.Ynodes(), GJP.ZnodeSites()*GJP.Znodes(),GJP.TnodeSites()*GJP.Tnodes()); #ifdef HAVE_BFM { QDP::multi1d nrow(Nd); for(int i = 0;i test(Nd); // nrow=size; QDP::Layout::setLattSize(nrow); QDP::Layout::create(); } #endif //cps_qdp_init(argc,argv); GwilsonFdwf *doubled_lattice = new GwilsonFdwf; setup_double_latt(*doubled_lattice,orig_lattice,gparity_X,gparity_Y); setup_double_rng(gparity_X,gparity_Y); if(gauge_fix){ doubled_lattice->FixGaugeAllocate(FIX_GAUGE_COULOMB_T); doubled_lattice->FixGauge(1e-06,2000); if(!UniqueID()){ printf("Gauge fixing finished\n"); fflush(stdout); } } long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); //convert the random CANONICAL vectors from the 2f setup to 1f form Float *v1_dbl = (Float *)pmalloc(sizeof(Float) * f_size); setup_double_5d_vector((Vector*)v1_dbl,(Vector*)v1, gparity_X,gparity_Y); pfree(v1); Float *v2_dbl = (Float *)pmalloc(sizeof(Float) * f_size); setup_double_5d_vector((Vector*)v2_dbl,(Vector*)v2, gparity_X,gparity_Y); pfree(v2); Float *v1_test2_dbl = (Float *)pmalloc(sizeof(Float) * f_size); setup_double_5d_vector((Vector*)v1_test2_dbl,(Vector*)v1_test2, gparity_X,gparity_Y); pfree(v1_test2); Float *v2_test2_dbl = (Float *)pmalloc(sizeof(Float) * f_size); setup_double_5d_vector((Vector*)v2_test2_dbl,(Vector*)v2_test2, gparity_X,gparity_Y); pfree(v2_test2); Float *mom_test1_dbl = (Float *) pmalloc(GJP.VolNodeSites()*18*4*sizeof(Float)); setup_double_matrixfield((Matrix*)mom_test1_dbl, (Matrix*)mom_test1, 4, gparity_X, gparity_Y); pfree(mom_test1); Float *mom_test3_dbl = (Float *) pmalloc(GJP.VolNodeSites()*18*4*sizeof(Float)); setup_double_matrixfield((Matrix*)mom_test3_dbl, (Matrix*)mom_test3, 4, gparity_X, gparity_Y); pfree(mom_test3); { long f_size = (long)24 * GJP.VolNodeSites() * GJP.SnodeSites(); Fermion_t in[2]; Fermion_t in2[2]; Float* mom_test3_dbllatt; bfmarg dwfa; { setup_bfmargs(dwfa,solver); bfm_evo bfm; bfm.init(dwfa); //v1 and v2 are in CPS canonical ordering, we need them in 's-ordering' Float* v1_sord; Float* v2_sord; convert_ferm_cpsord_sord(v1_dbl, v1_sord, bfm); convert_ferm_cpsord_sord(v2_dbl, v2_sord, bfm); int mom_size = 18*4*GJP.VolNodeSites(); Float* mom_test1_dbllat = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;iBondCond(); Float* gauge = (Float*) doubled_lattice->GaugeField(); bfm.cps_importGauge(gauge); int nthreads = 1; #if TARGET == BGQ nthreads = 64; #endif omp_set_num_threads(nthreads); //first use random vectors to compute force from internal sites #pragma omp parallel { int me = omp_get_thread_num(); for(int mu=0;mu<4;mu++){ bfm.fforce_internal(mom_test1_dbllat, gauge, v1_sord, v2_sord, 0.1234, mu, me, nthreads); } } bool fail(false); if(!gparity_Y){ //skip this test for quad lattice //compare mom for(int i=0;i1e-08){ printf("Mom test 1 fail midx %d mu %d (%d %d %d %d): %f %f\n",midx,mu,x[0],x[1],x[2],x[3],mom_test1_dbllat[i],mom_test1_dbl[i]); fail=true; }//else printf("Mom test 1 pass midx %d mu %d (%d %d %d %d): %f %f\n",midx,mu,x[0],x[1],x[2],x[3],mom_test1_dbllat[i],mom_test1_dbl[i]); } if(fail){ printf("Failed mom test 1\n"); exit(-1); } else printf("Passed mom test 1\n"); } //second test, try calculating MD force vectors //use v1 and v2 from test1 as the 'phi' vectors in[0] = bfm.allocFermion(); in[1] = bfm.allocFermion(); in2[0] = bfm.allocFermion(); in2[1] = bfm.allocFermion(); bfm.cps_impexFermion(v1_dbl,in,1); bfm.cps_impexFermion(v2_dbl,in2,1); Fermion_t v1_tmp[2] = {bfm.allocFermion(), bfm.allocFermion()}; Fermion_t v2_tmp[2] = {bfm.allocFermion(), bfm.allocFermion()}; //zero the output vecs Float *zeroferm = (Float *)pmalloc(sizeof(Float) * f_size); for(int i=0;i 1e-08 ){ printf("|Fail MD force vec test v1 (%d %d %d %d %d, %d): %f %f\n",x[0],x[1],x[2],x[3],x[4],midx,v1_test2_dbllatt[i],v1_test2_dbl[i]); fail = true; } //else printf("Pass MD force vec test v1 (%d %d %d %d %d, %d): %f %f\n",x[0],x[1],x[2],x[3],x[4],midx,v1_test2_dbllatt[i],v1_test2_dbl[i]); if( fabs(v2_test2_dbllatt[i] - v2_test2_dbl[i]) > 1e-08 ){ printf("|Fail MD force vec test v2 (%d %d %d %d %d, %d): %f %f\n",x[0],x[1],x[2],x[3],x[4],midx,v2_test2_dbllatt[i],v2_test2_dbl[i]); fail = true; } //else printf("Pass MD force vec test v2 (%d %d %d %d %d, %d): %f %f\n",x[0],x[1],x[2],x[3],x[4],midx,v2_test2_dbllatt[i],v2_test2_dbl[i]); } if(fail){ printf("Failed MD force vec test\n"); exit(-1); } else printf("Passed MD force vec test\n"); mom_test3_dbllatt = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;i1e-08){ printf("Mom test 3 fail midx %d mu %d (%d %d %d %d): %f %f\n",midx,mu,x[0],x[1],x[2],x[3],mom_test3_dbllatt[i],mom_test3_dbl[i]); fail=true; } } if(fail){ printf("Failed mom test 3\n"); exit(-1); } else printf("Passed mom test 3\n"); // bfm.freeFermion(in[0]); // bfm.freeFermion(in[1]); // bfm.freeFermion(in2[0]); // bfm.freeFermion(in2[1]); doubled_lattice->BondCond(); } //OK, Try to calculate the same conjugate momentum using the FforceWilsonType routines (developed later but based on the bfm_evo code I believe) //Much of the code is the same as that above { int mom_size = 18*4*GJP.VolNodeSites(); Float* mom_test4_dbllatt = (Float *)pmalloc( sizeof(Float) * mom_size); for(int i=0;ibd.cps_impexcbFermion((Float *)phi1, in[0], 0, 1); //export fbfm->bd.cps_impexcbFermion((Float *)phi2, in2[0], 0, 1); fbfm->EvolveMomFforceBase((Matrix*)mom_test4_dbllatt, phi1, phi2, 0.5, 1.0); //compare mom bool fail(false); for(int i=0;i1e-08){ printf("FforceWilsonType double latt test fail midx %d mu %d (%d %d %d %d): %f %f\n",midx,mu,x[0],x[1],x[2],x[3],mom_test4_dbllatt[i],mom_test3_dbllatt[i]); fail=true; } } if(fail){ printf("Failed FforceWilsonType double latt test\n"); exit(-1); } else printf("Passed FforceWilsonType double latt test\n"); pfree(mom_test4_dbllatt); } } #ifdef HAVE_BFM Chroma::finalize(); #endif if(UniqueID()==0){ printf("Main job complete\n"); fflush(stdout); } return 0; } void GaugeTransformU(Matrix *gtrans, Lattice &lat){ Matrix recv_buf; Matrix tmp; //apply the gauge transformation to U int nflav = 1; if(GJP.Gparity()) nflav = 2; for(int flav=0;flav