//CK: In this test we check that the vectorised wilson dslash is correct for both G-parity 1f and 2f. The regular non-vectorised wilson dslash is called for each, along with the vectorised version and the two are compared. Norms are also printed. //Note that the vectorised version does not exist unless CPS is compiled with QDP. If QDP is not used the test code will just spit out the norms from the regular non-vectorised wilson dslash. If QDP is used then the regular wilson dslash actually just calls the vectorised version (albeit with a vector size of 1), so the comparison to the non-QDP non-vectorised result is necessary to ensure everything is working correctly. #include #include #include #include #include #include #ifdef PARALLEL #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if(0==1) #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_BFM #include #endif using namespace std; USING_NAMESPACE_CPS void setup_double_latt(Lattice &double_latt, Matrix* orig_gfield, bool gparity_X, bool gparity_Y){ //orig latt ( U_0 U_1 ) ( U_2 U_3 ) ( U_4 U_5 ) ( U_6 U_7 ) //double tatt ( U_0 U_1 U_2 U_3 ) ( U_4 U_5 U_6 U_7 ) ( U_0* U_1* U_2* U_3* ) ( U_4* U_5* U_6* U_7* ) Matrix *dbl_gfield = double_latt.GaugeField(); if(!UniqueID()){ printf("Setting up 1f lattice.\n"); fflush(stdout); } SingleToDoubleLattice lattdoubler(gparity_X,gparity_Y,orig_gfield,double_latt); lattdoubler.Run(); if(!UniqueID()){ printf("Finished setting up 1f lattice\n"); fflush(stdout); } } void setup_double_rng(bool gparity_X, bool gparity_Y){ //orig 4D rng 2 stacked 4D volumes //orig ([R_0 R_1][R'_0 R'_1])([R_2 R_3][R'_2 R'_3])([R_4 R_5][R'_4 R'_5])([R_6 R_7][R'_6 R'_7]) //double (R_0 R_1 R_2 R_3)(R_4 R_5 R_6 R_7)(R'_0 R'_1 R'_2 R'_3)(R'_4 R'_5 R'_6 R'_7) //orig 5D rng 2 stacked 4D volumes per ls/2 slice (ls/2 as only one RNG per 2^4 block) SingleToDouble4dRNG fourDsetup(gparity_X,gparity_Y); SingleToDouble5dRNG fiveDsetup(gparity_X,gparity_Y); LRG.Reinitialize(); //reset the LRG and prepare for doubled lattice form if(!UniqueID()){ printf("Setting up 1f 4D RNG\n"); fflush(stdout); } fourDsetup.Run(); if(!UniqueID()){ printf("Setting up 1f 5D RNG\n"); fflush(stdout); } fiveDsetup.Run(); } void GaugeTransformU(Matrix *gtrans, Lattice &lat); void dwf_dslash_4_nonvec(Vector *out, Matrix *gauge_field, Vector *in, int cb, int dag, Dwf *dwf_lib_arg) { int i; int ls; IFloat *frm_in; IFloat *frm_out; IFloat *g_field; Wilson *wilson_p; int size_cb[2]; int parity; //---------------------------------------------------------------- // Initializations //---------------------------------------------------------------- ls = dwf_lib_arg->ls; frm_in = (IFloat *) in; frm_out = (IFloat *) out; g_field = (IFloat *) gauge_field; wilson_p = dwf_lib_arg->wilson_p; size_cb[0] = 24*wilson_p->vol[0]; //wilson_p->vol[0] is half of the local lattice volume size_cb[1] = 24*wilson_p->vol[1]; if(GJP.Gparity()){ //CK: 2 4d fields on each ls slice size_cb[0]*=2; size_cb[1]*=2; } //---------------------------------------------------------------- // Apply 4-dimensional Dslash //---------------------------------------------------------------- for(i=0; iargc-6){ printf("Did not specify enough arguments for 'latt' (require 5 dimensions)\n"); exit(-1); } size[0] = CommandLine::arg_as_int(i); //CommandLine ignores zeroth input arg (i.e. executable name) size[1] = CommandLine::arg_as_int(i+1); size[2] = CommandLine::arg_as_int(i+2); size[3] = CommandLine::arg_as_int(i+3); size[4] = CommandLine::arg_as_int(i+4); i+=6; }else if( strncmp(cmd,"-save_double_latt",20) == 0){ dbl_latt_storemode = true; i++; }else if( strncmp(cmd,"-load_lrg",15) == 0){ if(i==argc-1){ printf("-load_lrg requires an argument\n"); exit(-1); } load_lrg=true; load_lrg_file = argv[i+1]; i+=2; }else if( strncmp(cmd,"-save_lrg",15) == 0){ if(i==argc-1){ printf("-save_lrg requires an argument\n"); exit(-1); } save_lrg=true; save_lrg_file = argv[i+1]; i+=2; }else if( strncmp(cmd,"-gauge_fix",15) == 0){ gauge_fix=true; i++; }else if( strncmp(cmd,"-verbose",15) == 0){ verbose=true; i++; }else if( strncmp(cmd,"-skip_gparity_inversion",30) == 0){ skip_gparity_inversion=true; i++; }else if( strncmp(cmd,"-unit_gauge",15) == 0){ unit_gauge=true; i++; }else{ if(UniqueID()==0) printf("Unrecognised argument: %s\n",cmd); exit(-1); } } printf("Lattice size is %d %d %d %d\n",size[0],size[1],size[2],size[3],size[4]); DoArg do_arg; do_arg.x_sites = size[0]; do_arg.y_sites = size[1]; do_arg.z_sites = size[2]; do_arg.t_sites = size[3]; do_arg.s_sites = size[4]; do_arg.x_node_sites = 0; do_arg.y_node_sites = 0; do_arg.z_node_sites = 0; do_arg.t_node_sites = 0; do_arg.s_node_sites = 0; do_arg.x_nodes = 0; do_arg.y_nodes = 0; do_arg.z_nodes = 0; do_arg.t_nodes = 0; do_arg.s_nodes = 0; do_arg.updates = 0; do_arg.measurements = 0; do_arg.measurefreq = 0; do_arg.cg_reprod_freq = 10; do_arg.x_bc = BND_CND_PRD; do_arg.y_bc = BND_CND_PRD; do_arg.z_bc = BND_CND_PRD; do_arg.t_bc = BND_CND_APRD; do_arg.start_conf_kind = START_CONF_ORD; do_arg.start_conf_load_addr = 0x0; do_arg.start_seed_kind = START_SEED_FIXED; do_arg.start_seed_filename = "../rngs/ckpoint_rng.0"; do_arg.start_conf_filename = "../configurations/ckpoint_lat.0"; do_arg.start_conf_alloc_flag = 6; do_arg.wfm_alloc_flag = 2; do_arg.wfm_send_alloc_flag = 2; do_arg.start_seed_value = 83209; do_arg.beta = 2.25; do_arg.c_1 = -3.3100000000000002e-01; do_arg.u0 = 1.0000000000000000e+00; do_arg.dwf_height = 1.8000000000000000e+00; do_arg.dwf_a5_inv = 1.0000000000000000e+00; do_arg.power_plaq_cutoff = 0.0000000000000000e+00; do_arg.power_plaq_exponent = 0; do_arg.power_rect_cutoff = 0.0000000000000000e+00; do_arg.power_rect_exponent = 0; do_arg.verbose_level = -1202; //VERBOSE_DEBUG_LEVEL; //-1202; do_arg.checksum_level = 0; do_arg.exec_task_list = 0; do_arg.xi_bare = 1.0000000000000000e+00; do_arg.xi_dir = 3; do_arg.xi_v = 1.0000000000000000e+00; do_arg.xi_v_xi = 1.0000000000000000e+00; do_arg.clover_coeff = 0.0000000000000000e+00; do_arg.clover_coeff_xi = 0.0000000000000000e+00; do_arg.xi_gfix = 1.0000000000000000e+00; do_arg.gfix_chkb = 1; do_arg.asqtad_KS = 0.0000000000000000e+00; do_arg.asqtad_naik = 0.0000000000000000e+00; do_arg.asqtad_3staple = 0.0000000000000000e+00; do_arg.asqtad_5staple = 0.0000000000000000e+00; do_arg.asqtad_7staple = 0.0000000000000000e+00; do_arg.asqtad_lepage = 0.0000000000000000e+00; do_arg.p4_KS = 0.0000000000000000e+00; do_arg.p4_knight = 0.0000000000000000e+00; do_arg.p4_3staple = 0.0000000000000000e+00; do_arg.p4_5staple = 0.0000000000000000e+00; do_arg.p4_7staple = 0.0000000000000000e+00; do_arg.p4_lepage = 0.0000000000000000e+00; if(verbose) do_arg.verbose_level = VERBOSE_DEBUG_LEVEL; if(gparity_X) do_arg.x_bc = BND_CND_GPARITY; if(gparity_Y) do_arg.y_bc = BND_CND_GPARITY; GJP.Initialize(do_arg); SerialIO::dbl_latt_storemode = dbl_latt_storemode; LRG.Initialize(); //usually initialised when lattice generated, but I pre-init here so I can load the state from file if(load_lrg){ if(UniqueID()==0) printf("Loading RNG state from %s\n",load_lrg_file); LRG.Read(load_lrg_file,32); } if(save_lrg){ if(UniqueID()==0) printf("Writing RNG state to %s\n",save_lrg_file); LRG.Write(save_lrg_file,32); } GwilsonFdwf* lattice = new GwilsonFdwf; if(!load_config){ printf("Creating gauge field\n"); if(!unit_gauge) lattice->SetGfieldDisOrd(); else lattice->SetGfieldOrd(); }else{ ReadLatticeParallel readLat; if(UniqueID()==0) printf("Reading: %s (NERSC-format)\n",load_config_file); readLat.read(*lattice,load_config_file); if(UniqueID()==0) printf("Config read.\n"); } if(save_config){ if(UniqueID()==0) printf("Saving config to %s\n",save_config_file); QioArg wt_arg(save_config_file,0.001); wt_arg.ConcurIONumber=32; WriteLatticeParallel wl; wl.setHeader("disord_id","disord_label",0); wl.write(*lattice,wt_arg); if(!wl.good()) ERR.General("main","()","Failed write lattice %s",save_config_file); if(UniqueID()==0) printf("Config written.\n"); } if(gauge_fix){ lattice->FixGaugeAllocate(FIX_GAUGE_COULOMB_T); lattice->FixGauge(1e-06,2000); if(!UniqueID()){ printf("Gauge fixing finished\n"); fflush(stdout); } } #define VEC_NONVEC_COMPARE #ifdef VEC_NONVEC_COMPARE Float norm2_2f_reg; Float norm2_2f_inv_reg; { Lattice &lat = *lattice; bc[0] = 0; bc[1] = 0; bc[2] = 0; bc[3] = 0; if(GJP.Xbc() == BND_CND_APRD) bc[0] = GJP.XnodeCoor() == (GJP.Xnodes()-1) ? 1 : 0; if(GJP.Ybc() == BND_CND_APRD) bc[1] = GJP.YnodeCoor() == (GJP.Ynodes()-1) ? 1 : 0; if(GJP.Zbc() == BND_CND_APRD) bc[2] = GJP.ZnodeCoor() == (GJP.Znodes()-1) ? 1 : 0; if(GJP.Tbc() == BND_CND_APRD) bc[3] = GJP.TnodeCoor() == (GJP.Tnodes()-1) ? 1 : 0; nx[0] = GJP.XnodeSites(); nx[1] = GJP.YnodeSites(); nx[2] = GJP.ZnodeSites(); nx[3] = GJP.TnodeSites(); BondCond(lat,lat.GaugeField()); lat.Convert(WILSON); Dwf* dwf_arg = (Dwf*)lat.FdiracOpInitPtr(); dwf_arg->ls = GJP.SnodeSites(); dwf_arg->dwf_kappa = 1.0 / ( 2 * (4 + GJP.DwfA5Inv() - GJP.DwfHeight()) ); //generate a random 5d odd-parity source int fermsz = 24*GJP.VolNodeSites()*GJP.SnodeSites()/2*2; LatRanGen LRGbak(LRG); Float *src = (Float *) pmalloc(fermsz*sizeof(Float)); lat.RandGaussVector((Vector*)src, 0.5, 1); LRG = LRGbak; Float *reg_sol = (Float *) pmalloc(fermsz*sizeof(Float)); #ifdef USE_QMP //explicitly call un-vectorised form a la dwf_dslash_4 dwf_dslash_4_nonvec( (Vector*)reg_sol, lat.GaugeField(), (Vector*)src, 1,0, dwf_arg); //get vectorised result Float *vec_sol = (Float *) pmalloc(fermsz*sizeof(Float)); dwf_dslash_4( (Vector*)vec_sol, lat.GaugeField(), (Vector*)src, 1,0, dwf_arg); Float err(0.0); for(int i=0;i 1e-06 ){ printf("Err 2f node %d, idx %d: reg %f vec %f\n",UniqueID(),i,reg_sol[i],vec_sol[i]); err = 1.0; } } glb_sum(&err); if(err>0.0){ if(!UniqueID()) printf("2f vec non-vec comparison check failed\n"); exit(-1); }else if(!UniqueID()) printf("2f vec non-vec comparison check passed\n"); pfree(vec_sol); #else dwf_dslash_4( (Vector*)reg_sol, lat.GaugeField(), (Vector*)src, 1,0, dwf_arg); #endif //Solution is defined on even sites if(gparity_Y && GJP.Xnodes()==1 && GJP.Ynodes()==1){ Vector *vsrc = (Vector*)reg_sol; int spins = lat.SpinComponents(); int s=0; int t=0; int z=0; for(int f=0;f<2;f++){ for(int y=0;yFixGaugeFree(); if(UniqueID()==0) printf("Starting double lattice dslash\n"); int array_size = 2*lattice->GsiteSize() * GJP.VolNodeSites() * sizeof(Float); Matrix *orig_lattice = (Matrix *) pmalloc(array_size); memcpy((void*)orig_lattice, (void*)lattice->GaugeField(), array_size); lattice->FreeGauge(); //free memory and reset delete lattice; //lattice objects are singleton (scope_lock) //setup 1f model. Upon calling GJP.Initialize the lattice size will be doubled in the appropriate directions //and the boundary condition set to APRD if(gparity_X) do_arg.gparity_1f_X = 1; if(gparity_Y) do_arg.gparity_1f_Y = 1; GJP.Initialize(do_arg); if(GJP.Gparity()){ printf("Que?\n"); exit(-1); } if(UniqueID()==0) printf("Doubled lattice : %d %d %d %d\n", GJP.XnodeSites()*GJP.Xnodes(),GJP.YnodeSites()*GJP.Ynodes(), GJP.ZnodeSites()*GJP.Znodes(),GJP.TnodeSites()*GJP.Tnodes()); #ifdef HAVE_BFM { QDP::multi1d nrow(Nd); for(int i = 0;i test(Nd); // nrow=size; QDP::Layout::setLattSize(nrow); QDP::Layout::create(); } #endif GwilsonFdwf doubled_lattice; setup_double_latt(doubled_lattice,orig_lattice,gparity_X,gparity_Y); setup_double_rng(gparity_X,gparity_Y); if(gauge_fix){ doubled_lattice.FixGaugeAllocate(FIX_GAUGE_COULOMB_T); doubled_lattice.FixGauge(1e-06,2000); if(!UniqueID()){ printf("Gauge fixing finished\n"); fflush(stdout); } } #ifdef VEC_NONVEC_COMPARE { Lattice &lat = doubled_lattice; bc[0] = 0; bc[1] = 0; bc[2] = 0; bc[3] = 0; if(GJP.Xbc() == BND_CND_APRD) bc[0] = GJP.XnodeCoor() == (GJP.Xnodes()-1) ? 1 : 0; if(GJP.Ybc() == BND_CND_APRD) bc[1] = GJP.YnodeCoor() == (GJP.Ynodes()-1) ? 1 : 0; if(GJP.Zbc() == BND_CND_APRD) bc[2] = GJP.ZnodeCoor() == (GJP.Znodes()-1) ? 1 : 0; if(GJP.Tbc() == BND_CND_APRD) bc[3] = GJP.TnodeCoor() == (GJP.Tnodes()-1) ? 1 : 0; nx[0] = GJP.XnodeSites(); nx[1] = GJP.YnodeSites(); nx[2] = GJP.ZnodeSites(); nx[3] = GJP.TnodeSites(); BondCond(lat,lat.GaugeField()); lat.Convert(WILSON); Dwf* dwf_arg = (Dwf*)lat.FdiracOpInitPtr(); dwf_arg->ls = GJP.SnodeSites(); dwf_arg->dwf_kappa = 1.0 / ( 2 * (4 + GJP.DwfA5Inv() - GJP.DwfHeight()) ); //generate a random 5d odd-parity source int fermsz = 24*GJP.VolNodeSites()*GJP.SnodeSites()/2; LatRanGen LRGbak(LRG); Float *src = (Float *) pmalloc(fermsz*sizeof(Float)); lat.RandGaussVector((Vector*)src, 0.5, 1); LRG = LRGbak; if(GJP.Gparity1fY()){ Vector *vsrc = (Vector*)src; //make source on upper-right quadrant negative (RNGs should be correct) for(int s=0;s=GJP.Xnodes()*GJP.XnodeSites()/2 && gy>=GJP.Ynodes()*GJP.YnodeSites()/2){ int pos[5] = {x,y,z,t,s}; int f_off = lat.FsiteOffsetChkb(pos) * lat.SpinComponents(); for(int spn=0;spn 1e-06 ){ printf("Err 1f node %d, idx %d: reg %f vec %f\n",UniqueID(),i,reg_sol[i],vec_sol[i]); err = 1.0; } } glb_sum(&err); if(err>0.0){ if(!UniqueID()) printf("1f vec non-vec comparison check failed\n"); exit(-1); }else if(!UniqueID()) printf("1f vec non-vec comparison check passed\n"); pfree(vec_sol); #else dwf_dslash_4( (Vector*)reg_sol, lat.GaugeField(), (Vector*)src, 1,0, dwf_arg); #endif if(GJP.Gparity1fY() && GJP.Xnodes()==1 && GJP.Ynodes()==1){ Vector *vsrc = (Vector*)reg_sol; int spins = lat.SpinComponents(); //make source on upper-right quadrant negative (RNGs should be correct) int s=0; int t=0; int z=0; for(int quad=0;quad<4;quad++){ for(int y=0;y=GJP.XnodeSites()/2 && y=GJP.XnodeSites()/2 && y>=GJP.YnodeSites()/2) q = 2; else q = 3; if(q!=quad) continue; int pos[5] = {x,y,z,t,s}; int f_off = lat.FsiteOffsetChkb(pos) * spins; f_off -= GJP.VolNodeSites()*GJP.SnodeSites()/2*spins; Float* e0 = (Float*)(vsrc+f_off); printf("1f quad %d %d %d: %f (%d)\n",quad,x,y,*e0,f_off); } } } } Float norm2_reg = dotProduct((IFloat*)reg_sol,(IFloat*)reg_sol,fermsz); glb_sum(&norm2_reg); if(gparity_X && gparity_Y) norm2_reg/=2.0; lat.Convert(CANONICAL); BondCond(lat,lat.GaugeField()); CgArg frm_cg_arg_md; frm_cg_arg_md.mass = 0.4; frm_cg_arg_md.max_num_iter = 5000; frm_cg_arg_md.stop_rsd = 1e-08; Float *reg_inv = (Float *) pmalloc(fermsz*sizeof(Float)); lat.FmatEvlInv((Vector*)reg_inv, (Vector*)src, &frm_cg_arg_md, CNV_FRM_NO); Float norm2_inv_reg = dotProduct((IFloat*)reg_inv,(IFloat*)reg_inv,fermsz); glb_sum(&norm2_inv_reg); if(gparity_X && gparity_Y) norm2_inv_reg/=2.0; if(!UniqueID()) printf("1f:2f regular norm2: %f %f\n",norm2_reg,norm2_2f_reg); if(!UniqueID()) printf("1f:2f regular inv norm2: %f %f\n",norm2_inv_reg,norm2_2f_inv_reg); pfree(reg_sol); pfree(src); } #endif #ifdef HAVE_BFM Chroma::finalize(); #endif if(UniqueID()==0){ printf("Main job complete\n"); fflush(stdout); } return 0; } void GaugeTransformU(Matrix *gtrans, Lattice &lat){ Matrix recv_buf; Matrix tmp; //apply the gauge transformation to U int nflav = 1; if(GJP.Gparity()) nflav = 2; for(int flav=0;flav eps || fabs(ca.imag()-cb.imag()) > eps ) return false; // } // } // } // } // return true; // } // if(gparity_X && !gparity_Y){ // if(GJP.Xnodes()>1){ // // SingleToDoubleLattice lattdoubler(gparity_X,gparity_Y,orig_gfield,double_latt); // lattdoubler.Run(); // if(!UniqueID()){ printf("Finished setting up doubled lattice\n"); fflush(stdout); } // }else{ // //only one node in X-direction // //copy data from orig_latt stored on this node // int pos[4]; // for(pos[0]=0;pos[0]1){ // SingleToDouble4dRNG fourDsetup; // SingleToDouble5dRNG fiveDsetup; // LRG.Reinitialize(); //reset the LRG and prepare for doubled lattice form // if(!UniqueID()){ printf("Setting up 4D RNG\n"); fflush(stdout); } // fourDsetup.Run(gparity_X,gparity_Y); // if(!UniqueID()){ printf("Setting up 5D RNG\n"); fflush(stdout); } // fiveDsetup.Run(gparity_X,gparity_Y); // }else{ // int n_rgen_4d = GJP.VolNodeSites()/16; //applies both to original and doubled latt // int n_rgen = n_rgen_4d; // if (GJP.SnodeSites()>=2) // n_rgen = GJP.VolNodeSites()*GJP.SnodeSites() / 32; // int stk_index_4d_off = n_rgen_4d/2; //offset for R' on 4D orig latt // int blocks_per_s_layer = n_rgen /( GJP.SnodeSites() / 2 ); //also same for original and doubled latt // int stk_index_5d_off = blocks_per_s_layer/2; //offset for R' on 5D orig latt // //copy the originals // UGrandomGenerator *ugran_4d_orig = new UGrandomGenerator[n_rgen_4d]; // for(int i=0;i=GJP.XnodeSites()/2){ // int orig_idx = (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)) + stk_index_4d_off; // int new_idx = pos[0]/2 + GJP.XnodeSites()/2*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // LRG.UGrandGen4D(new_idx) = ugran_4d_orig[orig_idx]; // }else{ // int orig_idx = pos[0]/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // int new_idx = pos[0]/2 + GJP.XnodeSites()/2*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // LRG.UGrandGen4D(new_idx) = ugran_4d_orig[orig_idx]; // } // } // //do the 5D RNG // if(pos[0]>=GJP.XnodeSites()/2){ // int orig_idx = pos[4]/2*blocks_per_s_layer + (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)) + stk_index_5d_off; // int new_idx = pos[4]/2*blocks_per_s_layer + pos[0]/2 + GJP.XnodeSites()/2*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // LRG.UGrandGen(new_idx) = ugran_orig[orig_idx]; // }else{ // int orig_idx = pos[4]/2*blocks_per_s_layer + pos[0]/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // int new_idx = pos[4]/2*blocks_per_s_layer + pos[0]/2 + GJP.XnodeSites()/2*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)); // LRG.UGrandGen(new_idx) = ugran_orig[orig_idx]; // } // } // } // } // } // } // delete[] ugran_4d_orig; // delete[] ugran_orig; // }//single node // }//gpx and gpy // else if(gparity_X && gparity_Y){ // SingleToDouble4dRNG fourDsetup; // SingleToDouble5dRNG fiveDsetup; // LRG.Reinitialize(); //reset the LRG and prepare for doubled lattice form // if(!UniqueID()){ printf("Setting up 4D RNG\n"); fflush(stdout); } // fourDsetup.Run(gparity_X,gparity_Y); // if(!UniqueID()){ printf("Setting up 5D RNG\n"); fflush(stdout); } // fiveDsetup.Run(gparity_X,gparity_Y); // } // #ifdef DOUBLE_RNG_TEST // if(gparity_X && !gparity_Y){ // if(!UniqueID()) printf("Testing RNG\n"); // //generate rands again and compare to originals // if(GJP.Xnodes()==1){ // int pos[5]; // for(pos[4]=0;pos[4]=GJP.XnodeSites()/2){ // origflav = 1; // origidx = (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2+GJP.ZnodeSites()/2*pos[3]/2)); // } // IFloat &origval = orig_4d_rands[origflav][origidx]; // IFloat newval = LRG.Urand(FOUR_D);//do the 4D RNG // if(origval != newval){ // printf("4D RNG disparity: (%d %d %d %d): orig %f new %f\n",pos[0],pos[1],pos[2],pos[3],origval,newval); exit(-1); // } // } // int origflav = 0; // int origidx = pos[0]/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2+GJP.ZnodeSites()/2*(pos[3]/2+GJP.SnodeSites()/2*pos[4]/2))); // if(pos[0]>=GJP.XnodeSites()/2){ // origflav = 1; // origidx = (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2+GJP.ZnodeSites()/2*(pos[3]/2+GJP.SnodeSites()/2*pos[4]/2))); // } // IFloat &origval = orig_5d_rands[origflav][origidx]; // IFloat newval = LRG.Urand(FIVE_D);//do the 5D RNG // if(origval != newval){ // printf("5D RNG disparity: (%d %d %d %d): orig %f new %f\n",pos[0],pos[1],pos[2],pos[3],origval,newval); exit(-1); // } // } // } // } // } // } // }else{ // bool printnode = false; // if(GJP.YnodeCoor()==0 && GJP.ZnodeCoor()==0 && GJP.TnodeCoor()==0) printnode=true; // { //4D RNG check // int n_rgen_4d = GJP.VolNodeSites()/16;// both flavours (remember volume doubled now) // int buf_size = n_rgen_4d * sizeof(Float); // Float *recv_buf = (Float *) pmalloc(buf_size); // Float *send_buf = (Float *) pmalloc(buf_size); // for(int f=0;f<2;f++){ // int foff = n_rgen_4d/2; // for(int site =0; site < n_rgen_4d/2; site++){ // send_buf[site+f*foff] = orig_4d_rands[f][site]; // //if(printnode) printf("Node %d: f %d site %d rand %f\n",GJP.XnodeCoor(),f,site,orig_4d_rands[f][site]); // } // } // // for(int i=0;i=GJP.Xnodes()/2){ // x_origin = (GJP.XnodeCoor()-GJP.Xnodes()/2)*GJP.XnodeSites(); // data_flav = 1; // } // data_nodecoor_hf1 = (x_origin/(GJP.XnodeSites()/2) ) % GJP.Xnodes(); // data_nodecoor_hf2 = (data_nodecoor_hf1+1) % GJP.Xnodes(); // Float nodes_unhappy = 1.0; // Float *cur_data_buf = send_buf; // Float *send_buf_p = send_buf; // Float *recv_buf_p = recv_buf; // int xnode_coor_of_buf_data = GJP.XnodeCoor(); // int got_hf1 = 0; // int got_hf2 = 0; // int pos[5]; pos[4] = 0; // while(nodes_unhappy != 0.0){ // if(xnode_coor_of_buf_data == data_nodecoor_hf1 || xnode_coor_of_buf_data == data_nodecoor_hf2 ){ // if(xnode_coor_of_buf_data == data_nodecoor_hf1 && printnode) printf("Node %d: Buffer contains data from node %d, testing 1st half (flav %d)\n",GJP.XnodeCoor(),xnode_coor_of_buf_data,data_flav); // else if(xnode_coor_of_buf_data == data_nodecoor_hf2 && printnode) printf("Node %d: Buffer contains data from node %d, testing 2nd half (flav %d)\n",GJP.XnodeCoor(),xnode_coor_of_buf_data,data_flav); // for(pos[3]=0;pos[3]=GJP.XnodeSites()/2 && xnode_coor_of_buf_data == data_nodecoor_hf2){ // int orig_idx = (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)) + data_flav * n_rgen_4d/2; // IFloat &origval = cur_data_buf[orig_idx]; // IFloat newval = LRG.Urand(FOUR_D);//do the 4D RNG // if(origval != newval){ // printf("Node %d: 4D RNG disparity: (%d %d %d %d): orig %f new %f (idx %d)\n",GJP.XnodeCoor(),pos[0],pos[1],pos[2],pos[3],origval,newval,orig_idx); exit(-1); // } // got_hf2 = 1; // }else if(pos[0]=GJP.Xnodes()/2){ // x_origin = (GJP.XnodeCoor()-GJP.Xnodes()/2)*GJP.XnodeSites(); // data_flav = 1; // } // data_nodecoor_hf1 = (x_origin/(GJP.XnodeSites()/2) ) % GJP.Xnodes(); // data_nodecoor_hf2 = (data_nodecoor_hf1+1) % GJP.Xnodes(); // Float nodes_unhappy = 1.0; // Float *cur_data_buf = send_buf; // Float *send_buf_p = send_buf; // Float *recv_buf_p = recv_buf; // int xnode_coor_of_buf_data = GJP.XnodeCoor(); // int got_hf1 = 0; // int got_hf2 = 0; // while(nodes_unhappy != 0.0){ // if(xnode_coor_of_buf_data == data_nodecoor_hf1 || xnode_coor_of_buf_data == data_nodecoor_hf2 ){ // if(xnode_coor_of_buf_data == data_nodecoor_hf1 && printnode) printf("Node %d: Buffer contains data from node %d, testing 1st half (flav %d)\n",GJP.XnodeCoor(),xnode_coor_of_buf_data,data_flav); // else if(xnode_coor_of_buf_data == data_nodecoor_hf2 && printnode) printf("Node %d: Buffer contains data from node %d, testing 2nd half (flav %d)\n",GJP.XnodeCoor(),xnode_coor_of_buf_data,data_flav); // for(pos[4]=0;pos[4]=GJP.XnodeSites()/2 && xnode_coor_of_buf_data == data_nodecoor_hf2){ // int orig_idx = pos[4]/2 * GJP.VolNodeSites()/16 + (pos[0]-GJP.XnodeSites()/2)/2 + GJP.XnodeSites()/4*(pos[1]/2 + GJP.YnodeSites()/2*(pos[2]/2 + GJP.ZnodeSites()/2*pos[3]/2)) + data_flav * GJP.VolNodeSites()/32; // IFloat &origval = cur_data_buf[orig_idx]; // IFloat newval = LRG.Urand(FIVE_D); // if(origval != newval){ // printf("Node %d: 5D RNG disparity: (%d %d %d %d %d): orig %f new %f (idx %d)\n",GJP.XnodeCoor(),pos[0],pos[1],pos[2],pos[3],pos[4],origval,newval,orig_idx); exit(-1); // } // got_hf2 = 1; // }else if(pos[0]